Skip to content

Commit b72092f

Browse files
committed
optimizeTopK in view
1 parent a6867f9 commit b72092f

File tree

3 files changed

+55
-37
lines changed

3 files changed

+55
-37
lines changed

core/src/main/java/com/arangodb/entity/arangosearch/ArangoSearchPropertiesEntity.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ public final class ArangoSearchPropertiesEntity extends ViewEntity {
4141
private Collection<CollectionLink> links;
4242
private ArangoSearchCompression primarySortCompression;
4343
private Collection<StoredValue> storedValues;
44+
private Collection<String> optimizeTopK;
4445
private Boolean primarySortCache;
4546
private Boolean primaryKeyCache;
4647

@@ -121,6 +122,14 @@ public Collection<StoredValue> getStoredValues() {
121122
return storedValues;
122123
}
123124

125+
/**
126+
* @return A collection of strings defining the optimized sort expressions.
127+
* @since ArangoDB 3.11, Enterprise Edition only
128+
*/
129+
public Collection<String> getOptimizeTopK() {
130+
return optimizeTopK;
131+
}
132+
124133
public Boolean getPrimarySortCache() {
125134
return primarySortCache;
126135
}

core/src/main/java/com/arangodb/model/arangosearch/ArangoSearchCreateOptions.java

Lines changed: 38 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ public final class ArangoSearchCreateOptions {
4343
private Collection<PrimarySort> primarySorts;
4444
private ArangoSearchCompression primarySortCompression;
4545
private Collection<StoredValue> storedValues;
46+
private Collection<String> optimizeTopK;
4647
private Boolean primarySortCache;
4748
private Boolean primaryKeyCache;
4849

@@ -57,14 +58,11 @@ ArangoSearchCreateOptions name(final String name) {
5758
}
5859

5960
/**
60-
* @param consolidationIntervalMsec Wait at least this many milliseconds between committing index data changes
61-
* and making them visible to
62-
* queries (default: 60000, to disable use: 0). For the case where there are a
63-
* lot of inserts/updates, a
64-
* lower value, until commit, will cause the index not to account for them and
65-
* memory usage would
66-
* continue to grow. For the case where there are a few inserts/updates, a
67-
* higher value will impact
61+
* @param consolidationIntervalMsec Wait at least this many milliseconds between committing index data changes and
62+
* making them visible to queries (default: 60000, to disable use: 0). For the case
63+
* where there are a lot of inserts/updates, a lower value, until commit, will
64+
* cause the index not to account for them and memory usage would continue to grow.
65+
* For the case where there are a few inserts/updates, a higher value will impact
6866
* performance and waste disk space for each commit call without any added
6967
* benefits.
7068
* @return options
@@ -76,26 +74,19 @@ public ArangoSearchCreateOptions consolidationIntervalMsec(final Long consolidat
7674

7775
/**
7876
* @param commitIntervalMsec Wait at least this many milliseconds between committing view data store changes and
79-
* making documents visible to
80-
* queries (default: 1000, to disable use: 0). For the case where there are a lot of
81-
* inserts/updates, a lower value,
82-
* until commit, will cause the index not to account for them and memory usage would
83-
* continue to grow. For the case
84-
* where there are a few inserts/updates, a higher value will impact performance and
85-
* waste disk space for each
86-
* commit call without any added benefits. Background: For data retrieval ArangoSearch
87-
* views follow the concept of
88-
* “eventually-consistent”, i.e. eventually all the data in ArangoDB will be matched by
89-
* corresponding query
90-
* expressions. The concept of ArangoSearch view “commit” operation is introduced to
91-
* control the upper-bound on the
92-
* time until document addition/removals are actually reflected by corresponding query
93-
* expressions. Once a “commit”
94-
* operation is complete all documents added/removed prior to the start of the “commit”
95-
* operation will be reflected
96-
* by queries invoked in subsequent ArangoDB transactions, in-progress ArangoDB
97-
* transactions will still continue to
98-
* return a repeatable-read state.
77+
* making documents visible to queries (default: 1000, to disable use: 0). For the case
78+
* where there are a lot of inserts/updates, a lower value, until commit, will cause the
79+
* index not to account for them and memory usage would continue to grow. For the case
80+
* where there are a few inserts/updates, a higher value will impact performance and waste
81+
* disk space for each commit call without any added benefits. Background: For data
82+
* retrieval ArangoSearch views follow the concept of “eventually-consistent”, i.e.
83+
* eventually all the data in ArangoDB will be matched by corresponding query expressions.
84+
* The concept of ArangoSearch view “commit” operation is introduced to control the
85+
* upper-bound on the time until document addition/removals are actually reflected by
86+
* corresponding query expressions. Once a “commit” operation is complete all documents
87+
* added/removed prior to the start of the “commit” operation will be reflected by queries
88+
* invoked in subsequent ArangoDB transactions; in-progress ArangoDB transactions will
89+
* still continue to return a repeatable-read state.
9990
* @return options
10091
*/
10192
public ArangoSearchCreateOptions commitIntervalMsec(final Long commitIntervalMsec) {
@@ -105,14 +96,11 @@ public ArangoSearchCreateOptions commitIntervalMsec(final Long commitIntervalMse
10596

10697
/**
10798
* @param cleanupIntervalStep Wait at least this many commits between removing unused files in data directory
108-
* (default: 10, to
109-
* disable use: 0). For the case where the consolidation policies merge segments often
110-
* (i.e. a lot of
111-
* commit+consolidate), a lower value will cause a lot of disk space to be wasted. For
112-
* the case where the
113-
* consolidation policies rarely merge segments (i.e. few inserts/deletes), a higher
114-
* value will impact
115-
* performance without any added benefits.
99+
* (default: 10, to disable use: 0). For the case where the consolidation policies merge
100+
* segments often (i.e. a lot of commit+consolidate), a lower value will cause a lot of
101+
* disk space to be wasted. For the case where the consolidation policies rarely merge
102+
* segments (i.e. few inserts/deletes), a higher value will impact performance without
103+
* any added benefits.
116104
* @return options
117105
*/
118106
public ArangoSearchCreateOptions cleanupIntervalStep(final Long cleanupIntervalStep) {
@@ -164,6 +152,16 @@ public ArangoSearchCreateOptions storedValues(final StoredValue... storedValues)
164152
return this;
165153
}
166154

155+
/**
156+
* @param optimizeTopK One or more strings, each defining a sort expression that you want to optimize.
157+
* @return options
158+
* @since ArangoDB 3.11, Enterprise Edition only
159+
*/
160+
public ArangoSearchCreateOptions optimizeTopK(final String... optimizeTopK) {
161+
this.optimizeTopK = Arrays.asList(optimizeTopK);
162+
return this;
163+
}
164+
167165
/**
168166
* @param primarySortCache If you enable this option, then the primary sort columns are always cached in memory.
169167
* This can improve the performance of queries that utilize the primary sort order.
@@ -231,6 +229,10 @@ public Collection<StoredValue> getStoredValues() {
231229
return storedValues;
232230
}
233231

232+
public Collection<String> getOptimizeTopK() {
233+
return optimizeTopK;
234+
}
235+
234236
public Boolean getPrimarySortCache() {
235237
return primarySortCache;
236238
}

driver/src/test/java/com/arangodb/ArangoSearchTest.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -424,7 +424,7 @@ private void createGetAndDeleteTypedAnalyzer(ArangoDatabase db, SearchAnalyzer a
424424
// getAnalyzers
425425
SearchAnalyzer foundAnalyzer =
426426
db.getSearchAnalyzers().stream().filter(it -> it.getName().equals(fullyQualifiedName))
427-
.findFirst().get();
427+
.findFirst().get();
428428
assertThat(foundAnalyzer).isEqualTo(analyzer);
429429

430430
// deleteAnalyzer
@@ -672,6 +672,8 @@ void arangoSearchOptions(ArangoDatabase db) {
672672
.primaryKeyCache(true);
673673
StoredValue storedValue = new StoredValue(Arrays.asList("a", "b"), ArangoSearchCompression.none, true);
674674
options.storedValues(storedValue);
675+
String[] optimizeTopK = new String[]{"BM25(@doc) DESC", "TFIDF(@doc) DESC"};
676+
options.optimizeTopK(optimizeTopK);
675677

676678
final ArangoSearch view = db.arangoSearch(viewName);
677679
view.create(options);
@@ -713,6 +715,11 @@ void arangoSearchOptions(ArangoDatabase db) {
713715
FieldLink nested = fieldLink.getNested().iterator().next();
714716
assertThat(nested.getName()).isEqualTo("f2");
715717
}
718+
719+
if (isEnterprise() && isAtLeastVersion(3, 11)) {
720+
assertThat(properties.getOptimizeTopK()).containsExactly(optimizeTopK);
721+
}
722+
716723
}
717724

718725
@ParameterizedTest(name = "{index}")

0 commit comments

Comments
 (0)