Skip to content

Commit d546094

Browse files
authored
[DE-525] Search optimisation (v6) (#504)
* optimizeTopK in view * optimizeTopK in inverted indexes * test fixes
1 parent 4dc204c commit d546094

File tree

9 files changed

+81
-0
lines changed

9 files changed

+81
-0
lines changed

src/main/java/com/arangodb/entity/InvertedIndexEntity.java

+5
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ public class InvertedIndexEntity implements Entity {
4545
private Collection<InvertedIndexField> fields;
4646
private Boolean searchField;
4747
private Collection<StoredValue> storedValues;
48+
private Collection<String> optimizeTopK;
4849
private InvertedIndexPrimarySort primarySort;
4950
private String analyzer;
5051
private Set<AnalyzerFeature> features;
@@ -104,6 +105,10 @@ public Collection<StoredValue> getStoredValues() {
104105
return storedValues;
105106
}
106107

108+
/**
 * Returns the {@code optimizeTopK} value held by this index entity.
 *
 * @return the stored collection of strings; the field declaration carries no initializer, so this may be
 * {@code null} if the value was never populated
 * @since ArangoDB 3.11, Enterprise Edition only
 */
public Collection<String> getOptimizeTopK() {
109+
return optimizeTopK;
110+
}
111+
107112
/**
 * @return the {@code primarySort} value held by this index entity
 */
public InvertedIndexPrimarySort getPrimarySort() {
108113
return primarySort;
109114
}

src/main/java/com/arangodb/entity/arangosearch/ArangoSearchProperties.java

+14
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ public class ArangoSearchProperties {
4242
private final Collection<CollectionLink> links;
4343
private ArangoSearchCompression primarySortCompression;
4444
private final Collection<StoredValue> storedValues;
45+
private final Collection<String> optimizeTopK;
4546
private Boolean primarySortCache;
4647
private Boolean primaryKeyCache;
4748

@@ -50,6 +51,7 @@ public ArangoSearchProperties() {
5051
links = new ArrayList<>();
5152
primarySorts = new ArrayList<>();
5253
storedValues = new ArrayList<>();
54+
optimizeTopK = new ArrayList<>();
5355
}
5456

5557
public Long getCommitIntervalMsec() {
@@ -127,6 +129,18 @@ public void addStoredValues(final StoredValue... storedValues) {
127129
this.storedValues.addAll(Arrays.asList(storedValues));
128130
}
129131

132+
/**
133+
* @return A collection of strings defining optimized sort expressions. The returned object is the internal collection itself, not a copy.
134+
* @since ArangoDB 3.11, Enterprise Edition only
135+
*/
136+
public Collection<String> getOptimizeTopK() {
137+
return optimizeTopK;
138+
}
139+
140+
/**
 * Appends the given strings to the internal {@code optimizeTopK} collection; entries added earlier are kept.
 *
 * @param optimizeTopK strings to append, in the order given
 */
public void addOptimizeTopK(final String... optimizeTopK) {
141+
this.optimizeTopK.addAll(Arrays.asList(optimizeTopK));
142+
}
143+
130144
/**
 * @return the stored {@code primarySortCache} value
 */
public Boolean getPrimarySortCache() {
131145
return primarySortCache;
132146
}

src/main/java/com/arangodb/entity/arangosearch/ArangoSearchPropertiesEntity.java

+8
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,14 @@ public Collection<StoredValue> getStoredValues() {
116116
return properties.getStoredValues();
117117
}
118118

119+
/**
120+
* @return A collection of strings defining optimized sort expressions, obtained by delegating to the wrapped properties object.
121+
* @since ArangoDB 3.11, Enterprise Edition only
122+
*/
123+
public Collection<String> getOptimizeTopK() {
124+
return properties.getOptimizeTopK();
125+
}
126+
119127
/**
120128
* @return If you enable this option, then the primary sort columns are always cached in memory. This can improve
121129
* the performance of queries that utilize the primary sort order. Otherwise, these values are memory-mapped and it

src/main/java/com/arangodb/internal/velocypack/VPackDeserializers.java

+9
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,15 @@ public class VPackDeserializers {
276276
properties.addStoredValues(sv);
277277
}
278278

279+
final VPackSlice optimizeTopK = vpack.get("optimizeTopK");
280+
if (optimizeTopK.isArray()) {
281+
final Iterator<VPackSlice> optimizeTopKIterator = optimizeTopK.arrayIterator();
282+
while (optimizeTopKIterator.hasNext()) {
283+
String o = context.deserialize(optimizeTopKIterator.next(), String.class);
284+
properties.addOptimizeTopK(o);
285+
}
286+
}
287+
279288
return properties;
280289
};
281290

src/main/java/com/arangodb/internal/velocypack/VPackSerializers.java

+9
Original file line numberDiff line numberDiff line change
@@ -239,6 +239,15 @@ public class VPackSerializers {
239239
builder.close(); // close array
240240
}
241241

242+
final Collection<String> optimizeTopK = value.getOptimizeTopK();
243+
if (!optimizeTopK.isEmpty()) {
244+
builder.add("optimizeTopK", ValueType.ARRAY); // open array
245+
for (final String o : optimizeTopK) {
246+
context.serialize(builder, null, o);
247+
}
248+
builder.close(); // close array
249+
}
250+
242251
};
243252

244253
public static final VPackSerializer<SearchAliasProperties> SEARCH_ALIAS_PROPERTIES = (builder, attribute, value, context) -> {

src/main/java/com/arangodb/model/InvertedIndexOptions.java

+15
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ public class InvertedIndexOptions extends IndexOptions<InvertedIndexOptions> {
3838
private Integer parallelism;
3939
private InvertedIndexPrimarySort primarySort;
4040
private final Collection<StoredValue> storedValues = new ArrayList<>();
41+
private final Collection<String> optimizeTopK = new ArrayList<>();
4142
private String analyzer;
4243
private final Set<AnalyzerFeature> features = new HashSet<>();
4344
private Boolean includeAllFields;
@@ -112,6 +113,20 @@ public InvertedIndexOptions storedValues(StoredValue... storedValues) {
112113
return this;
113114
}
114115

116+
/**
 * Returns the strings accumulated so far through {@code optimizeTopK(String...)}.
 *
 * @return the internal collection itself (not a copy); the field is initialized to an empty list, so it is
 * empty when nothing has been added
 */
public Collection<String> getOptimizeTopK() {
117+
return optimizeTopK;
118+
}
119+
120+
/**
121+
* @param optimizeTopK An array of strings defining sort expressions that you want to optimize. The values are appended to those passed in earlier calls; they do not replace them.
122+
* @return options
123+
* @since ArangoDB 3.11, Enterprise Edition only
124+
*/
125+
public InvertedIndexOptions optimizeTopK(String... optimizeTopK) {
126+
Collections.addAll(this.optimizeTopK, optimizeTopK);
127+
return this;
128+
}
129+
115130
/**
 * @return the stored {@code analyzer} value
 */
public String getAnalyzer() {
116131
return analyzer;
117132
}

src/main/java/com/arangodb/model/arangosearch/ArangoSearchCreateOptions.java

+9
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,15 @@ public ArangoSearchCreateOptions storedValues(final StoredValue... storedValues)
134134
return this;
135135
}
136136

137+
/**
138+
* @param optimizeTopK An array of strings defining sort expressions that you want to optimize. The values are forwarded to the wrapped properties object via {@code addOptimizeTopK}, so repeated calls accumulate.
139+
* @return options
140+
* @since ArangoDB 3.11, Enterprise Edition only
141+
*/
142+
public ArangoSearchCreateOptions optimizeTopK(final String... optimizeTopK) {
143+
properties.addOptimizeTopK(optimizeTopK);
144+
return this;
145+
}
137146

138147
/**
139148
* @param primarySortCache If you enable this option, then the primary sort columns are always cached in memory.

src/test/java/com/arangodb/ArangoSearchTest.java

+7
Original file line numberDiff line numberDiff line change
@@ -931,6 +931,8 @@ void arangoSearchOptions(ArangoDatabase db) {
931931
.primaryKeyCache(true);
932932
StoredValue storedValue = new StoredValue(Arrays.asList("a", "b"), ArangoSearchCompression.none, true);
933933
options.storedValues(storedValue);
934+
String[] optimizeTopK = new String[]{"BM25(@doc) DESC", "TFIDF(@doc) DESC"};
935+
options.optimizeTopK(optimizeTopK);
934936

935937
final ArangoSearch view = db.arangoSearch(viewName);
936938
view.create(options);
@@ -972,6 +974,11 @@ void arangoSearchOptions(ArangoDatabase db) {
972974
FieldLink nested = fieldLink.getNested().iterator().next();
973975
assertThat(nested.getName()).isEqualTo("f2");
974976
}
977+
978+
if (isEnterprise() && isAtLeastVersion(3, 11)) {
979+
assertThat(properties.getOptimizeTopK()).containsExactly(optimizeTopK);
980+
}
981+
975982
}
976983

977984
@ParameterizedTest(name = "{index}")

src/test/java/com/arangodb/InvertedIndexTest.java

+5
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ private InvertedIndexOptions createOptions(String analyzerName) {
9393
.cache(cache)
9494
)
9595
.storedValues(new StoredValue(Arrays.asList("f3", "f4"), ArangoSearchCompression.none, cache))
96+
.optimizeTopK("BM25(@doc) DESC", "TFIDF(@doc) DESC")
9697
.analyzer(analyzerName)
9798
.features(AnalyzerFeature.position, AnalyzerFeature.frequency)
9899
.includeAllFields(false)
@@ -144,6 +145,10 @@ private void assertCorrectIndexEntity(InvertedIndexEntity indexResult, InvertedI
144145
assertThat(indexResult.getWritebufferSizeMax()).isEqualTo(options.getWritebufferSizeMax());
145146
assertThat(indexResult.getCache()).isEqualTo(options.getCache());
146147
assertThat(indexResult.getPrimaryKeyCache()).isEqualTo(options.getPrimaryKeyCache());
148+
149+
if (isEnterprise() && isAtLeastVersion(3, 11)) {
150+
assertThat(indexResult.getOptimizeTopK()).containsExactlyElementsOf(options.getOptimizeTopK());
151+
}
147152
}
148153

149154
@ParameterizedTest(name = "{index}")

0 commit comments

Comments
 (0)