Skip to content

[DE-525] Search optimisation (v6) #504

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions src/main/java/com/arangodb/entity/InvertedIndexEntity.java
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ public class InvertedIndexEntity implements Entity {
private Collection<InvertedIndexField> fields;
private Boolean searchField;
private Collection<StoredValue> storedValues;
private Collection<String> optimizeTopK;
private InvertedIndexPrimarySort primarySort;
private String analyzer;
private Set<AnalyzerFeature> features;
Expand Down Expand Up @@ -104,6 +105,10 @@ public Collection<StoredValue> getStoredValues() {
return storedValues;
}

/**
 * Accessor for the {@code optimizeTopK} attribute of this inverted index.
 *
 * @return the collection currently held in the {@code optimizeTopK} field
 */
public Collection<String> getOptimizeTopK() {
    return this.optimizeTopK;
}

/**
 * Accessor for the primary sort definition of this inverted index.
 *
 * @return the value currently held in the {@code primarySort} field
 */
public InvertedIndexPrimarySort getPrimarySort() {
    return this.primarySort;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ public class ArangoSearchProperties {
private final Collection<CollectionLink> links;
private ArangoSearchCompression primarySortCompression;
private final Collection<StoredValue> storedValues;
private final Collection<String> optimizeTopK;
private Boolean primarySortCache;
private Boolean primaryKeyCache;

Expand All @@ -50,6 +51,7 @@ public ArangoSearchProperties() {
links = new ArrayList<>();
primarySorts = new ArrayList<>();
storedValues = new ArrayList<>();
optimizeTopK = new ArrayList<>();
}

public Long getCommitIntervalMsec() {
Expand Down Expand Up @@ -127,6 +129,18 @@ public void addStoredValues(final StoredValue... storedValues) {
this.storedValues.addAll(Arrays.asList(storedValues));
}

/**
 * @return The strings defining the optimized sort expressions. This is the collection stored in these
 * properties itself, not a copy.
 * @since ArangoDB 3.11, Enterprise Edition only
 */
public Collection<String> getOptimizeTopK() {
    return this.optimizeTopK;
}

/**
 * Appends the given sort expressions, in the order given, to the ones already stored.
 *
 * @param optimizeTopK sort expressions to add
 */
public void addOptimizeTopK(final String... optimizeTopK) {
    for (final String expression : optimizeTopK) {
        this.optimizeTopK.add(expression);
    }
}

/**
 * Accessor for the {@code primarySortCache} setting.
 *
 * @return the value currently held in the {@code primarySortCache} field
 */
public Boolean getPrimarySortCache() {
    return this.primarySortCache;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,14 @@ public Collection<StoredValue> getStoredValues() {
return properties.getStoredValues();
}

/**
 * Delegates to the wrapped properties object.
 *
 * @return The strings defining the optimized sort expressions.
 * @since ArangoDB 3.11, Enterprise Edition only
 */
public Collection<String> getOptimizeTopK() {
    return this.properties.getOptimizeTopK();
}

/**
* @return If you enable this option, then the primary sort columns are always cached in memory. This can improve
* the performance of queries that utilize the primary sort order. Otherwise, these values are memory-mapped and it
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,15 @@ public class VPackDeserializers {
properties.addStoredValues(sv);
}

// Read the "optimizeTopK" attribute; a value that is not an array is skipped entirely.
final VPackSlice optimizeTopK = vpack.get("optimizeTopK");
if (optimizeTopK.isArray()) {
final Iterator<VPackSlice> optimizeTopKIterator = optimizeTopK.arrayIterator();
while (optimizeTopKIterator.hasNext()) {
// Each array element is deserialized as a String and appended to the properties, one at a time.
String o = context.deserialize(optimizeTopKIterator.next(), String.class);
properties.addOptimizeTopK(o);
}
}

return properties;
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,15 @@ public class VPackSerializers {
builder.close(); // close array
}

// Emit "optimizeTopK" only when at least one expression is present; an empty collection
// adds no attribute to the builder.
final Collection<String> optimizeTopK = value.getOptimizeTopK();
if (!optimizeTopK.isEmpty()) {
builder.add("optimizeTopK", ValueType.ARRAY); // open array
for (final String o : optimizeTopK) {
// NOTE(review): the attribute name is null here, presumably because array elements are unnamed — confirm.
context.serialize(builder, null, o);
}
builder.close(); // close array
}

};

public static final VPackSerializer<SearchAliasProperties> SEARCH_ALIAS_PROPERTIES = (builder, attribute, value, context) -> {
Expand Down
15 changes: 15 additions & 0 deletions src/main/java/com/arangodb/model/InvertedIndexOptions.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ public class InvertedIndexOptions extends IndexOptions<InvertedIndexOptions> {
private Integer parallelism;
private InvertedIndexPrimarySort primarySort;
private final Collection<StoredValue> storedValues = new ArrayList<>();
private final Collection<String> optimizeTopK = new ArrayList<>();
private String analyzer;
private final Set<AnalyzerFeature> features = new HashSet<>();
private Boolean includeAllFields;
Expand Down Expand Up @@ -112,6 +113,20 @@ public InvertedIndexOptions storedValues(StoredValue... storedValues) {
return this;
}

/**
 * @return the sort expressions collected so far through {@link #optimizeTopK(String...)}; this is the
 * collection held by these options itself, not a copy
 */
public Collection<String> getOptimizeTopK() {
    return this.optimizeTopK;
}

/**
 * Appends the given sort expressions to the ones already configured on these options.
 *
 * @param optimizeTopK An array of strings defining sort expressions that you want to optimize.
 * @return options
 * @since ArangoDB 3.11, Enterprise Edition only
 */
public InvertedIndexOptions optimizeTopK(String... optimizeTopK) {
    for (String expression : optimizeTopK) {
        this.optimizeTopK.add(expression);
    }
    return this;
}

/**
 * Accessor for the analyzer configured on these options.
 *
 * @return the value currently held in the {@code analyzer} field
 */
public String getAnalyzer() {
    return this.analyzer;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,15 @@ public ArangoSearchCreateOptions storedValues(final StoredValue... storedValues)
return this;
}

/**
 * Forwards the given sort expressions to the underlying properties object.
 *
 * @param optimizeTopK An array of strings defining sort expressions that you want to optimize.
 * @return options
 * @since ArangoDB 3.11, Enterprise Edition only
 */
public ArangoSearchCreateOptions optimizeTopK(final String... optimizeTopK) {
    this.properties.addOptimizeTopK(optimizeTopK);
    return this;
}

/**
* @param primarySortCache If you enable this option, then the primary sort columns are always cached in memory.
Expand Down
7 changes: 7 additions & 0 deletions src/test/java/com/arangodb/ArangoSearchTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -931,6 +931,8 @@ void arangoSearchOptions(ArangoDatabase db) {
.primaryKeyCache(true);
StoredValue storedValue = new StoredValue(Arrays.asList("a", "b"), ArangoSearchCompression.none, true);
options.storedValues(storedValue);
// Sort expressions passed as optimizeTopK on the create options; asserted again further down.
String[] optimizeTopK = new String[]{"BM25(@doc) DESC", "TFIDF(@doc) DESC"};
options.optimizeTopK(optimizeTopK);

final ArangoSearch view = db.arangoSearch(viewName);
view.create(options);
Expand Down Expand Up @@ -972,6 +974,11 @@ void arangoSearchOptions(ArangoDatabase db) {
FieldLink nested = fieldLink.getNested().iterator().next();
assertThat(nested.getName()).isEqualTo("f2");
}

// optimizeTopK is only verified when both guards hold (Enterprise Edition, version 3.11 or later);
// the returned expressions must match the configured ones exactly and in the same order.
if (isEnterprise() && isAtLeastVersion(3, 11)) {
assertThat(properties.getOptimizeTopK()).containsExactly(optimizeTopK);
}

}

@ParameterizedTest(name = "{index}")
Expand Down
5 changes: 5 additions & 0 deletions src/test/java/com/arangodb/InvertedIndexTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ private InvertedIndexOptions createOptions(String analyzerName) {
.cache(cache)
)
.storedValues(new StoredValue(Arrays.asList("f3", "f4"), ArangoSearchCompression.none, cache))
.optimizeTopK("BM25(@doc) DESC", "TFIDF(@doc) DESC")
.analyzer(analyzerName)
.features(AnalyzerFeature.position, AnalyzerFeature.frequency)
.includeAllFields(false)
Expand Down Expand Up @@ -144,6 +145,10 @@ private void assertCorrectIndexEntity(InvertedIndexEntity indexResult, InvertedI
assertThat(indexResult.getWritebufferSizeMax()).isEqualTo(options.getWritebufferSizeMax());
assertThat(indexResult.getCache()).isEqualTo(options.getCache());
assertThat(indexResult.getPrimaryKeyCache()).isEqualTo(options.getPrimaryKeyCache());

// optimizeTopK is only compared when both guards hold (Enterprise Edition, version 3.11 or later);
// the index result must contain exactly the expressions from the options, in the same order.
if (isEnterprise() && isAtLeastVersion(3, 11)) {
assertThat(indexResult.getOptimizeTopK()).containsExactlyElementsOf(options.getOptimizeTopK());
}
}

@ParameterizedTest(name = "{index}")
Expand Down