From b72092f2f52b335ab4c3c67bd57769d6a5bcb4be Mon Sep 17 00:00:00 2001 From: Michele Rastelli Date: Mon, 8 May 2023 09:08:52 +0200 Subject: [PATCH 1/2] optimizeTopK in view --- .../ArangoSearchPropertiesEntity.java | 9 +++ .../ArangoSearchCreateOptions.java | 74 ++++++++++--------- .../java/com/arangodb/ArangoSearchTest.java | 9 ++- 3 files changed, 55 insertions(+), 37 deletions(-) diff --git a/core/src/main/java/com/arangodb/entity/arangosearch/ArangoSearchPropertiesEntity.java b/core/src/main/java/com/arangodb/entity/arangosearch/ArangoSearchPropertiesEntity.java index 024398680..83c999da8 100644 --- a/core/src/main/java/com/arangodb/entity/arangosearch/ArangoSearchPropertiesEntity.java +++ b/core/src/main/java/com/arangodb/entity/arangosearch/ArangoSearchPropertiesEntity.java @@ -41,6 +41,7 @@ public final class ArangoSearchPropertiesEntity extends ViewEntity { private Collection links; private ArangoSearchCompression primarySortCompression; private Collection storedValues; + private Collection<String> optimizeTopK; private Boolean primarySortCache; private Boolean primaryKeyCache; @@ -121,6 +122,14 @@ public Collection getStoredValues() { return storedValues; } + /** + * @return An array of strings defining optimized sort expressions. 
+ * @since ArangoDB 3.11, Enterprise Edition only + */ + public Collection<String> getOptimizeTopK() { + return optimizeTopK; + } + public Boolean getPrimarySortCache() { return primarySortCache; } diff --git a/core/src/main/java/com/arangodb/model/arangosearch/ArangoSearchCreateOptions.java b/core/src/main/java/com/arangodb/model/arangosearch/ArangoSearchCreateOptions.java index d76f00cba..567c43ca0 100644 --- a/core/src/main/java/com/arangodb/model/arangosearch/ArangoSearchCreateOptions.java +++ b/core/src/main/java/com/arangodb/model/arangosearch/ArangoSearchCreateOptions.java @@ -43,6 +43,7 @@ public final class ArangoSearchCreateOptions { private Collection primarySorts; private ArangoSearchCompression primarySortCompression; private Collection storedValues; + private Collection<String> optimizeTopK; private Boolean primarySortCache; private Boolean primaryKeyCache; @@ -57,14 +58,11 @@ ArangoSearchCreateOptions name(final String name) { } /** - * @param consolidationIntervalMsec Wait at least this many milliseconds between committing index data changes - * and making them visible to - * queries (default: 60000, to disable use: 0). For the case where there are a - * lot of inserts/updates, a - * lower value, until commit, will cause the index not to account for them and - * memory usage would - * continue to grow. For the case where there are a few inserts/updates, a - * higher value will impact + * @param consolidationIntervalMsec Wait at least this many milliseconds between committing index data changes and + * making them visible to queries (default: 60000, to disable use: 0). For the case + * where there are a lot of inserts/updates, a lower value, until commit, will + * cause the index not to account for them and memory usage would continue to grow. + * For the case where there are a few inserts/updates, a higher value will impact * performance and waste disk space for each commit call without any added * benefits. 
* @return options @@ -76,26 +74,19 @@ public ArangoSearchCreateOptions consolidationIntervalMsec(final Long consolidat /** * @param commitIntervalMsec Wait at least this many milliseconds between committing view data store changes and - * making documents visible to - * queries (default: 1000, to disable use: 0). For the case where there are a lot of - * inserts/updates, a lower value, - * until commit, will cause the index not to account for them and memory usage would - * continue to grow. For the case - * where there are a few inserts/updates, a higher value will impact performance and - * waste disk space for each - * commit call without any added benefits. Background: For data retrieval ArangoSearch - * views follow the concept of - * “eventually-consistent”, i.e. eventually all the data in ArangoDB will be matched by - * corresponding query - * expressions. The concept of ArangoSearch view “commit” operation is introduced to - * control the upper-bound on the - * time until document addition/removals are actually reflected by corresponding query - * expressions. Once a “commit” - * operation is complete all documents added/removed prior to the start of the “commit” - * operation will be reflected - * by queries invoked in subsequent ArangoDB transactions, in-progress ArangoDB - * transactions will still continue to - * return a repeatable-read state. + * making documents visible to queries (default: 1000, to disable use: 0). For the case + * where there are a lot of inserts/updates, a lower value, until commit, will cause the + * index not to account for them and memory usage would continue to grow. For the case + * where there are a few inserts/updates, a higher value will impact performance and waste + * disk space for each commit call without any added benefits. Background: For data + * retrieval ArangoSearch views follow the concept of “eventually-consistent”, i.e. + * eventually all the data in ArangoDB will be matched by corresponding query expressions. 
+ * The concept of ArangoSearch view “commit” operation is introduced to control the + * upper-bound on the time until document addition/removals are actually reflected by + * corresponding query expressions. Once a “commit” operation is complete all documents + * added/removed prior to the start of the “commit” operation will be reflected by queries + * invoked in subsequent ArangoDB transactions, in-progress ArangoDB transactions will + * still continue to return a repeatable-read state. * @return options */ public ArangoSearchCreateOptions commitIntervalMsec(final Long commitIntervalMsec) { @@ -105,14 +96,11 @@ public ArangoSearchCreateOptions commitIntervalMsec(final Long commitIntervalMse /** * @param cleanupIntervalStep Wait at least this many commits between removing unused files in data directory - * (default: 10, to - * disable use: 0). For the case where the consolidation policies merge segments often - * (i.e. a lot of - * commit+consolidate), a lower value will cause a lot of disk space to be wasted. For - * the case where the - * consolidation policies rarely merge segments (i.e. few inserts/deletes), a higher - * value will impact - * performance without any added benefits. + * (default: 10, to disable use: 0). For the case where the consolidation policies merge + * segments often (i.e. a lot of commit+consolidate), a lower value will cause a lot of + * disk space to be wasted. For the case where the consolidation policies rarely merge + * segments (i.e. few inserts/deletes), a higher value will impact performance without + * any added benefits. * @return options */ public ArangoSearchCreateOptions cleanupIntervalStep(final Long cleanupIntervalStep) { @@ -164,6 +152,16 @@ public ArangoSearchCreateOptions storedValues(final StoredValue... storedValues) return this; } + /** + * @param optimizeTopK An array of strings defining sort expressions that you want to optimize. 
+ * @return options + * @since ArangoDB 3.11, Enterprise Edition only + */ + public ArangoSearchCreateOptions optimizeTopK(final String... optimizeTopK) { + this.optimizeTopK = Arrays.asList(optimizeTopK); + return this; + } + /** * @param primarySortCache If you enable this option, then the primary sort columns are always cached in memory. * This can improve the performance of queries that utilize the primary sort order. @@ -231,6 +229,10 @@ public Collection getStoredValues() { return storedValues; } + public Collection<String> getOptimizeTopK() { + return optimizeTopK; + } + public Boolean getPrimarySortCache() { return primarySortCache; } diff --git a/driver/src/test/java/com/arangodb/ArangoSearchTest.java b/driver/src/test/java/com/arangodb/ArangoSearchTest.java index bff6fc543..2cf5ca5f8 100644 --- a/driver/src/test/java/com/arangodb/ArangoSearchTest.java +++ b/driver/src/test/java/com/arangodb/ArangoSearchTest.java @@ -424,7 +424,7 @@ private void createGetAndDeleteTypedAnalyzer(ArangoDatabase db, SearchAnalyzer a // getAnalyzers SearchAnalyzer foundAnalyzer = db.getSearchAnalyzers().stream().filter(it -> it.getName().equals(fullyQualifiedName)) - .findFirst().get(); + .findFirst().get(); assertThat(foundAnalyzer).isEqualTo(analyzer); // deleteAnalyzer @@ -672,6 +672,8 @@ void arangoSearchOptions(ArangoDatabase db) { .primaryKeyCache(true); StoredValue storedValue = new StoredValue(Arrays.asList("a", "b"), ArangoSearchCompression.none, true); options.storedValues(storedValue); + String[] optimizeTopK = new String[]{"BM25(@doc) DESC", "TFIDF(@doc) DESC"}; + options.optimizeTopK(optimizeTopK); final ArangoSearch view = db.arangoSearch(viewName); view.create(options); @@ -713,6 +715,11 @@ void arangoSearchOptions(ArangoDatabase db) { FieldLink nested = fieldLink.getNested().iterator().next(); assertThat(nested.getName()).isEqualTo("f2"); } + + if (isEnterprise() && isAtLeastVersion(3, 11)) { + assertThat(properties.getOptimizeTopK()).containsExactly(optimizeTopK); + } 
+ } @ParameterizedTest(name = "{index}") From 1a85fc0aa816f0fc98251901527a76cf7b0bd86f Mon Sep 17 00:00:00 2001 From: Michele Rastelli Date: Mon, 8 May 2023 09:58:30 +0200 Subject: [PATCH 2/2] optimizeTopK in inverted indexes --- .../com/arangodb/entity/InvertedIndexEntity.java | 5 +++++ .../com/arangodb/model/InvertedIndexOptions.java | 15 +++++++++++++++ .../test/java/com/arangodb/ArangoSearchTest.java | 1 - .../test/java/com/arangodb/InvertedIndexTest.java | 5 +++++ 4 files changed, 25 insertions(+), 1 deletion(-) diff --git a/core/src/main/java/com/arangodb/entity/InvertedIndexEntity.java b/core/src/main/java/com/arangodb/entity/InvertedIndexEntity.java index 8d511e39d..ca321e341 100644 --- a/core/src/main/java/com/arangodb/entity/InvertedIndexEntity.java +++ b/core/src/main/java/com/arangodb/entity/InvertedIndexEntity.java @@ -45,6 +45,7 @@ public final class InvertedIndexEntity { private Collection fields; private Boolean searchField; private Collection storedValues; + private Collection<String> optimizeTopK; private InvertedIndexPrimarySort primarySort; private String analyzer; private Set features; @@ -104,6 +105,10 @@ public Collection getStoredValues() { return storedValues; } + public Collection<String> getOptimizeTopK() { + return optimizeTopK; + } + public InvertedIndexPrimarySort getPrimarySort() { return primarySort; } diff --git a/core/src/main/java/com/arangodb/model/InvertedIndexOptions.java b/core/src/main/java/com/arangodb/model/InvertedIndexOptions.java index 722a3d227..0b48a3f12 100644 --- a/core/src/main/java/com/arangodb/model/InvertedIndexOptions.java +++ b/core/src/main/java/com/arangodb/model/InvertedIndexOptions.java @@ -38,6 +38,7 @@ public final class InvertedIndexOptions extends IndexOptions storedValues = new ArrayList<>(); + private final Collection<String> optimizeTopK = new ArrayList<>(); private String analyzer; private final Set features = new HashSet<>(); private Boolean includeAllFields; @@ -112,6 +113,20 @@ public InvertedIndexOptions 
storedValues(StoredValue... storedValues) { return this; } + public Collection<String> getOptimizeTopK() { + return optimizeTopK; + } + + /** + * @param optimizeTopK An array of strings defining sort expressions that you want to optimize. + * @return options + * @since ArangoDB 3.11, Enterprise Edition only + */ + public InvertedIndexOptions optimizeTopK(String... optimizeTopK) { + Collections.addAll(this.optimizeTopK, optimizeTopK); + return this; + } + public String getAnalyzer() { return analyzer; } diff --git a/driver/src/test/java/com/arangodb/ArangoSearchTest.java b/driver/src/test/java/com/arangodb/ArangoSearchTest.java index 2cf5ca5f8..df4e27357 100644 --- a/driver/src/test/java/com/arangodb/ArangoSearchTest.java +++ b/driver/src/test/java/com/arangodb/ArangoSearchTest.java @@ -27,7 +27,6 @@ import com.arangodb.entity.arangosearch.analyzer.*; import com.arangodb.model.InvertedIndexOptions; import com.arangodb.model.arangosearch.*; -import com.arangodb.util.TestUtils; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.params.ParameterizedTest; import org.junit.jupiter.params.provider.MethodSource; diff --git a/driver/src/test/java/com/arangodb/InvertedIndexTest.java b/driver/src/test/java/com/arangodb/InvertedIndexTest.java index 5afdb3883..03467a0a1 100644 --- a/driver/src/test/java/com/arangodb/InvertedIndexTest.java +++ b/driver/src/test/java/com/arangodb/InvertedIndexTest.java @@ -93,6 +93,7 @@ private InvertedIndexOptions createOptions(String analyzerName) { .cache(cache) ) .storedValues(new StoredValue(Arrays.asList("f3", "f4"), ArangoSearchCompression.none, cache)) + .optimizeTopK("BM25(@doc) DESC", "TFIDF(@doc) DESC") .analyzer(analyzerName) .features(AnalyzerFeature.position, AnalyzerFeature.frequency) .includeAllFields(false) @@ -144,6 +145,10 @@ private void assertCorrectIndexEntity(InvertedIndexEntity indexResult, InvertedI assertThat(indexResult.getWritebufferSizeMax()).isEqualTo(options.getWritebufferSizeMax()); 
assertThat(indexResult.getCache()).isEqualTo(options.getCache()); assertThat(indexResult.getPrimaryKeyCache()).isEqualTo(options.getPrimaryKeyCache()); + + if (isEnterprise() && isAtLeastVersion(3, 11)) { + assertThat(indexResult.getOptimizeTopK()).containsExactlyElementsOf(options.getOptimizeTopK()); + } } @ParameterizedTest(name = "{index}")