From 69be643a672969837063d28db2127f74fd587b72 Mon Sep 17 00:00:00 2001 From: Michele Rastelli Date: Fri, 5 May 2023 12:40:58 +0200 Subject: [PATCH] geo s2 analyzer --- .../entity/arangosearch/AnalyzerType.java | 1 + .../arangosearch/analyzer/GeoS2Analyzer.java | 68 ++++++++++ .../analyzer/GeoS2AnalyzerProperties.java | 119 ++++++++++++++++++ .../velocypack/VPackDeserializers.java | 2 + .../java/com/arangodb/ArangoSearchTest.java | 30 +++++ 5 files changed, 220 insertions(+) create mode 100644 src/main/java/com/arangodb/entity/arangosearch/analyzer/GeoS2Analyzer.java create mode 100644 src/main/java/com/arangodb/entity/arangosearch/analyzer/GeoS2AnalyzerProperties.java diff --git a/src/main/java/com/arangodb/entity/arangosearch/AnalyzerType.java b/src/main/java/com/arangodb/entity/arangosearch/AnalyzerType.java index acf38797d..e92b0b191 100644 --- a/src/main/java/com/arangodb/entity/arangosearch/AnalyzerType.java +++ b/src/main/java/com/arangodb/entity/arangosearch/AnalyzerType.java @@ -35,6 +35,7 @@ public enum AnalyzerType { aql, geojson, geopoint, + geo_s2, segmentation, collation, classification, diff --git a/src/main/java/com/arangodb/entity/arangosearch/analyzer/GeoS2Analyzer.java b/src/main/java/com/arangodb/entity/arangosearch/analyzer/GeoS2Analyzer.java new file mode 100644 index 000000000..80542c8c0 --- /dev/null +++ b/src/main/java/com/arangodb/entity/arangosearch/analyzer/GeoS2Analyzer.java @@ -0,0 +1,68 @@ +/* + * DISCLAIMER + * + * Copyright 2016 ArangoDB GmbH, Cologne, Germany + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + * Copyright holder is ArangoDB GmbH, Cologne, Germany + */ + + +package com.arangodb.entity.arangosearch.analyzer; + +import com.arangodb.entity.arangosearch.AnalyzerType; + +import java.util.Objects; + +/** + * An Analyzer capable of breaking up a GeoJSON object or coordinate array in [longitude, latitude] order into a set of + * indexable tokens for further usage with ArangoSearch Geo functions. + * <p>

+ * The Analyzer is similar to {@link GeoJSONAnalyzer}, but it internally uses a format for storing the geo-spatial data + * that is more efficient. You can choose between different formats to make a tradeoff between the size on disk, the + * precision, and query performance. + * + * @author Michele Rastelli + * @see API Documentation + * @since ArangoDB 3.10.5 + */ +public final class GeoS2Analyzer extends SearchAnalyzer { + private GeoS2AnalyzerProperties properties; + + public GeoS2Analyzer() { + setType(AnalyzerType.geo_s2); + } + + public GeoS2AnalyzerProperties getProperties() { + return properties; + } + + public void setProperties(GeoS2AnalyzerProperties properties) { + this.properties = properties; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + if (!super.equals(o)) return false; + GeoS2Analyzer that = (GeoS2Analyzer) o; + return Objects.equals(properties, that.properties); + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), properties); + } +} diff --git a/src/main/java/com/arangodb/entity/arangosearch/analyzer/GeoS2AnalyzerProperties.java b/src/main/java/com/arangodb/entity/arangosearch/analyzer/GeoS2AnalyzerProperties.java new file mode 100644 index 000000000..5a570b512 --- /dev/null +++ b/src/main/java/com/arangodb/entity/arangosearch/analyzer/GeoS2AnalyzerProperties.java @@ -0,0 +1,119 @@ +/* + * DISCLAIMER + * + * Copyright 2016 ArangoDB GmbH, Cologne, Germany + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + * + * Copyright holder is ArangoDB GmbH, Cologne, Germany + */ + +package com.arangodb.entity.arangosearch.analyzer; + + +import java.util.Objects; + +/** + * @author Michele Rastelli + */ +public final class GeoS2AnalyzerProperties { + + private GeoS2AnalyzerType type; + private GeoAnalyzerOptions options; + private GeoS2Format format; + + public GeoS2AnalyzerType getType() { + return type; + } + + public void setType(GeoS2AnalyzerType type) { + this.type = type; + } + + /** + * @return Options for fine-tuning geo queries, see {@link GeoAnalyzerOptions}. These options should generally + * remain unchanged. + */ + public GeoAnalyzerOptions getOptions() { + return options; + } + + public void setOptions(GeoAnalyzerOptions options) { + this.options = options; + } + + /** + * @return The internal binary representation to use for storing the geo-spatial data in an index. + */ + public GeoS2Format getFormat() { + return format; + } + + public void setFormat(GeoS2Format format) { + this.format = format; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + GeoS2AnalyzerProperties that = (GeoS2AnalyzerProperties) o; + return type == that.type && Objects.equals(options, that.options) && format == that.format; + } + + @Override + public int hashCode() { + return Objects.hash(type, options, format); + } + + public enum GeoS2AnalyzerType { + + /** + * (default): index all GeoJSON geometry types (Point, Polygon etc.) + */ + shape, + + /** + * compute and only index the centroid of the input geometry + */ + centroid, + + /** + * only index GeoJSON objects of type Point, ignore all other geometry types + */ + point + } + + public enum GeoS2Format { + /** + * Store each latitude and longitude value as an 8-byte floating-point value (16 bytes per coordinate pair). 
+ * This format preserves numeric values exactly and is more compact than the VelocyPack format used by + * {@link GeoJSONAnalyzer}. (default) + */ + latLngDouble, + + /** + * Store each latitude and longitude value as a 4-byte integer value (8 bytes per coordinate pair). This is the + * most compact format but the precision is limited to approximately 1 to 10 centimeters. + */ + latLngInt, + + /** + * Store each longitude-latitude pair in the native format of Google S2 which is used for geo-spatial + * calculations (24 bytes per coordinate pair). This is not a particularly compact format but it reduces the + * number of computations necessary when you execute geo-spatial queries. This format preserves numeric values + * exactly. + */ + s2Point + } +} diff --git a/src/main/java/com/arangodb/internal/velocypack/VPackDeserializers.java b/src/main/java/com/arangodb/internal/velocypack/VPackDeserializers.java index 50c7c07c3..4f64a38a9 100644 --- a/src/main/java/com/arangodb/internal/velocypack/VPackDeserializers.java +++ b/src/main/java/com/arangodb/internal/velocypack/VPackDeserializers.java @@ -88,6 +88,8 @@ public class VPackDeserializers { return context.deserialize(vpack, GeoJSONAnalyzer.class); case geopoint: return context.deserialize(vpack, GeoPointAnalyzer.class); + case geo_s2: + return context.deserialize(vpack, GeoS2Analyzer.class); case segmentation: return context.deserialize(vpack, SegmentationAnalyzer.class); case collation: diff --git a/src/test/java/com/arangodb/ArangoSearchTest.java b/src/test/java/com/arangodb/ArangoSearchTest.java index 52bb94005..bd180de91 100644 --- a/src/test/java/com/arangodb/ArangoSearchTest.java +++ b/src/test/java/com/arangodb/ArangoSearchTest.java @@ -1104,6 +1104,36 @@ void geoJsonAnalyzer(ArangoDatabase db) { } + @ParameterizedTest(name = "{index}") + @MethodSource("dbs") + void geoS2Analyzer(ArangoDatabase db) { + assumeTrue(isEnterprise()); + assumeTrue(isAtLeastVersion(3, 10, 5)); + + GeoAnalyzerOptions options = new 
GeoAnalyzerOptions(); + options.setMaxLevel(10); + options.setMaxCells(11); + options.setMinLevel(8); + + GeoS2AnalyzerProperties properties = new GeoS2AnalyzerProperties(); + properties.setOptions(options); + properties.setType(GeoS2AnalyzerProperties.GeoS2AnalyzerType.point); + properties.setFormat(GeoS2AnalyzerProperties.GeoS2Format.s2Point); + + Set<AnalyzerFeature> features = new HashSet<>(); + features.add(AnalyzerFeature.frequency); + features.add(AnalyzerFeature.norm); + features.add(AnalyzerFeature.position); + + GeoS2Analyzer geoS2Analyzer = new GeoS2Analyzer(); + geoS2Analyzer.setName("test-" + UUID.randomUUID()); + geoS2Analyzer.setProperties(properties); + geoS2Analyzer.setFeatures(features); + + createGetAndDeleteTypedAnalyzer(db, geoS2Analyzer); + } + + @ParameterizedTest(name = "{index}") @MethodSource("dbs") void geoPointAnalyzer(ArangoDatabase db) {