Skip to content

[DE-567] geo_s2 analyzer #502

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ public enum AnalyzerType {
aql,
geojson,
geopoint,
geo_s2,
segmentation,
collation,
classification,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
/*
* DISCLAIMER
*
* Copyright 2016 ArangoDB GmbH, Cologne, Germany
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Copyright holder is ArangoDB GmbH, Cologne, Germany
*/


package com.arangodb.entity.arangosearch.analyzer;

import com.arangodb.entity.arangosearch.AnalyzerType;

import java.util.Objects;

/**
 * Analyzer that splits a GeoJSON object, or a coordinate array given in [longitude, latitude] order, into a set of
 * indexable tokens that can then be used with the ArangoSearch Geo functions.
 * <p>
 * It is comparable to {@link GeoJSONAnalyzer}, but stores the geo-spatial data internally in a more efficient
 * format. The format is selectable, which allows trading off size on disk, precision and query performance.
 *
 * @author Michele Rastelli
 * @see <a href= "https://www.arangodb.com/docs/stable/analyzers.html#geo_s2">API Documentation</a>
 * @since ArangoDB 3.10.5
 */
public final class GeoS2Analyzer extends SearchAnalyzer {
    private GeoS2AnalyzerProperties properties;

    /**
     * Creates an analyzer whose type is preset to {@link AnalyzerType#geo_s2}; the properties are initially unset.
     */
    public GeoS2Analyzer() {
        setType(AnalyzerType.geo_s2);
    }

    /**
     * @return the properties of this analyzer, or {@code null} if none have been set
     */
    public GeoS2AnalyzerProperties getProperties() {
        return properties;
    }

    /**
     * @param properties the properties to assign to this analyzer
     */
    public void setProperties(GeoS2AnalyzerProperties properties) {
        this.properties = properties;
    }

    /**
     * Two analyzers are equal when they are of the exact same class, the superclass considers them equal and
     * their properties are equal.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        // Short-circuit order matters: the null/class check must precede the superclass comparison.
        if (o == null || getClass() != o.getClass() || !super.equals(o)) {
            return false;
        }
        GeoS2Analyzer other = (GeoS2Analyzer) o;
        return Objects.equals(properties, other.properties);
    }

    /**
     * Combines the superclass hash with the hash of the properties, consistent with {@link #equals(Object)}.
     */
    @Override
    public int hashCode() {
        return Objects.hash(super.hashCode(), properties);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
/*
* DISCLAIMER
*
* Copyright 2016 ArangoDB GmbH, Cologne, Germany
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Copyright holder is ArangoDB GmbH, Cologne, Germany
*/

package com.arangodb.entity.arangosearch.analyzer;


import java.util.Objects;

/**
 * Mutable holder for the settings of a {@code geo_s2} analyzer: the indexing type, the geo query options and the
 * storage format.
 *
 * @author Michele Rastelli
 */
public final class GeoS2AnalyzerProperties {

    private GeoS2AnalyzerType type;
    private GeoAnalyzerOptions options;
    private GeoS2Format format;

    /**
     * @return the indexing type, or {@code null} if it has not been set
     */
    public GeoS2AnalyzerType getType() {
        return type;
    }

    /**
     * @param type the indexing type to use
     */
    public void setType(GeoS2AnalyzerType type) {
        this.type = type;
    }

    /**
     * @return Options for fine-tuning geo queries {@link GeoS2AnalyzerProperties}. These options should generally
     * remain unchanged.
     */
    public GeoAnalyzerOptions getOptions() {
        return options;
    }

    /**
     * @param options the options for fine-tuning geo queries
     */
    public void setOptions(GeoAnalyzerOptions options) {
        this.options = options;
    }

    /**
     * @return The internal binary representation to use for storing the geo-spatial data in an index.
     */
    public GeoS2Format getFormat() {
        return format;
    }

    /**
     * @param format the internal binary representation to use for storing the geo-spatial data in an index
     */
    public void setFormat(GeoS2Format format) {
        this.format = format;
    }

    /**
     * Two instances are equal when they are of the exact same class and their type, options and format all match.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        GeoS2AnalyzerProperties other = (GeoS2AnalyzerProperties) o;
        // Enum fields are compared by identity; the options object is compared null-safely by value.
        if (type != other.type) {
            return false;
        }
        if (!Objects.equals(options, other.options)) {
            return false;
        }
        return format == other.format;
    }

    /**
     * Hash over type, options and format, consistent with {@link #equals(Object)}.
     */
    @Override
    public int hashCode() {
        return Objects.hash(type, options, format);
    }

    /**
     * Selects which parts of the input geometry are indexed.
     */
    public enum GeoS2AnalyzerType {

        /**
         * (default): index all GeoJSON geometry types (Point, Polygon etc.)
         */
        shape,

        /**
         * compute and only index the centroid of the input geometry
         */
        centroid,

        /**
         * only index GeoJSON objects of type Point, ignore all other geometry types
         */
        point
    }

    /**
     * Selects the internal binary representation used for storing the geo-spatial data.
     */
    public enum GeoS2Format {
        /**
         * Store each latitude and longitude value as an 8-byte floating-point value (16 bytes per coordinate pair).
         * This format preserves numeric values exactly and is more compact than the VelocyPack format used by
         * {@link GeoJSONAnalyzer}. (default)
         */
        latLngDouble,

        /**
         * Store each latitude and longitude value as a 4-byte integer value (8 bytes per coordinate pair). This is
         * the most compact format but the precision is limited to approximately 1 to 10 centimeters.
         */
        latLngInt,

        /**
         * Store each longitude-latitude pair in the native format of Google S2 which is used for geo-spatial
         * calculations (24 bytes per coordinate pair). This is not a particularly compact format but it reduces the
         * number of computations necessary when you execute geo-spatial queries. This format preserves numeric
         * values exactly.
         */
        s2Point
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,8 @@ public class VPackDeserializers {
return context.deserialize(vpack, GeoJSONAnalyzer.class);
case geopoint:
return context.deserialize(vpack, GeoPointAnalyzer.class);
case geo_s2:
return context.deserialize(vpack, GeoS2Analyzer.class);
case segmentation:
return context.deserialize(vpack, SegmentationAnalyzer.class);
case collation:
Expand Down
30 changes: 30 additions & 0 deletions src/test/java/com/arangodb/ArangoSearchTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -1104,6 +1104,36 @@ void geoJsonAnalyzer(ArangoDatabase db) {
}


/**
 * Builds a {@link GeoS2Analyzer} with every property populated (options, type, format, features and a random
 * name) and passes it to {@code createGetAndDeleteTypedAnalyzer}. The two {@code assumeTrue} preconditions
 * (enterprise edition, version at least 3.10.5) are evaluated first.
 */
@ParameterizedTest(name = "{index}")
@MethodSource("dbs")
void geoS2Analyzer(ArangoDatabase db) {
    assumeTrue(isEnterprise());
    assumeTrue(isAtLeastVersion(3, 10, 5));

    Set<AnalyzerFeature> analyzerFeatures = new HashSet<>();
    analyzerFeatures.add(AnalyzerFeature.frequency);
    analyzerFeatures.add(AnalyzerFeature.norm);
    analyzerFeatures.add(AnalyzerFeature.position);

    GeoAnalyzerOptions geoOptions = new GeoAnalyzerOptions();
    geoOptions.setMaxLevel(10);
    geoOptions.setMaxCells(11);
    geoOptions.setMinLevel(8);

    GeoS2AnalyzerProperties s2Properties = new GeoS2AnalyzerProperties();
    s2Properties.setType(GeoS2AnalyzerProperties.GeoS2AnalyzerType.point);
    s2Properties.setFormat(GeoS2AnalyzerProperties.GeoS2Format.s2Point);
    s2Properties.setOptions(geoOptions);

    // Random suffix so that each run uses a distinct analyzer name.
    String analyzerName = "test-" + UUID.randomUUID();

    GeoS2Analyzer analyzer = new GeoS2Analyzer();
    analyzer.setName(analyzerName);
    analyzer.setProperties(s2Properties);
    analyzer.setFeatures(analyzerFeatures);

    createGetAndDeleteTypedAnalyzer(db, analyzer);
}


@ParameterizedTest(name = "{index}")
@MethodSource("dbs")
void geoPointAnalyzer(ArangoDatabase db) {
Expand Down