Skip to content

[DE-385] 3.10 analyzers #458

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Sep 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added docker/foo.bin
Binary file not shown.
7 changes: 7 additions & 0 deletions docker/start_db.sh
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,13 @@ for a in ${COORDINATORS[*]} ; do
curl -u root:test --insecure --fail "$SCHEME://$a/_api/version"
done

echo ""
echo ""
echo "Copying test ML models into containers..."
# Copy the test model file into every container whose name matches adb-.*
# The filter is quoted so the shell never glob-expands "name=adb-.*" against
# files in the current directory before docker receives it.
for c in $(docker ps -a -f "name=adb-.*" -q) ; do
  docker cp "$LOCATION"/foo.bin "$c":/tmp
done

echo ""
echo ""
echo "Done, your deployment is reachable at: "
Expand Down
3 changes: 2 additions & 1 deletion src/main/java/com/arangodb/entity/InvertedIndexField.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ public class InvertedIndexField implements Entity {
private Boolean searchField;
private Boolean trackListPositions;
private final Set<AnalyzerFeature> features = new HashSet<>();
private final Collection<InvertedIndexField> nested = new ArrayList<>();
private Collection<InvertedIndexField> nested;

public String getName() {
return name;
Expand Down Expand Up @@ -79,6 +79,7 @@ public Collection<InvertedIndexField> getNested() {
}

/**
 * Appends the given fields to this field's nested fields. The backing collection is created on the first
 * call, so it stays {@code null} until a nested field is added.
 *
 * @param nested the nested fields to append
 * @return this instance, to allow call chaining
 */
public InvertedIndexField nested(InvertedIndexField... nested) {
    Collection<InvertedIndexField> target = this.nested;
    if (target == null) {
        target = new ArrayList<>();
        this.nested = target;
    }
    Collections.addAll(target, nested);
    return this;
}
Expand Down
17 changes: 16 additions & 1 deletion src/main/java/com/arangodb/entity/arangosearch/AnalyzerType.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,20 @@
* @author Michele Rastelli
*/
public enum AnalyzerType {
identity, delimiter, stem, norm, ngram, text, pipeline, stopwords, aql, geojson, geopoint, segmentation, collation
identity,
delimiter,
stem,
norm,
ngram,
text,
pipeline,
stopwords,
aql,
geojson,
geopoint,
segmentation,
collation,
// Type assigned by ClassificationAnalyzer in its constructor (ArangoDB 3.10).
classification,
// Type assigned by NearestNeighborsAnalyzer in its constructor (ArangoDB 3.10).
nearest_neighbors,
// Type assigned by MinHashAnalyzer in its constructor (ArangoDB 3.10).
minhash
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
* DISCLAIMER
*
* Copyright 2016 ArangoDB GmbH, Cologne, Germany
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Copyright holder is ArangoDB GmbH, Cologne, Germany
*/

package com.arangodb.entity.arangosearch.analyzer;


import com.arangodb.entity.arangosearch.AnalyzerType;

import java.util.Objects;

/**
* An Analyzer capable of classifying tokens in the input text. It applies a user-provided supervised fastText word
* embedding model to classify the input text. It is able to classify individual tokens as well as entire inputs.
*
* @author Michele Rastelli
* @see <a href= "https://www.arangodb.com/docs/stable/analyzers.html#classification">API Documentation</a>
* @since ArangoDB 3.10
*/
public class ClassificationAnalyzer extends SearchAnalyzer {

    private ClassificationAnalyzerProperties properties;

    /**
     * Creates an analyzer whose type is preset to {@link AnalyzerType#classification}.
     */
    public ClassificationAnalyzer() {
        setType(AnalyzerType.classification);
    }

    /**
     * @return the properties assigned to this analyzer, or {@code null} if none have been set
     */
    public ClassificationAnalyzerProperties getProperties() {
        return properties;
    }

    /**
     * @param properties the properties to assign to this analyzer
     */
    public void setProperties(ClassificationAnalyzerProperties properties) {
        this.properties = properties;
    }

    /**
     * Two instances are equal when they have the same runtime class, the superclass considers them equal
     * and their properties are equal.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        if (!super.equals(o)) {
            return false;
        }
        final ClassificationAnalyzer other = (ClassificationAnalyzer) o;
        return Objects.equals(properties, other.properties);
    }

    /**
     * Combines the superclass hash with the hash of the properties.
     */
    @Override
    public int hashCode() {
        final int parentHash = super.hashCode();
        return Objects.hash(parentHash, properties);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
/*
* DISCLAIMER
*
* Copyright 2016 ArangoDB GmbH, Cologne, Germany
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Copyright holder is ArangoDB GmbH, Cologne, Germany
*/

package com.arangodb.entity.arangosearch.analyzer;


import com.arangodb.velocypack.annotations.SerializedName;

import java.util.Objects;

/**
 * Properties carried by a {@link ClassificationAnalyzer}.
 *
 * @author Michele Rastelli
 * @since ArangoDB 3.10
 */
public class ClassificationAnalyzerProperties {

    /** Serialized under the {@code model_location} key. */
    @SerializedName("model_location")
    private String modelLocation;

    /** Serialized under the {@code top_k} key. */
    @SerializedName("top_k")
    private Integer topK;

    private Double threshold;

    /**
     * @return the configured model location, or {@code null} if unset
     */
    public String getModelLocation() {
        return modelLocation;
    }

    /**
     * @param modelLocation the model location to store
     */
    public void setModelLocation(String modelLocation) {
        this.modelLocation = modelLocation;
    }

    /**
     * @return the configured {@code top_k} value, or {@code null} if unset
     */
    public Integer getTopK() {
        return topK;
    }

    /**
     * @param topK the {@code top_k} value to store
     */
    public void setTopK(Integer topK) {
        this.topK = topK;
    }

    /**
     * @return the configured threshold, or {@code null} if unset
     */
    public Double getThreshold() {
        return threshold;
    }

    /**
     * @param threshold the threshold to store
     */
    public void setThreshold(Double threshold) {
        this.threshold = threshold;
    }

    /**
     * Two instances are equal when they have the same runtime class and equal model location,
     * {@code top_k} and threshold values.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        final ClassificationAnalyzerProperties other = (ClassificationAnalyzerProperties) o;
        return Objects.equals(modelLocation, other.modelLocation)
                && Objects.equals(topK, other.topK)
                && Objects.equals(threshold, other.threshold);
    }

    /**
     * Hashes the same three values that {@link #equals(Object)} compares.
     */
    @Override
    public int hashCode() {
        return Objects.hash(modelLocation, topK, threshold);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
* DISCLAIMER
*
* Copyright 2016 ArangoDB GmbH, Cologne, Germany
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Copyright holder is ArangoDB GmbH, Cologne, Germany
*/

package com.arangodb.entity.arangosearch.analyzer;


import com.arangodb.entity.arangosearch.AnalyzerType;

import java.util.Objects;

/**
* An Analyzer that computes so called MinHash signatures using a locality-sensitive hash function. It applies an
* Analyzer of your choice before the hashing, for example, to break up text into words.
*
* @author Michele Rastelli
* @see <a href= "https://www.arangodb.com/docs/stable/analyzers.html#minhash">API Documentation</a>
* @since ArangoDB 3.10
*/
public class MinHashAnalyzer extends SearchAnalyzer {

    private MinHashAnalyzerProperties properties;

    /**
     * Creates an analyzer whose type is preset to {@link AnalyzerType#minhash}.
     */
    public MinHashAnalyzer() {
        setType(AnalyzerType.minhash);
    }

    /**
     * @return the properties assigned to this analyzer, or {@code null} if none have been set
     */
    public MinHashAnalyzerProperties getProperties() {
        return properties;
    }

    /**
     * @param properties the properties to assign to this analyzer
     */
    public void setProperties(MinHashAnalyzerProperties properties) {
        this.properties = properties;
    }

    /**
     * Two instances are equal when they have the same runtime class, the superclass considers them equal
     * and their properties are equal.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        if (!super.equals(o)) {
            return false;
        }
        final MinHashAnalyzer other = (MinHashAnalyzer) o;
        return Objects.equals(properties, other.properties);
    }

    /**
     * Combines the superclass hash with the hash of the properties.
     */
    @Override
    public int hashCode() {
        final int parentHash = super.hashCode();
        return Objects.hash(parentHash, properties);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
/*
* DISCLAIMER
*
* Copyright 2016 ArangoDB GmbH, Cologne, Germany
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Copyright holder is ArangoDB GmbH, Cologne, Germany
*/

package com.arangodb.entity.arangosearch.analyzer;


import java.util.Objects;

/**
 * Properties carried by a {@link MinHashAnalyzer}: a {@link SearchAnalyzer} and a number of hashes.
 *
 * @author Michele Rastelli
 * @since ArangoDB 3.10
 */
public class MinHashAnalyzerProperties {

    private SearchAnalyzer analyzer;
    private Integer numHashes;

    /**
     * @return the configured analyzer, or {@code null} if unset
     */
    public SearchAnalyzer getAnalyzer() {
        return analyzer;
    }

    /**
     * @param analyzer the analyzer to store
     */
    public void setAnalyzer(SearchAnalyzer analyzer) {
        this.analyzer = analyzer;
    }

    /**
     * @return the configured number of hashes, or {@code null} if unset
     */
    public Integer getNumHashes() {
        return numHashes;
    }

    /**
     * @param numHashes the number of hashes to store
     */
    public void setNumHashes(Integer numHashes) {
        this.numHashes = numHashes;
    }

    /**
     * Two instances are equal when they have the same runtime class and equal analyzer and
     * number-of-hashes values.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        final MinHashAnalyzerProperties other = (MinHashAnalyzerProperties) o;
        return Objects.equals(analyzer, other.analyzer)
                && Objects.equals(numHashes, other.numHashes);
    }

    /**
     * Hashes the same two values that {@link #equals(Object)} compares.
     */
    @Override
    public int hashCode() {
        return Objects.hash(analyzer, numHashes);
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
/*
* DISCLAIMER
*
* Copyright 2016 ArangoDB GmbH, Cologne, Germany
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Copyright holder is ArangoDB GmbH, Cologne, Germany
*/

package com.arangodb.entity.arangosearch.analyzer;


import com.arangodb.entity.arangosearch.AnalyzerType;

import java.util.Objects;

/**
* An Analyzer capable of finding nearest neighbors of tokens in the input. It applies a user-provided supervised
* fastText word embedding model to retrieve nearest neighbor tokens in the text. It is able to find neighbors of
* individual tokens as well as entire input strings. For entire input strings, the Analyzer will return nearest
* neighbors for each token within the input string.
*
* @author Michele Rastelli
* @see <a href= "https://www.arangodb.com/docs/stable/analyzers.html#nearest_neighbors">API Documentation</a>
* @since ArangoDB 3.10
*/
public class NearestNeighborsAnalyzer extends SearchAnalyzer {

    private NearestNeighborsAnalyzerProperties properties;

    /**
     * Creates an analyzer whose type is preset to {@link AnalyzerType#nearest_neighbors}.
     */
    public NearestNeighborsAnalyzer() {
        setType(AnalyzerType.nearest_neighbors);
    }

    /**
     * @return the properties assigned to this analyzer, or {@code null} if none have been set
     */
    public NearestNeighborsAnalyzerProperties getProperties() {
        return properties;
    }

    /**
     * @param properties the properties to assign to this analyzer
     */
    public void setProperties(NearestNeighborsAnalyzerProperties properties) {
        this.properties = properties;
    }

    /**
     * Two instances are equal when they have the same runtime class, the superclass considers them equal
     * and their properties are equal.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        if (!super.equals(o)) {
            return false;
        }
        final NearestNeighborsAnalyzer other = (NearestNeighborsAnalyzer) o;
        return Objects.equals(properties, other.properties);
    }

    /**
     * Combines the superclass hash with the hash of the properties.
     */
    @Override
    public int hashCode() {
        final int parentHash = super.hashCode();
        return Objects.hash(parentHash, properties);
    }
}
Loading