Fix semantic highlighting bug on flat quantized fields (#131525)
* Fix semantic highlighting bug on flat quantized fields
* Update docs/changelog/131525.yaml
This commit is contained in:
parent
929f65b94c
commit
90699d3cc3
|
@ -0,0 +1,6 @@
|
|||
pr: 131525
|
||||
summary: Fix semantic highlighting bug on flat quantized fields
|
||||
area: Highlighting
|
||||
type: bug
|
||||
issues:
|
||||
- 131443
|
|
@ -42,6 +42,7 @@ public class InferenceFeatures implements FeatureSpecification {
|
|||
);
|
||||
private static final NodeFeature SEMANTIC_TEXT_MATCH_ALL_HIGHLIGHTER = new NodeFeature("semantic_text.match_all_highlighter");
|
||||
private static final NodeFeature COHERE_V2_API = new NodeFeature("inference.cohere.v2");
|
||||
public static final NodeFeature SEMANTIC_TEXT_HIGHLIGHTING_FLAT = new NodeFeature("semantic_text.highlighter.flat_index_options");
|
||||
|
||||
@Override
|
||||
public Set<NodeFeature> getTestFeatures() {
|
||||
|
@ -72,7 +73,8 @@ public class InferenceFeatures implements FeatureSpecification {
|
|||
SEMANTIC_TEXT_INDEX_OPTIONS,
|
||||
COHERE_V2_API,
|
||||
SEMANTIC_TEXT_INDEX_OPTIONS_WITH_DEFAULTS,
|
||||
SEMANTIC_QUERY_REWRITE_INTERCEPTORS_PROPAGATE_BOOST_AND_QUERY_NAME_FIX
|
||||
SEMANTIC_QUERY_REWRITE_INTERCEPTORS_PROPAGATE_BOOST_AND_QUERY_NAME_FIX,
|
||||
SEMANTIC_TEXT_HIGHLIGHTING_FLAT
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -32,6 +32,7 @@ import org.elasticsearch.search.fetch.subphase.highlight.FieldHighlightContext;
|
|||
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
|
||||
import org.elasticsearch.search.fetch.subphase.highlight.HighlightUtils;
|
||||
import org.elasticsearch.search.fetch.subphase.highlight.Highlighter;
|
||||
import org.elasticsearch.search.vectors.DenseVectorQuery;
|
||||
import org.elasticsearch.search.vectors.SparseVectorQueryWrapper;
|
||||
import org.elasticsearch.search.vectors.VectorData;
|
||||
import org.elasticsearch.xcontent.Text;
|
||||
|
@ -273,6 +274,8 @@ public class SemanticTextHighlighter implements Highlighter {
|
|||
queries.add(fieldType.createExactKnnQuery(VectorData.fromBytes(knnQuery.getTargetCopy()), null));
|
||||
} else if (query instanceof MatchAllDocsQuery) {
|
||||
queries.add(new MatchAllDocsQuery());
|
||||
} else if (query instanceof DenseVectorQuery.Floats floatsQuery) {
|
||||
queries.add(fieldType.createExactKnnQuery(VectorData.fromFloats(floatsQuery.getQuery()), null));
|
||||
}
|
||||
}
|
||||
});
|
||||
|
|
|
@ -35,6 +35,23 @@ setup:
|
|||
}
|
||||
}
|
||||
|
||||
- do:
|
||||
inference.put:
|
||||
task_type: text_embedding
|
||||
inference_id: dense-inference-id-compatible-with-bbq
|
||||
body: >
|
||||
{
|
||||
"service": "text_embedding_test_service",
|
||||
"service_settings": {
|
||||
"model": "my_model",
|
||||
"dimensions": 64,
|
||||
"similarity": "cosine",
|
||||
"api_key": "abc64"
|
||||
},
|
||||
"task_settings": {
|
||||
}
|
||||
}
|
||||
|
||||
- do:
|
||||
indices.create:
|
||||
index: test-sparse-index
|
||||
|
@ -70,7 +87,7 @@ setup:
|
|||
id: doc_1
|
||||
body:
|
||||
title: "Elasticsearch"
|
||||
body: ["ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!"]
|
||||
body: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
|
||||
refresh: true
|
||||
|
||||
- do:
|
||||
|
@ -89,14 +106,14 @@ setup:
|
|||
index: test-dense-index
|
||||
body:
|
||||
query:
|
||||
match_all: {}
|
||||
match_all: { }
|
||||
highlight:
|
||||
fields:
|
||||
another_body: {}
|
||||
another_body: { }
|
||||
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- not_exists: hits.hits.0.highlight.another_body
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- not_exists: hits.hits.0.highlight.another_body
|
||||
|
||||
---
|
||||
"Highlighting using a sparse embedding model":
|
||||
|
@ -114,10 +131,10 @@ setup:
|
|||
type: "semantic"
|
||||
number_of_fragments: 1
|
||||
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- length: { hits.hits.0.highlight.body: 1 }
|
||||
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
|
||||
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
|
||||
|
||||
- do:
|
||||
search:
|
||||
|
@ -133,11 +150,11 @@ setup:
|
|||
type: "semantic"
|
||||
number_of_fragments: 2
|
||||
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- length: { hits.hits.0.highlight.body: 2 }
|
||||
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
|
||||
- match: { hits.hits.0.highlight.body.1: "You Know, for Search!" }
|
||||
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
|
||||
- match: { hits.hits.0.highlight.body.1: "You Know, for Search!" }
|
||||
|
||||
- do:
|
||||
search:
|
||||
|
@ -154,10 +171,10 @@ setup:
|
|||
order: "score"
|
||||
number_of_fragments: 1
|
||||
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- length: { hits.hits.0.highlight.body: 1 }
|
||||
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
|
||||
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
|
||||
|
||||
- do:
|
||||
search:
|
||||
|
@ -196,10 +213,10 @@ setup:
|
|||
type: "semantic"
|
||||
number_of_fragments: 1
|
||||
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- length: { hits.hits.0.highlight.body: 1 }
|
||||
- match: { hits.hits.0.highlight.body.0: "You Know, for Search!" }
|
||||
- match: { hits.hits.0.highlight.body.0: "You Know, for Search!" }
|
||||
|
||||
- do:
|
||||
search:
|
||||
|
@ -215,11 +232,11 @@ setup:
|
|||
type: "semantic"
|
||||
number_of_fragments: 2
|
||||
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- length: { hits.hits.0.highlight.body: 2 }
|
||||
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
|
||||
- match: { hits.hits.0.highlight.body.1: "You Know, for Search!" }
|
||||
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
|
||||
- match: { hits.hits.0.highlight.body.1: "You Know, for Search!" }
|
||||
|
||||
- do:
|
||||
search:
|
||||
|
@ -236,10 +253,10 @@ setup:
|
|||
order: "score"
|
||||
number_of_fragments: 1
|
||||
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- length: { hits.hits.0.highlight.body: 1 }
|
||||
- match: { hits.hits.0.highlight.body.0: "You Know, for Search!" }
|
||||
- match: { hits.hits.0.highlight.body.0: "You Know, for Search!" }
|
||||
|
||||
- do:
|
||||
search:
|
||||
|
@ -256,17 +273,17 @@ setup:
|
|||
order: "score"
|
||||
number_of_fragments: 2
|
||||
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- length: { hits.hits.0.highlight.body: 2 }
|
||||
- match: { hits.hits.0.highlight.body.0: "You Know, for Search!" }
|
||||
- match: { hits.hits.0.highlight.body.1: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
|
||||
- match: { hits.hits.0.highlight.body.0: "You Know, for Search!" }
|
||||
- match: { hits.hits.0.highlight.body.1: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
|
||||
|
||||
---
|
||||
"Default highlighter for fields":
|
||||
- requires:
|
||||
cluster_features: "semantic_text.highlighter.default"
|
||||
reason: semantic text field defaults to the semantic highlighter
|
||||
cluster_features: "semantic_text.highlighter.default"
|
||||
reason: semantic text field defaults to the semantic highlighter
|
||||
|
||||
- do:
|
||||
search:
|
||||
|
@ -281,11 +298,11 @@ setup:
|
|||
order: "score"
|
||||
number_of_fragments: 2
|
||||
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- length: { hits.hits.0.highlight.body: 2 }
|
||||
- match: { hits.hits.0.highlight.body.0: "You Know, for Search!" }
|
||||
- match: { hits.hits.0.highlight.body.1: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
|
||||
- match: { hits.hits.0.highlight.body.0: "You Know, for Search!" }
|
||||
- match: { hits.hits.0.highlight.body.1: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
|
||||
|
||||
---
|
||||
"semantic highlighter ignores non-inference fields":
|
||||
|
@ -306,8 +323,8 @@ setup:
|
|||
type: semantic
|
||||
number_of_fragments: 2
|
||||
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- not_exists: hits.hits.0.highlight.title
|
||||
|
||||
---
|
||||
|
@ -333,7 +350,7 @@ setup:
|
|||
index: test-multi-chunk-index
|
||||
id: doc_1
|
||||
body:
|
||||
semantic_text_field: ["some test data", " ", "now with chunks"]
|
||||
semantic_text_field: [ "some test data", " ", "now with chunks" ]
|
||||
refresh: true
|
||||
|
||||
- do:
|
||||
|
@ -367,25 +384,25 @@ setup:
|
|||
index: test-sparse-index
|
||||
body:
|
||||
query:
|
||||
match_all: {}
|
||||
match_all: { }
|
||||
highlight:
|
||||
fields:
|
||||
body:
|
||||
type: "semantic"
|
||||
number_of_fragments: 2
|
||||
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- length: { hits.hits.0.highlight.body: 2 }
|
||||
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
|
||||
- match: { hits.hits.0.highlight.body.1: "You Know, for Search!" }
|
||||
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
|
||||
- match: { hits.hits.0.highlight.body.1: "You Know, for Search!" }
|
||||
|
||||
- do:
|
||||
search:
|
||||
index: test-dense-index
|
||||
body:
|
||||
query:
|
||||
match_all: {}
|
||||
match_all: { }
|
||||
highlight:
|
||||
fields:
|
||||
body:
|
||||
|
@ -432,18 +449,18 @@ setup:
|
|||
index: test-index-sparse
|
||||
body:
|
||||
query:
|
||||
match_all: {}
|
||||
match_all: { }
|
||||
highlight:
|
||||
fields:
|
||||
semantic_text_field:
|
||||
type: "semantic"
|
||||
number_of_fragments: 2
|
||||
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- length: { hits.hits.0.highlight.semantic_text_field: 2 }
|
||||
- match: { hits.hits.0.highlight.semantic_text_field.0: "some test data" }
|
||||
- match: { hits.hits.0.highlight.semantic_text_field.1: "now with chunks" }
|
||||
- match: { hits.hits.0.highlight.semantic_text_field.0: "some test data" }
|
||||
- match: { hits.hits.0.highlight.semantic_text_field.1: "now with chunks" }
|
||||
|
||||
- do:
|
||||
indices.create:
|
||||
|
@ -473,7 +490,7 @@ setup:
|
|||
index: test-index-dense
|
||||
body:
|
||||
query:
|
||||
match_all: {}
|
||||
match_all: { }
|
||||
highlight:
|
||||
fields:
|
||||
semantic_text_field:
|
||||
|
@ -485,3 +502,172 @@ setup:
|
|||
- length: { hits.hits.0.highlight.semantic_text_field: 2 }
|
||||
- match: { hits.hits.0.highlight.semantic_text_field.0: "some test data" }
|
||||
- match: { hits.hits.0.highlight.semantic_text_field.1: "now with chunks" }
|
||||
|
||||
---
|
||||
"Highlighting with flat quantization index options":
|
||||
- requires:
|
||||
cluster_features: "semantic_text.highlighter.flat_index_options"
|
||||
reason: semantic highlighter fix for flat index options
|
||||
|
||||
- do:
|
||||
indices.create:
|
||||
index: test-dense-index-flat
|
||||
body:
|
||||
settings:
|
||||
index.mapping.semantic_text.use_legacy_format: false
|
||||
mappings:
|
||||
properties:
|
||||
flat_field:
|
||||
type: semantic_text
|
||||
inference_id: dense-inference-id
|
||||
index_options:
|
||||
dense_vector:
|
||||
type: flat
|
||||
int4_flat_field:
|
||||
type: semantic_text
|
||||
inference_id: dense-inference-id
|
||||
index_options:
|
||||
dense_vector:
|
||||
type: int4_flat
|
||||
int8_flat_field:
|
||||
type: semantic_text
|
||||
inference_id: dense-inference-id
|
||||
index_options:
|
||||
dense_vector:
|
||||
type: int8_flat
|
||||
bbq_flat_field:
|
||||
type: semantic_text
|
||||
inference_id: dense-inference-id-compatible-with-bbq
|
||||
index_options:
|
||||
dense_vector:
|
||||
type: bbq_flat
|
||||
|
||||
|
||||
- do:
|
||||
index:
|
||||
index: test-dense-index-flat
|
||||
id: doc_1
|
||||
body:
|
||||
flat_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
|
||||
int4_flat_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
|
||||
int8_flat_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
|
||||
bbq_flat_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
|
||||
refresh: true
|
||||
|
||||
- do:
|
||||
search:
|
||||
index: test-dense-index-flat
|
||||
body:
|
||||
query:
|
||||
match_all: { }
|
||||
highlight:
|
||||
fields:
|
||||
flat_field:
|
||||
type: "semantic"
|
||||
number_of_fragments: 1
|
||||
int4_flat_field:
|
||||
type: "semantic"
|
||||
number_of_fragments: 1
|
||||
int8_flat_field:
|
||||
type: "semantic"
|
||||
number_of_fragments: 1
|
||||
bbq_flat_field:
|
||||
type: "semantic"
|
||||
number_of_fragments: 1
|
||||
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- length: { hits.hits.0.highlight: 4 }
|
||||
- length: { hits.hits.0.highlight.flat_field: 1 }
|
||||
- match: { hits.hits.0.highlight.flat_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
|
||||
- length: { hits.hits.0.highlight.int4_flat_field: 1 }
|
||||
- match: { hits.hits.0.highlight.int4_flat_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
|
||||
- length: { hits.hits.0.highlight.int8_flat_field: 1 }
|
||||
- match: { hits.hits.0.highlight.int8_flat_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
|
||||
- length: { hits.hits.0.highlight.bbq_flat_field: 1 }
|
||||
- match: { hits.hits.0.highlight.bbq_flat_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
|
||||
|
||||
---
|
||||
"Highlighting with HNSW quantization index options":
|
||||
- requires:
|
||||
cluster_features: "semantic_text.highlighter.flat_index_options"
|
||||
reason: semantic highlighter fix for flat index options
|
||||
|
||||
- do:
|
||||
indices.create:
|
||||
index: test-dense-index-hnsw
|
||||
body:
|
||||
settings:
|
||||
index.mapping.semantic_text.use_legacy_format: false
|
||||
mappings:
|
||||
properties:
|
||||
hnsw_field:
|
||||
type: semantic_text
|
||||
inference_id: dense-inference-id
|
||||
index_options:
|
||||
dense_vector:
|
||||
type: hnsw
|
||||
int4_hnsw_field:
|
||||
type: semantic_text
|
||||
inference_id: dense-inference-id
|
||||
index_options:
|
||||
dense_vector:
|
||||
type: int4_hnsw
|
||||
int8_hnsw_field:
|
||||
type: semantic_text
|
||||
inference_id: dense-inference-id
|
||||
index_options:
|
||||
dense_vector:
|
||||
type: int8_hnsw
|
||||
bbq_hnsw_field:
|
||||
type: semantic_text
|
||||
inference_id: dense-inference-id-compatible-with-bbq
|
||||
index_options:
|
||||
dense_vector:
|
||||
type: bbq_hnsw
|
||||
|
||||
|
||||
- do:
|
||||
index:
|
||||
index: test-dense-index-hnsw
|
||||
id: doc_1
|
||||
body:
|
||||
hnsw_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
|
||||
int4_hnsw_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
|
||||
int8_hnsw_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
|
||||
bbq_hnsw_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
|
||||
refresh: true
|
||||
|
||||
- do:
|
||||
search:
|
||||
index: test-dense-index-hnsw
|
||||
body:
|
||||
query:
|
||||
match_all: { }
|
||||
highlight:
|
||||
fields:
|
||||
hnsw_field:
|
||||
type: "semantic"
|
||||
number_of_fragments: 1
|
||||
int4_hnsw_field:
|
||||
type: "semantic"
|
||||
number_of_fragments: 1
|
||||
int8_hnsw_field:
|
||||
type: "semantic"
|
||||
number_of_fragments: 1
|
||||
bbq_hnsw_field:
|
||||
type: "semantic"
|
||||
number_of_fragments: 1
|
||||
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- length: { hits.hits.0.highlight: 4 }
|
||||
- length: { hits.hits.0.highlight.hnsw_field: 1 }
|
||||
- match: { hits.hits.0.highlight.hnsw_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
|
||||
- length: { hits.hits.0.highlight.int4_hnsw_field: 1 }
|
||||
- match: { hits.hits.0.highlight.int4_hnsw_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
|
||||
- length: { hits.hits.0.highlight.int8_hnsw_field: 1 }
|
||||
- match: { hits.hits.0.highlight.int8_hnsw_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
|
||||
- length: { hits.hits.0.highlight.bbq_hnsw_field: 1 }
|
||||
- match: { hits.hits.0.highlight.bbq_hnsw_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
|
||||
|
||||
|
|
|
@ -35,6 +35,23 @@ setup:
|
|||
}
|
||||
}
|
||||
|
||||
- do:
|
||||
inference.put:
|
||||
task_type: text_embedding
|
||||
inference_id: dense-inference-id-compatible-with-bbq
|
||||
body: >
|
||||
{
|
||||
"service": "text_embedding_test_service",
|
||||
"service_settings": {
|
||||
"model": "my_model",
|
||||
"dimensions": 64,
|
||||
"similarity": "cosine",
|
||||
"api_key": "abc64"
|
||||
},
|
||||
"task_settings": {
|
||||
}
|
||||
}
|
||||
|
||||
- do:
|
||||
indices.create:
|
||||
index: test-sparse-index
|
||||
|
@ -65,12 +82,12 @@ setup:
|
|||
---
|
||||
"Highlighting empty field":
|
||||
- do:
|
||||
index:
|
||||
index: test-dense-index
|
||||
id: doc_1
|
||||
body:
|
||||
body: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
|
||||
refresh: true
|
||||
index:
|
||||
index: test-dense-index
|
||||
id: doc_1
|
||||
body:
|
||||
body: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
|
||||
refresh: true
|
||||
|
||||
- match: { result: created }
|
||||
|
||||
|
@ -79,14 +96,14 @@ setup:
|
|||
index: test-dense-index
|
||||
body:
|
||||
query:
|
||||
match_all: {}
|
||||
match_all: { }
|
||||
highlight:
|
||||
fields:
|
||||
another_body: {}
|
||||
another_body: { }
|
||||
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- not_exists: hits.hits.0.highlight.another_body
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- not_exists: hits.hits.0.highlight.another_body
|
||||
|
||||
---
|
||||
"Highlighting using a sparse embedding model":
|
||||
|
@ -95,7 +112,7 @@ setup:
|
|||
index: test-sparse-index
|
||||
id: doc_1
|
||||
body:
|
||||
body: ["ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!"]
|
||||
body: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
|
||||
refresh: true
|
||||
|
||||
- match: { result: created }
|
||||
|
@ -114,10 +131,10 @@ setup:
|
|||
type: "semantic"
|
||||
number_of_fragments: 1
|
||||
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- length: { hits.hits.0.highlight.body: 1 }
|
||||
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
|
||||
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
|
||||
|
||||
- do:
|
||||
search:
|
||||
|
@ -133,11 +150,11 @@ setup:
|
|||
type: "semantic"
|
||||
number_of_fragments: 2
|
||||
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- length: { hits.hits.0.highlight.body: 2 }
|
||||
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
|
||||
- match: { hits.hits.0.highlight.body.1: "You Know, for Search!" }
|
||||
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
|
||||
- match: { hits.hits.0.highlight.body.1: "You Know, for Search!" }
|
||||
|
||||
- do:
|
||||
search:
|
||||
|
@ -154,10 +171,10 @@ setup:
|
|||
order: "score"
|
||||
number_of_fragments: 1
|
||||
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- length: { hits.hits.0.highlight.body: 1 }
|
||||
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
|
||||
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
|
||||
|
||||
- do:
|
||||
search:
|
||||
|
@ -187,7 +204,7 @@ setup:
|
|||
index: test-dense-index
|
||||
id: doc_1
|
||||
body:
|
||||
body: ["ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!"]
|
||||
body: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
|
||||
refresh: true
|
||||
|
||||
- match: { result: created }
|
||||
|
@ -206,10 +223,10 @@ setup:
|
|||
type: "semantic"
|
||||
number_of_fragments: 1
|
||||
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- length: { hits.hits.0.highlight.body: 1 }
|
||||
- match: { hits.hits.0.highlight.body.0: "You Know, for Search!" }
|
||||
- match: { hits.hits.0.highlight.body.0: "You Know, for Search!" }
|
||||
|
||||
- do:
|
||||
search:
|
||||
|
@ -225,11 +242,11 @@ setup:
|
|||
type: "semantic"
|
||||
number_of_fragments: 2
|
||||
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- length: { hits.hits.0.highlight.body: 2 }
|
||||
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
|
||||
- match: { hits.hits.0.highlight.body.1: "You Know, for Search!" }
|
||||
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
|
||||
- match: { hits.hits.0.highlight.body.1: "You Know, for Search!" }
|
||||
|
||||
- do:
|
||||
search:
|
||||
|
@ -246,10 +263,10 @@ setup:
|
|||
order: "score"
|
||||
number_of_fragments: 1
|
||||
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- length: { hits.hits.0.highlight.body: 1 }
|
||||
- match: { hits.hits.0.highlight.body.0: "You Know, for Search!" }
|
||||
- match: { hits.hits.0.highlight.body.0: "You Know, for Search!" }
|
||||
|
||||
- do:
|
||||
search:
|
||||
|
@ -266,11 +283,11 @@ setup:
|
|||
order: "score"
|
||||
number_of_fragments: 2
|
||||
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- length: { hits.hits.0.highlight.body: 2 }
|
||||
- match: { hits.hits.0.highlight.body.0: "You Know, for Search!" }
|
||||
- match: { hits.hits.0.highlight.body.1: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
|
||||
- match: { hits.hits.0.highlight.body.0: "You Know, for Search!" }
|
||||
- match: { hits.hits.0.highlight.body.1: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
|
||||
|
||||
---
|
||||
"Highlighting and multi chunks with empty input":
|
||||
|
@ -295,7 +312,7 @@ setup:
|
|||
index: test-multi-chunk-index
|
||||
id: doc_1
|
||||
body:
|
||||
semantic_text_field: ["some test data", " ", "now with chunks"]
|
||||
semantic_text_field: [ "some test data", " ", "now with chunks" ]
|
||||
refresh: true
|
||||
|
||||
- do:
|
||||
|
@ -337,18 +354,18 @@ setup:
|
|||
index: test-sparse-index
|
||||
body:
|
||||
query:
|
||||
match_all: {}
|
||||
match_all: { }
|
||||
highlight:
|
||||
fields:
|
||||
body:
|
||||
type: "semantic"
|
||||
number_of_fragments: 2
|
||||
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- length: { hits.hits.0.highlight.body: 2 }
|
||||
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
|
||||
- match: { hits.hits.0.highlight.body.1: "You Know, for Search!" }
|
||||
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
|
||||
- match: { hits.hits.0.highlight.body.1: "You Know, for Search!" }
|
||||
|
||||
- do:
|
||||
index:
|
||||
|
@ -363,7 +380,7 @@ setup:
|
|||
index: test-dense-index
|
||||
body:
|
||||
query:
|
||||
match_all: {}
|
||||
match_all: { }
|
||||
highlight:
|
||||
fields:
|
||||
body:
|
||||
|
@ -410,18 +427,18 @@ setup:
|
|||
index: test-index-sparse
|
||||
body:
|
||||
query:
|
||||
match_all: {}
|
||||
match_all: { }
|
||||
highlight:
|
||||
fields:
|
||||
semantic_text_field:
|
||||
type: "semantic"
|
||||
number_of_fragments: 2
|
||||
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- match: { hits.total.value: 1 }
|
||||
- match: { hits.hits.0._id: "doc_1" }
|
||||
- length: { hits.hits.0.highlight.semantic_text_field: 2 }
|
||||
- match: { hits.hits.0.highlight.semantic_text_field.0: "some test data" }
|
||||
- match: { hits.hits.0.highlight.semantic_text_field.1: "now with chunks" }
|
||||
- match: { hits.hits.0.highlight.semantic_text_field.0: "some test data" }
|
||||
- match: { hits.hits.0.highlight.semantic_text_field.1: "now with chunks" }
|
||||
|
||||
- do:
|
||||
indices.create:
|
||||
|
@ -451,7 +468,7 @@ setup:
|
|||
index: test-index-dense
|
||||
body:
|
||||
query:
|
||||
match_all: {}
|
||||
match_all: { }
|
||||
highlight:
|
||||
fields:
|
||||
semantic_text_field:
|
||||
|
@ -464,3 +481,173 @@ setup:
|
|||
- match: { hits.hits.0.highlight.semantic_text_field.0: "some test data" }
|
||||
- match: { hits.hits.0.highlight.semantic_text_field.1: "now with chunks" }
|
||||
|
||||
---
# Checks that the "semantic" highlighter returns a fragment for semantic_text
# fields whose dense_vector index_options use each of the flat index types:
# flat, int4_flat, int8_flat and bbq_flat.
"Highlighting with flat quantization index options":
  # Skipped on clusters that do not advertise the highlighter fix.
  - requires:
      cluster_features: "semantic_text.highlighter.flat_index_options"
      reason: semantic highlighter fix for flat index options

  # Create one index holding one semantic_text field per flat index type.
  - do:
      indices.create:
        index: test-dense-index-flat
        body:
          settings:
            index.mapping.semantic_text.use_legacy_format: true
          mappings:
            properties:
              flat_field:
                type: semantic_text
                inference_id: dense-inference-id
                index_options:
                  dense_vector:
                    type: flat
              int4_flat_field:
                type: semantic_text
                inference_id: dense-inference-id
                index_options:
                  dense_vector:
                    type: int4_flat
              int8_flat_field:
                type: semantic_text
                inference_id: dense-inference-id
                index_options:
                  dense_vector:
                    type: int8_flat
              bbq_flat_field:
                type: semantic_text
                # NOTE(review): only the bbq field uses this endpoint, which is
                # defined outside this test; presumably it satisfies a
                # BBQ-specific constraint -- confirm against the setup section.
                inference_id: dense-inference-id-compatible-with-bbq
                index_options:
                  dense_vector:
                    type: bbq_flat

  # Index a single document with the same two passages in every field and
  # refresh so it is immediately searchable.
  - do:
      index:
        index: test-dense-index-flat
        id: doc_1
        body:
          flat_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
          int4_flat_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
          int8_flat_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
          bbq_flat_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
        refresh: true

  # match_all query; request exactly one semantic fragment per field.
  - do:
      search:
        index: test-dense-index-flat
        body:
          query:
            match_all: { }
          highlight:
            fields:
              flat_field:
                type: "semantic"
                number_of_fragments: 1
              int4_flat_field:
                type: "semantic"
                number_of_fragments: 1
              int8_flat_field:
                type: "semantic"
                number_of_fragments: 1
              bbq_flat_field:
                type: "semantic"
                number_of_fragments: 1

  # The document is found and all four fields are highlighted, each with a
  # single fragment equal to the first indexed passage.
  - match: { hits.total.value: 1 }
  - match: { hits.hits.0._id: "doc_1" }
  - length: { hits.hits.0.highlight: 4 }
  - length: { hits.hits.0.highlight.flat_field: 1 }
  - match: { hits.hits.0.highlight.flat_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
  - length: { hits.hits.0.highlight.int4_flat_field: 1 }
  - match: { hits.hits.0.highlight.int4_flat_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
  - length: { hits.hits.0.highlight.int8_flat_field: 1 }
  - match: { hits.hits.0.highlight.int8_flat_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
  - length: { hits.hits.0.highlight.bbq_flat_field: 1 }
  - match: { hits.hits.0.highlight.bbq_flat_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
|
||||
|
||||
---
# Checks that the "semantic" highlighter returns a fragment for semantic_text
# fields whose dense_vector index_options use each of the HNSW index types:
# hnsw, int4_hnsw, int8_hnsw and bbq_hnsw.
"Highlighting with HNSW quantization index options":
  # Gated on the same cluster feature as the flat index options test.
  - requires:
      cluster_features: "semantic_text.highlighter.flat_index_options"
      reason: semantic highlighter fix for flat index options

  # Create one index holding one semantic_text field per HNSW index type.
  - do:
      indices.create:
        index: test-dense-index-hnsw
        body:
          settings:
            index.mapping.semantic_text.use_legacy_format: true
          mappings:
            properties:
              hnsw_field:
                type: semantic_text
                inference_id: dense-inference-id
                index_options:
                  dense_vector:
                    type: hnsw
              int4_hnsw_field:
                type: semantic_text
                inference_id: dense-inference-id
                index_options:
                  dense_vector:
                    type: int4_hnsw
              int8_hnsw_field:
                type: semantic_text
                inference_id: dense-inference-id
                index_options:
                  dense_vector:
                    type: int8_hnsw
              bbq_hnsw_field:
                type: semantic_text
                # NOTE(review): only the bbq field uses this endpoint, which is
                # defined outside this test; presumably it satisfies a
                # BBQ-specific constraint -- confirm against the setup section.
                inference_id: dense-inference-id-compatible-with-bbq
                index_options:
                  dense_vector:
                    type: bbq_hnsw

  # Index a single document with the same two passages in every field and
  # refresh so it is immediately searchable.
  - do:
      index:
        index: test-dense-index-hnsw
        id: doc_1
        body:
          hnsw_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
          int4_hnsw_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
          int8_hnsw_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
          bbq_hnsw_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
        refresh: true

  # match_all query; request exactly one semantic fragment per field.
  - do:
      search:
        index: test-dense-index-hnsw
        body:
          query:
            match_all: { }
          highlight:
            fields:
              hnsw_field:
                type: "semantic"
                number_of_fragments: 1
              int4_hnsw_field:
                type: "semantic"
                number_of_fragments: 1
              int8_hnsw_field:
                type: "semantic"
                number_of_fragments: 1
              bbq_hnsw_field:
                type: "semantic"
                number_of_fragments: 1

  # The document is found and all four fields are highlighted, each with a
  # single fragment equal to the first indexed passage.
  - match: { hits.total.value: 1 }
  - match: { hits.hits.0._id: "doc_1" }
  - length: { hits.hits.0.highlight: 4 }
  - length: { hits.hits.0.highlight.hnsw_field: 1 }
  - match: { hits.hits.0.highlight.hnsw_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
  - length: { hits.hits.0.highlight.int4_hnsw_field: 1 }
  - match: { hits.hits.0.highlight.int4_hnsw_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
  - length: { hits.hits.0.highlight.int8_hnsw_field: 1 }
  - match: { hits.hits.0.highlight.int8_hnsw_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
  - length: { hits.hits.0.highlight.bbq_hnsw_field: 1 }
  - match: { hits.hits.0.highlight.bbq_hnsw_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
|
||||
|
||||
|
||||
|
||||
|
|
Loading…
Reference in New Issue