Fix semantic highlighting bug on flat quantized fields (#131525)

* Fix semantic highlighting bug on flat quantized fields

* Update docs/changelog/131525.yaml
This commit is contained in:
Kathleen DeRusso 2025-07-18 11:27:34 -04:00 committed by GitHub
parent 929f65b94c
commit 90699d3cc3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
5 changed files with 486 additions and 102 deletions

View File

@ -0,0 +1,6 @@
pr: 131525
summary: Fix semantic highlighting bug on flat quantized fields
area: Highlighting
type: bug
issues:
- 131443

View File

@ -42,6 +42,7 @@ public class InferenceFeatures implements FeatureSpecification {
);
private static final NodeFeature SEMANTIC_TEXT_MATCH_ALL_HIGHLIGHTER = new NodeFeature("semantic_text.match_all_highlighter");
private static final NodeFeature COHERE_V2_API = new NodeFeature("inference.cohere.v2");
public static final NodeFeature SEMANTIC_TEXT_HIGHLIGHTING_FLAT = new NodeFeature("semantic_text.highlighter.flat_index_options");
@Override
public Set<NodeFeature> getTestFeatures() {
@ -72,7 +73,8 @@ public class InferenceFeatures implements FeatureSpecification {
SEMANTIC_TEXT_INDEX_OPTIONS,
COHERE_V2_API,
SEMANTIC_TEXT_INDEX_OPTIONS_WITH_DEFAULTS,
SEMANTIC_QUERY_REWRITE_INTERCEPTORS_PROPAGATE_BOOST_AND_QUERY_NAME_FIX
SEMANTIC_QUERY_REWRITE_INTERCEPTORS_PROPAGATE_BOOST_AND_QUERY_NAME_FIX,
SEMANTIC_TEXT_HIGHLIGHTING_FLAT
);
}
}

View File

@ -32,6 +32,7 @@ import org.elasticsearch.search.fetch.subphase.highlight.FieldHighlightContext;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightField;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightUtils;
import org.elasticsearch.search.fetch.subphase.highlight.Highlighter;
import org.elasticsearch.search.vectors.DenseVectorQuery;
import org.elasticsearch.search.vectors.SparseVectorQueryWrapper;
import org.elasticsearch.search.vectors.VectorData;
import org.elasticsearch.xcontent.Text;
@ -273,6 +274,8 @@ public class SemanticTextHighlighter implements Highlighter {
queries.add(fieldType.createExactKnnQuery(VectorData.fromBytes(knnQuery.getTargetCopy()), null));
} else if (query instanceof MatchAllDocsQuery) {
queries.add(new MatchAllDocsQuery());
} else if (query instanceof DenseVectorQuery.Floats floatsQuery) {
queries.add(fieldType.createExactKnnQuery(VectorData.fromFloats(floatsQuery.getQuery()), null));
}
}
});

View File

@ -35,6 +35,23 @@ setup:
}
}
- do:
inference.put:
task_type: text_embedding
inference_id: dense-inference-id-compatible-with-bbq
body: >
{
"service": "text_embedding_test_service",
"service_settings": {
"model": "my_model",
"dimensions": 64,
"similarity": "cosine",
"api_key": "abc64"
},
"task_settings": {
}
}
- do:
indices.create:
index: test-sparse-index
@ -70,7 +87,7 @@ setup:
id: doc_1
body:
title: "Elasticsearch"
body: ["ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!"]
body: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
refresh: true
- do:
@ -89,14 +106,14 @@ setup:
index: test-dense-index
body:
query:
match_all: {}
match_all: { }
highlight:
fields:
another_body: {}
another_body: { }
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- not_exists: hits.hits.0.highlight.another_body
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- not_exists: hits.hits.0.highlight.another_body
---
"Highlighting using a sparse embedding model":
@ -114,10 +131,10 @@ setup:
type: "semantic"
number_of_fragments: 1
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- length: { hits.hits.0.highlight.body: 1 }
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- do:
search:
@ -133,11 +150,11 @@ setup:
type: "semantic"
number_of_fragments: 2
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- length: { hits.hits.0.highlight.body: 2 }
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- match: { hits.hits.0.highlight.body.1: "You Know, for Search!" }
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- match: { hits.hits.0.highlight.body.1: "You Know, for Search!" }
- do:
search:
@ -154,10 +171,10 @@ setup:
order: "score"
number_of_fragments: 1
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- length: { hits.hits.0.highlight.body: 1 }
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- do:
search:
@ -196,10 +213,10 @@ setup:
type: "semantic"
number_of_fragments: 1
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- length: { hits.hits.0.highlight.body: 1 }
- match: { hits.hits.0.highlight.body.0: "You Know, for Search!" }
- match: { hits.hits.0.highlight.body.0: "You Know, for Search!" }
- do:
search:
@ -215,11 +232,11 @@ setup:
type: "semantic"
number_of_fragments: 2
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- length: { hits.hits.0.highlight.body: 2 }
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- match: { hits.hits.0.highlight.body.1: "You Know, for Search!" }
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- match: { hits.hits.0.highlight.body.1: "You Know, for Search!" }
- do:
search:
@ -236,10 +253,10 @@ setup:
order: "score"
number_of_fragments: 1
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- length: { hits.hits.0.highlight.body: 1 }
- match: { hits.hits.0.highlight.body.0: "You Know, for Search!" }
- match: { hits.hits.0.highlight.body.0: "You Know, for Search!" }
- do:
search:
@ -256,17 +273,17 @@ setup:
order: "score"
number_of_fragments: 2
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- length: { hits.hits.0.highlight.body: 2 }
- match: { hits.hits.0.highlight.body.0: "You Know, for Search!" }
- match: { hits.hits.0.highlight.body.1: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- match: { hits.hits.0.highlight.body.0: "You Know, for Search!" }
- match: { hits.hits.0.highlight.body.1: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
---
"Default highlighter for fields":
- requires:
cluster_features: "semantic_text.highlighter.default"
reason: semantic text field defaults to the semantic highlighter
cluster_features: "semantic_text.highlighter.default"
reason: semantic text field defaults to the semantic highlighter
- do:
search:
@ -281,11 +298,11 @@ setup:
order: "score"
number_of_fragments: 2
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- length: { hits.hits.0.highlight.body: 2 }
- match: { hits.hits.0.highlight.body.0: "You Know, for Search!" }
- match: { hits.hits.0.highlight.body.1: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- match: { hits.hits.0.highlight.body.0: "You Know, for Search!" }
- match: { hits.hits.0.highlight.body.1: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
---
"semantic highlighter ignores non-inference fields":
@ -306,8 +323,8 @@ setup:
type: semantic
number_of_fragments: 2
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- not_exists: hits.hits.0.highlight.title
---
@ -333,7 +350,7 @@ setup:
index: test-multi-chunk-index
id: doc_1
body:
semantic_text_field: ["some test data", " ", "now with chunks"]
semantic_text_field: [ "some test data", " ", "now with chunks" ]
refresh: true
- do:
@ -367,25 +384,25 @@ setup:
index: test-sparse-index
body:
query:
match_all: {}
match_all: { }
highlight:
fields:
body:
type: "semantic"
number_of_fragments: 2
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- length: { hits.hits.0.highlight.body: 2 }
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- match: { hits.hits.0.highlight.body.1: "You Know, for Search!" }
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- match: { hits.hits.0.highlight.body.1: "You Know, for Search!" }
- do:
search:
index: test-dense-index
body:
query:
match_all: {}
match_all: { }
highlight:
fields:
body:
@ -432,18 +449,18 @@ setup:
index: test-index-sparse
body:
query:
match_all: {}
match_all: { }
highlight:
fields:
semantic_text_field:
type: "semantic"
number_of_fragments: 2
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- length: { hits.hits.0.highlight.semantic_text_field: 2 }
- match: { hits.hits.0.highlight.semantic_text_field.0: "some test data" }
- match: { hits.hits.0.highlight.semantic_text_field.1: "now with chunks" }
- match: { hits.hits.0.highlight.semantic_text_field.0: "some test data" }
- match: { hits.hits.0.highlight.semantic_text_field.1: "now with chunks" }
- do:
indices.create:
@ -473,7 +490,7 @@ setup:
index: test-index-dense
body:
query:
match_all: {}
match_all: { }
highlight:
fields:
semantic_text_field:
@ -485,3 +502,172 @@ setup:
- length: { hits.hits.0.highlight.semantic_text_field: 2 }
- match: { hits.hits.0.highlight.semantic_text_field.0: "some test data" }
- match: { hits.hits.0.highlight.semantic_text_field.1: "now with chunks" }
---
# Checks that the semantic highlighter returns a highlight for semantic_text
# fields whose dense_vector index options are flat, int4_flat, int8_flat and
# bbq_flat. Only runs on clusters that advertise the
# semantic_text.highlighter.flat_index_options feature.
"Highlighting with flat quantization index options":
- requires:
cluster_features: "semantic_text.highlighter.flat_index_options"
reason: semantic highlighter fix for flat index options
# Create an index with one semantic_text field per flat index option type.
- do:
indices.create:
index: test-dense-index-flat
body:
settings:
# This suite exercises the non-legacy semantic_text format.
index.mapping.semantic_text.use_legacy_format: false
mappings:
properties:
flat_field:
type: semantic_text
inference_id: dense-inference-id
index_options:
dense_vector:
type: flat
int4_flat_field:
type: semantic_text
inference_id: dense-inference-id
index_options:
dense_vector:
type: int4_flat
int8_flat_field:
type: semantic_text
inference_id: dense-inference-id
index_options:
dense_vector:
type: int8_flat
# The bbq field uses the dedicated 64-dimension endpoint declared in setup
# rather than dense-inference-id (presumably a bbq dimension requirement — TODO confirm).
bbq_flat_field:
type: semantic_text
inference_id: dense-inference-id-compatible-with-bbq
index_options:
dense_vector:
type: bbq_flat
# Index a single document with the same two passages in every field.
- do:
index:
index: test-dense-index-flat
id: doc_1
body:
flat_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
int4_flat_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
int8_flat_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
bbq_flat_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
refresh: true
# match_all query with the semantic highlighter requested on each field,
# limited to one fragment per field.
- do:
search:
index: test-dense-index-flat
body:
query:
match_all: { }
highlight:
fields:
flat_field:
type: "semantic"
number_of_fragments: 1
int4_flat_field:
type: "semantic"
number_of_fragments: 1
int8_flat_field:
type: "semantic"
number_of_fragments: 1
bbq_flat_field:
type: "semantic"
number_of_fragments: 1
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
# All four fields must produce a highlight entry, each holding exactly one
# fragment equal to the first passage.
- length: { hits.hits.0.highlight: 4 }
- length: { hits.hits.0.highlight.flat_field: 1 }
- match: { hits.hits.0.highlight.flat_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- length: { hits.hits.0.highlight.int4_flat_field: 1 }
- match: { hits.hits.0.highlight.int4_flat_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- length: { hits.hits.0.highlight.int8_flat_field: 1 }
- match: { hits.hits.0.highlight.int8_flat_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- length: { hits.hits.0.highlight.bbq_flat_field: 1 }
- match: { hits.hits.0.highlight.bbq_flat_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
---
# Checks that the semantic highlighter returns a highlight for semantic_text
# fields whose dense_vector index options are hnsw, int4_hnsw, int8_hnsw and
# bbq_hnsw. It is gated on the same cluster feature as the flat index options test.
"Highlighting with HNSW quantization index options":
- requires:
cluster_features: "semantic_text.highlighter.flat_index_options"
reason: semantic highlighter fix for flat index options
# Create an index with one semantic_text field per HNSW index option type.
- do:
indices.create:
index: test-dense-index-hnsw
body:
settings:
# This suite exercises the non-legacy semantic_text format.
index.mapping.semantic_text.use_legacy_format: false
mappings:
properties:
hnsw_field:
type: semantic_text
inference_id: dense-inference-id
index_options:
dense_vector:
type: hnsw
int4_hnsw_field:
type: semantic_text
inference_id: dense-inference-id
index_options:
dense_vector:
type: int4_hnsw
int8_hnsw_field:
type: semantic_text
inference_id: dense-inference-id
index_options:
dense_vector:
type: int8_hnsw
# The bbq field uses the dedicated 64-dimension endpoint declared in setup
# rather than dense-inference-id (presumably a bbq dimension requirement — TODO confirm).
bbq_hnsw_field:
type: semantic_text
inference_id: dense-inference-id-compatible-with-bbq
index_options:
dense_vector:
type: bbq_hnsw
# Index a single document with the same two passages in every field.
- do:
index:
index: test-dense-index-hnsw
id: doc_1
body:
hnsw_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
int4_hnsw_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
int8_hnsw_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
bbq_hnsw_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
refresh: true
# match_all query with the semantic highlighter requested on each field,
# limited to one fragment per field.
- do:
search:
index: test-dense-index-hnsw
body:
query:
match_all: { }
highlight:
fields:
hnsw_field:
type: "semantic"
number_of_fragments: 1
int4_hnsw_field:
type: "semantic"
number_of_fragments: 1
int8_hnsw_field:
type: "semantic"
number_of_fragments: 1
bbq_hnsw_field:
type: "semantic"
number_of_fragments: 1
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
# All four fields must produce a highlight entry, each holding exactly one
# fragment equal to the first passage.
- length: { hits.hits.0.highlight: 4 }
- length: { hits.hits.0.highlight.hnsw_field: 1 }
- match: { hits.hits.0.highlight.hnsw_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- length: { hits.hits.0.highlight.int4_hnsw_field: 1 }
- match: { hits.hits.0.highlight.int4_hnsw_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- length: { hits.hits.0.highlight.int8_hnsw_field: 1 }
- match: { hits.hits.0.highlight.int8_hnsw_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- length: { hits.hits.0.highlight.bbq_hnsw_field: 1 }
- match: { hits.hits.0.highlight.bbq_hnsw_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }

View File

@ -35,6 +35,23 @@ setup:
}
}
- do:
inference.put:
task_type: text_embedding
inference_id: dense-inference-id-compatible-with-bbq
body: >
{
"service": "text_embedding_test_service",
"service_settings": {
"model": "my_model",
"dimensions": 64,
"similarity": "cosine",
"api_key": "abc64"
},
"task_settings": {
}
}
- do:
indices.create:
index: test-sparse-index
@ -65,12 +82,12 @@ setup:
---
"Highlighting empty field":
- do:
index:
index: test-dense-index
id: doc_1
body:
body: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
refresh: true
index:
index: test-dense-index
id: doc_1
body:
body: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
refresh: true
- match: { result: created }
@ -79,14 +96,14 @@ setup:
index: test-dense-index
body:
query:
match_all: {}
match_all: { }
highlight:
fields:
another_body: {}
another_body: { }
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- not_exists: hits.hits.0.highlight.another_body
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- not_exists: hits.hits.0.highlight.another_body
---
"Highlighting using a sparse embedding model":
@ -95,7 +112,7 @@ setup:
index: test-sparse-index
id: doc_1
body:
body: ["ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!"]
body: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
refresh: true
- match: { result: created }
@ -114,10 +131,10 @@ setup:
type: "semantic"
number_of_fragments: 1
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- length: { hits.hits.0.highlight.body: 1 }
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- do:
search:
@ -133,11 +150,11 @@ setup:
type: "semantic"
number_of_fragments: 2
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- length: { hits.hits.0.highlight.body: 2 }
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- match: { hits.hits.0.highlight.body.1: "You Know, for Search!" }
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- match: { hits.hits.0.highlight.body.1: "You Know, for Search!" }
- do:
search:
@ -154,10 +171,10 @@ setup:
order: "score"
number_of_fragments: 1
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- length: { hits.hits.0.highlight.body: 1 }
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- do:
search:
@ -187,7 +204,7 @@ setup:
index: test-dense-index
id: doc_1
body:
body: ["ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!"]
body: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
refresh: true
- match: { result: created }
@ -206,10 +223,10 @@ setup:
type: "semantic"
number_of_fragments: 1
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- length: { hits.hits.0.highlight.body: 1 }
- match: { hits.hits.0.highlight.body.0: "You Know, for Search!" }
- match: { hits.hits.0.highlight.body.0: "You Know, for Search!" }
- do:
search:
@ -225,11 +242,11 @@ setup:
type: "semantic"
number_of_fragments: 2
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- length: { hits.hits.0.highlight.body: 2 }
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- match: { hits.hits.0.highlight.body.1: "You Know, for Search!" }
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- match: { hits.hits.0.highlight.body.1: "You Know, for Search!" }
- do:
search:
@ -246,10 +263,10 @@ setup:
order: "score"
number_of_fragments: 1
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- length: { hits.hits.0.highlight.body: 1 }
- match: { hits.hits.0.highlight.body.0: "You Know, for Search!" }
- match: { hits.hits.0.highlight.body.0: "You Know, for Search!" }
- do:
search:
@ -266,11 +283,11 @@ setup:
order: "score"
number_of_fragments: 2
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- length: { hits.hits.0.highlight.body: 2 }
- match: { hits.hits.0.highlight.body.0: "You Know, for Search!" }
- match: { hits.hits.0.highlight.body.1: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- match: { hits.hits.0.highlight.body.0: "You Know, for Search!" }
- match: { hits.hits.0.highlight.body.1: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
---
"Highlighting and multi chunks with empty input":
@ -295,7 +312,7 @@ setup:
index: test-multi-chunk-index
id: doc_1
body:
semantic_text_field: ["some test data", " ", "now with chunks"]
semantic_text_field: [ "some test data", " ", "now with chunks" ]
refresh: true
- do:
@ -337,18 +354,18 @@ setup:
index: test-sparse-index
body:
query:
match_all: {}
match_all: { }
highlight:
fields:
body:
type: "semantic"
number_of_fragments: 2
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- length: { hits.hits.0.highlight.body: 2 }
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- match: { hits.hits.0.highlight.body.1: "You Know, for Search!" }
- match: { hits.hits.0.highlight.body.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- match: { hits.hits.0.highlight.body.1: "You Know, for Search!" }
- do:
index:
@ -363,7 +380,7 @@ setup:
index: test-dense-index
body:
query:
match_all: {}
match_all: { }
highlight:
fields:
body:
@ -410,18 +427,18 @@ setup:
index: test-index-sparse
body:
query:
match_all: {}
match_all: { }
highlight:
fields:
semantic_text_field:
type: "semantic"
number_of_fragments: 2
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
- length: { hits.hits.0.highlight.semantic_text_field: 2 }
- match: { hits.hits.0.highlight.semantic_text_field.0: "some test data" }
- match: { hits.hits.0.highlight.semantic_text_field.1: "now with chunks" }
- match: { hits.hits.0.highlight.semantic_text_field.0: "some test data" }
- match: { hits.hits.0.highlight.semantic_text_field.1: "now with chunks" }
- do:
indices.create:
@ -451,7 +468,7 @@ setup:
index: test-index-dense
body:
query:
match_all: {}
match_all: { }
highlight:
fields:
semantic_text_field:
@ -464,3 +481,173 @@ setup:
- match: { hits.hits.0.highlight.semantic_text_field.0: "some test data" }
- match: { hits.hits.0.highlight.semantic_text_field.1: "now with chunks" }
---
# Legacy-format counterpart of the flat index options highlighting test:
# checks that the semantic highlighter returns a highlight for semantic_text
# fields whose dense_vector index options are flat, int4_flat, int8_flat and
# bbq_flat. Only runs on clusters that advertise the
# semantic_text.highlighter.flat_index_options feature.
"Highlighting with flat quantization index options":
- requires:
cluster_features: "semantic_text.highlighter.flat_index_options"
reason: semantic highlighter fix for flat index options
# Create an index with one semantic_text field per flat index option type.
- do:
indices.create:
index: test-dense-index-flat
body:
settings:
# This suite exercises the legacy semantic_text format.
index.mapping.semantic_text.use_legacy_format: true
mappings:
properties:
flat_field:
type: semantic_text
inference_id: dense-inference-id
index_options:
dense_vector:
type: flat
int4_flat_field:
type: semantic_text
inference_id: dense-inference-id
index_options:
dense_vector:
type: int4_flat
int8_flat_field:
type: semantic_text
inference_id: dense-inference-id
index_options:
dense_vector:
type: int8_flat
# The bbq field uses the dedicated 64-dimension endpoint declared in setup
# rather than dense-inference-id (presumably a bbq dimension requirement — TODO confirm).
bbq_flat_field:
type: semantic_text
inference_id: dense-inference-id-compatible-with-bbq
index_options:
dense_vector:
type: bbq_flat
# Index a single document with the same two passages in every field.
- do:
index:
index: test-dense-index-flat
id: doc_1
body:
flat_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
int4_flat_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
int8_flat_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
bbq_flat_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
refresh: true
# match_all query with the semantic highlighter requested on each field,
# limited to one fragment per field.
- do:
search:
index: test-dense-index-flat
body:
query:
match_all: { }
highlight:
fields:
flat_field:
type: "semantic"
number_of_fragments: 1
int4_flat_field:
type: "semantic"
number_of_fragments: 1
int8_flat_field:
type: "semantic"
number_of_fragments: 1
bbq_flat_field:
type: "semantic"
number_of_fragments: 1
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
# All four fields must produce a highlight entry, each holding exactly one
# fragment equal to the first passage.
- length: { hits.hits.0.highlight: 4 }
- length: { hits.hits.0.highlight.flat_field: 1 }
- match: { hits.hits.0.highlight.flat_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- length: { hits.hits.0.highlight.int4_flat_field: 1 }
- match: { hits.hits.0.highlight.int4_flat_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- length: { hits.hits.0.highlight.int8_flat_field: 1 }
- match: { hits.hits.0.highlight.int8_flat_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- length: { hits.hits.0.highlight.bbq_flat_field: 1 }
- match: { hits.hits.0.highlight.bbq_flat_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
---
# Legacy-format counterpart of the HNSW index options highlighting test:
# checks that the semantic highlighter returns a highlight for semantic_text
# fields whose dense_vector index options are hnsw, int4_hnsw, int8_hnsw and
# bbq_hnsw. It is gated on the same cluster feature as the flat index options test.
"Highlighting with HNSW quantization index options":
- requires:
cluster_features: "semantic_text.highlighter.flat_index_options"
reason: semantic highlighter fix for flat index options
# Create an index with one semantic_text field per HNSW index option type.
- do:
indices.create:
index: test-dense-index-hnsw
body:
settings:
# This suite exercises the legacy semantic_text format.
index.mapping.semantic_text.use_legacy_format: true
mappings:
properties:
hnsw_field:
type: semantic_text
inference_id: dense-inference-id
index_options:
dense_vector:
type: hnsw
int4_hnsw_field:
type: semantic_text
inference_id: dense-inference-id
index_options:
dense_vector:
type: int4_hnsw
int8_hnsw_field:
type: semantic_text
inference_id: dense-inference-id
index_options:
dense_vector:
type: int8_hnsw
# The bbq field uses the dedicated 64-dimension endpoint declared in setup
# rather than dense-inference-id (presumably a bbq dimension requirement — TODO confirm).
bbq_hnsw_field:
type: semantic_text
inference_id: dense-inference-id-compatible-with-bbq
index_options:
dense_vector:
type: bbq_hnsw
# Index a single document with the same two passages in every field.
- do:
index:
index: test-dense-index-hnsw
id: doc_1
body:
hnsw_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
int4_hnsw_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
int8_hnsw_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
bbq_hnsw_field: [ "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides.", "You Know, for Search!" ]
refresh: true
# match_all query with the semantic highlighter requested on each field,
# limited to one fragment per field.
- do:
search:
index: test-dense-index-hnsw
body:
query:
match_all: { }
highlight:
fields:
hnsw_field:
type: "semantic"
number_of_fragments: 1
int4_hnsw_field:
type: "semantic"
number_of_fragments: 1
int8_hnsw_field:
type: "semantic"
number_of_fragments: 1
bbq_hnsw_field:
type: "semantic"
number_of_fragments: 1
- match: { hits.total.value: 1 }
- match: { hits.hits.0._id: "doc_1" }
# All four fields must produce a highlight entry, each holding exactly one
# fragment equal to the first passage.
- length: { hits.hits.0.highlight: 4 }
- length: { hits.hits.0.highlight.hnsw_field: 1 }
- match: { hits.hits.0.highlight.hnsw_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- length: { hits.hits.0.highlight.int4_hnsw_field: 1 }
- match: { hits.hits.0.highlight.int4_hnsw_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- length: { hits.hits.0.highlight.int8_hnsw_field: 1 }
- match: { hits.hits.0.highlight.int8_hnsw_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }
- length: { hits.hits.0.highlight.bbq_hnsw_field: 1 }
- match: { hits.hits.0.highlight.bbq_hnsw_field.0: "ElasticSearch is an open source, distributed, RESTful, search engine which is built on top of Lucene internally and enjoys all the features it provides." }