Add ability to set "max_analyzed_offset" implicitly to "index.highlight.max_analyzed_offset" (#118895)

Add ability to set "max_analyzed_offset" implicitly to "index.highlight
.max_analyzed_offset", by setting it explicitly to "-1".

Closes #112822
This commit is contained in:
Svilen Mihaylov 2025-01-07 11:19:07 -05:00 committed by GitHub
parent edfe2c5c6d
commit 93c349cc76
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
15 changed files with 182 additions and 29 deletions

View File

@ -262,9 +262,11 @@ max_analyzed_offset:: By default, the maximum number of characters
analyzed for a highlight request is bounded by the value defined in the
<<index-max-analyzed-offset, `index.highlight.max_analyzed_offset`>> setting,
and when the number of characters exceeds this limit an error is returned. If
this setting is set to a non-negative value, the highlighting stops at this defined
this setting is set to a positive value, the highlighting stops at this defined
maximum limit, and the rest of the text is not processed, thus not highlighted and
no error is returned. The <<max-analyzed-offset, `max_analyzed_offset`>> query setting
no error is returned. If it is specifically set to -1 then the value of
<<index-max-analyzed-offset, `index.highlight.max_analyzed_offset`>> is used instead.
For values < -1 or 0, an error is returned. The <<max-analyzed-offset, `max_analyzed_offset`>> query setting
does *not* override the <<index-max-analyzed-offset, `index.highlight.max_analyzed_offset`>>
which prevails when it is set to a lower value than the query setting.

View File

@ -17,6 +17,7 @@ import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.Ann
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter;
import org.elasticsearch.lucene.search.uhighlight.QueryMaxAnalyzedOffset;
import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
import org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter;
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext;
@ -52,7 +53,7 @@ public class AnnotatedTextHighlighter extends DefaultHighlighter {
}
@Override
protected Analyzer wrapAnalyzer(Analyzer analyzer, Integer maxAnalyzedOffset) {
protected Analyzer wrapAnalyzer(Analyzer analyzer, QueryMaxAnalyzedOffset maxAnalyzedOffset) {
return new AnnotatedHighlighterAnalyzer(super.wrapAnalyzer(analyzer, maxAnalyzedOffset));
}

View File

@ -39,6 +39,7 @@ import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.Ann
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText;
import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotationAnalyzerWrapper;
import org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter;
import org.elasticsearch.lucene.search.uhighlight.QueryMaxAnalyzedOffset;
import org.elasticsearch.lucene.search.uhighlight.Snippet;
import org.elasticsearch.search.fetch.subphase.highlight.LimitTokenOffsetAnalyzer;
import org.elasticsearch.test.ESTestCase;
@ -85,7 +86,7 @@ public class AnnotatedTextHighlighterTests extends ESTestCase {
int noMatchSize,
String[] expectedPassages,
int maxAnalyzedOffset,
Integer queryMaxAnalyzedOffset
Integer queryMaxAnalyzedOffsetIn
) throws Exception {
try (Directory dir = newDirectory()) {
@ -116,8 +117,9 @@ public class AnnotatedTextHighlighterTests extends ESTestCase {
for (int i = 0; i < markedUpInputs.length; i++) {
annotations[i] = AnnotatedText.parse(markedUpInputs[i]);
}
QueryMaxAnalyzedOffset queryMaxAnalyzedOffset = QueryMaxAnalyzedOffset.create(queryMaxAnalyzedOffsetIn, maxAnalyzedOffset);
if (queryMaxAnalyzedOffset != null) {
wrapperAnalyzer = new LimitTokenOffsetAnalyzer(wrapperAnalyzer, queryMaxAnalyzedOffset);
wrapperAnalyzer = new LimitTokenOffsetAnalyzer(wrapperAnalyzer, queryMaxAnalyzedOffset.getNotNull());
}
AnnotatedHighlighterAnalyzer hiliteAnalyzer = new AnnotatedHighlighterAnalyzer(wrapperAnalyzer);
hiliteAnalyzer.setAnnotations(annotations);
@ -311,6 +313,19 @@ public class AnnotatedTextHighlighterTests extends ESTestCase {
e.getMessage()
);
// Same as before, but force using index maxOffset (20) as queryMaxOffset by passing -1.
assertHighlightOneDoc(
"text",
new String[] { "[Long Text exceeds](Long+Text+exceeds) MAX analyzed offset)" },
query,
Locale.ROOT,
breakIterator,
0,
new String[] { "Long Text [exceeds](_hit_term=exceeds) MAX analyzed offset)" },
20,
-1
);
assertHighlightOneDoc(
"text",
new String[] { "[Long Text Exceeds](Long+Text+Exceeds) MAX analyzed offset [Long Text Exceeds](Long+Text+Exceeds)" },

View File

@ -60,4 +60,5 @@ tasks.named("yamlRestCompatTestTransform").configure ({ task ->
task.skipTest("cat.aliases/10_basic/Deprecated local parameter", "CAT APIs not covered by compatibility policy")
task.skipTest("cat.shards/10_basic/Help", "sync_id is removed in 9.0")
task.skipTest("search/500_date_range/from, to, include_lower, include_upper deprecated", "deprecated parameters are removed in 9.0")
task.skipTest("search.highlight/30_max_analyzed_offset/Plain highlighter with max_analyzed_offset < 0 should FAIL", "semantics of test has changed")
})

View File

@ -115,12 +115,70 @@ setup:
- match: {hits.hits.0.highlight.field2.0: "The quick brown <em>fox</em> went to the forest and saw another fox."}
---
"Plain highlighter with max_analyzed_offset < 0 should FAIL":
"Plain highlighter on a field WITH OFFSETS exceeding index.highlight.max_analyzed_offset with max_analyzed_offset=0 should FAIL":
- requires:
test_runner_features: [capabilities]
capabilities:
- method: GET
path: /_search
capabilities: [ highlight_max_analyzed_offset_default ]
reason: Behavior of max_analyzed_offset query param changed in 8.18.
- do:
catch: bad_request
search:
rest_total_hits_as_int: true
index: test1
body: {"query" : {"match" : {"field2" : "fox"}}, "highlight" : {"type" : "plain", "fields" : {"field2" : {}}, "max_analyzed_offset": 0}}
- match: { status: 400 }
- match: { error.root_cause.0.type: "x_content_parse_exception" }
- match: { error.caused_by.type: "illegal_argument_exception" }
- match: { error.caused_by.reason: "[max_analyzed_offset] must be a positive integer, or -1" }
---
"Plain highlighter on a field WITH OFFSETS exceeding index.highlight.max_analyzed_offset with max_analyzed_offset=1 should SUCCEED":
- requires:
cluster_features: ["gte_v7.12.0"]
reason: max_analyzed_offset query param added in 7.12.0
- do:
search:
rest_total_hits_as_int: true
index: test1
body: {"query" : {"match" : {"field2" : "fox"}}, "highlight" : {"type" : "plain", "fields" : {"field2" : {}}, "max_analyzed_offset": 1}}
- match: { hits.hits.0.highlight: null }
---
"Plain highlighter with max_analyzed_offset = -1 default to index analyze offset should SUCCEED":
- requires:
test_runner_features: [capabilities]
capabilities:
- method: GET
path: /_search
capabilities: [ highlight_max_analyzed_offset_default ]
reason: Behavior of max_analyzed_offset query param changed in 8.18.
- do:
search:
rest_total_hits_as_int: true
index: test1
body: {"query" : {"match" : {"field2" : "fox"}}, "highlight" : {"type" : "plain", "fields" : {"field2" : {}}, "max_analyzed_offset": -1}}
- match: {hits.hits.0.highlight.field2.0: "The quick brown <em>fox</em> went to the forest and saw another fox."}
---
"Plain highlighter with max_analyzed_offset < -1 should FAIL":
- requires:
test_runner_features: [capabilities]
capabilities:
- method: GET
path: /_search
capabilities: [ highlight_max_analyzed_offset_default ]
reason: Behavior of max_analyzed_offset query param changed in 8.18.
- do:
catch: bad_request
search:
@ -130,4 +188,4 @@ setup:
- match: { status: 400 }
- match: { error.root_cause.0.type: "x_content_parse_exception" }
- match: { error.caused_by.type: "illegal_argument_exception" }
- match: { error.caused_by.reason: "[max_analyzed_offset] must be a positive integer" }
- match: { error.caused_by.reason: "[max_analyzed_offset] must be a positive integer, or -1" }

View File

@ -2632,6 +2632,41 @@ public class HighlighterSearchIT extends ESIntegTestCase {
});
}
/**
 * Checks that a query-level {@code max_analyzed_offset} of -1 falls back to the
 * index-level {@code index.highlight.max_analyzed_offset} setting (10 here),
 * so highlighting succeeds instead of failing on a field longer than the limit.
 */
public void testMaxQueryOffsetDefault() throws Exception {
// The index-level limit is deliberately tiny (10 chars) so every document below exceeds it.
assertAcked(
prepareCreate("test").setMapping(type1PostingsffsetsMapping())
.setSettings(Settings.builder().put("index.highlight.max_analyzed_offset", "10").build())
);
ensureGreen();
prepareIndex("test").setSource(
"field1",
new String[] {
"This sentence contains one match, not that short. This sentence contains zero sentence matches. "
+ "This one contains no matches.",
"This is the second value's first sentence. This one contains no matches. "
+ "This sentence contains three sentence occurrences (sentence).",
"One sentence match here and scored lower since the text is quite long, not that appealing. "
+ "This one contains no matches." }
).get();
refresh();
// Specific for this test: by passing "-1" as "maxAnalyzedOffset", the index highlight setting above will be used.
SearchSourceBuilder source = searchSource().query(termQuery("field1", "sentence"))
.highlighter(highlight().field("field1").order("score").maxAnalyzedOffset(-1));
assertResponse(client().search(new SearchRequest("test").source(source)), response -> {
// One highlighted field with one fragment is expected; reaching the assertions at all
// means the -1 sentinel was accepted rather than rejected as an invalid offset.
Map<String, HighlightField> highlightFieldMap = response.getHits().getAt(0).getHighlightFields();
assertThat(highlightFieldMap.size(), equalTo(1));
HighlightField field1 = highlightFieldMap.get("field1");
assertThat(field1.fragments().length, equalTo(1));
assertThat(
field1.fragments()[0].string(),
equalTo("This <em>sentence</em> contains one match, not that short. This sentence contains zero sentence matches.")
);
});
}
public void testPostingsHighlighterEscapeHtml() throws Exception {
assertAcked(prepareCreate("test").setMapping("title", "type=text," + randomStoreField() + "index_options=offsets"));

View File

@ -34,7 +34,7 @@ class CustomFieldHighlighter extends FieldHighlighter {
private final Locale breakIteratorLocale;
private final int noMatchSize;
private String fieldValue;
private final Integer queryMaxAnalyzedOffset;
private final QueryMaxAnalyzedOffset queryMaxAnalyzedOffset;
CustomFieldHighlighter(
String field,
@ -47,7 +47,7 @@ class CustomFieldHighlighter extends FieldHighlighter {
PassageFormatter passageFormatter,
Comparator<Passage> passageSortComparator,
int noMatchSize,
Integer queryMaxAnalyzedOffset
QueryMaxAnalyzedOffset queryMaxAnalyzedOffset
) {
super(
field,
@ -113,7 +113,7 @@ class CustomFieldHighlighter extends FieldHighlighter {
@Override
protected Passage[] highlightOffsetsEnums(OffsetsEnum off) throws IOException {
if (queryMaxAnalyzedOffset != null) {
off = new LimitedOffsetsEnum(off, queryMaxAnalyzedOffset);
off = new LimitedOffsetsEnum(off, queryMaxAnalyzedOffset.getNotNull());
}
return super.highlightOffsetsEnums(off);
}

View File

@ -66,7 +66,7 @@ public final class CustomUnifiedHighlighter extends UnifiedHighlighter {
private final int noMatchSize;
private final CustomFieldHighlighter fieldHighlighter;
private final int maxAnalyzedOffset;
private final Integer queryMaxAnalyzedOffset;
private final QueryMaxAnalyzedOffset queryMaxAnalyzedOffset;
/**
* Creates a new instance of {@link CustomUnifiedHighlighter}
@ -94,7 +94,7 @@ public final class CustomUnifiedHighlighter extends UnifiedHighlighter {
int noMatchSize,
int maxPassages,
int maxAnalyzedOffset,
Integer queryMaxAnalyzedOffset,
QueryMaxAnalyzedOffset queryMaxAnalyzedOffset,
boolean requireFieldMatch,
boolean weightMatchesEnabled
) {
@ -125,9 +125,9 @@ public final class CustomUnifiedHighlighter extends UnifiedHighlighter {
return null;
}
int fieldValueLength = fieldValue.length();
if (((queryMaxAnalyzedOffset == null || queryMaxAnalyzedOffset > maxAnalyzedOffset)
if ((queryMaxAnalyzedOffset == null || queryMaxAnalyzedOffset.getNotNull() > maxAnalyzedOffset)
&& (getOffsetSource(field) == OffsetSource.ANALYSIS)
&& (fieldValueLength > maxAnalyzedOffset))) {
&& (fieldValueLength > maxAnalyzedOffset)) {
throw new IllegalArgumentException(
"The length ["
+ fieldValueLength

View File

@ -0,0 +1,30 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.lucene.search.uhighlight;
/**
 * Holder for the effective {@code max_analyzed_offset} of a highlight request.
 * <p>
 * A {@code null} query value means "no query-level limit" and is represented by a
 * {@code null} instance; a negative query value (-1) means "use the index-level
 * {@code index.highlight.max_analyzed_offset}" and is resolved at creation time,
 * so {@link #getNotNull()} always returns the final, usable limit.
 */
public class QueryMaxAnalyzedOffset {
    private final int value;

    private QueryMaxAnalyzedOffset(final int value) {
        this.value = value;
    }

    /**
     * Resolves the query-supplied offset against the index setting.
     *
     * @param queryMaxAnalyzedOffset the value from the request, or {@code null} if unset
     * @param indexMaxAnalyzedOffset the index-level {@code index.highlight.max_analyzed_offset}
     * @return {@code null} when no query value was given, otherwise a wrapper holding
     *         the query value — or the index value when the query value was negative
     */
    public static QueryMaxAnalyzedOffset create(final Integer queryMaxAnalyzedOffset, final int indexMaxAnalyzedOffset) {
        if (queryMaxAnalyzedOffset == null) {
            return null;
        }
        if (queryMaxAnalyzedOffset < 0) {
            // Negative sentinel (-1): fall back to the index-level maximum.
            return new QueryMaxAnalyzedOffset(indexMaxAnalyzedOffset);
        }
        return new QueryMaxAnalyzedOffset(queryMaxAnalyzedOffset);
    }

    /** Returns the resolved limit; callers are expected to have null-checked the instance. */
    public int getNotNull() {
        return value;
    }
}

View File

@ -42,6 +42,8 @@ public final class SearchCapabilities {
private static final String OPTIMIZED_SCALAR_QUANTIZATION_BBQ = "optimized_scalar_quantization_bbq";
private static final String KNN_QUANTIZED_VECTOR_RESCORE = "knn_quantized_vector_rescore";
private static final String HIGHLIGHT_MAX_ANALYZED_OFFSET_DEFAULT = "highlight_max_analyzed_offset_default";
public static final Set<String> CAPABILITIES;
static {
HashSet<String> capabilities = new HashSet<>();
@ -58,6 +60,7 @@ public final class SearchCapabilities {
if (Build.current().isSnapshot()) {
capabilities.add(KQL_QUERY_SUPPORTED);
}
capabilities.add(HIGHLIGHT_MAX_ANALYZED_OFFSET_DEFAULT);
CAPABILITIES = Set.copyOf(capabilities);
}
}

View File

@ -561,13 +561,12 @@ public abstract class AbstractHighlighterBuilder<HB extends AbstractHighlighterB
}
/**
* Set to a non-negative value which represents the max offset used to analyze
* the field thus avoiding exceptions if the field exceeds this limit.
* "maxAnalyzedOffset" might be non-negative int, null (unknown), or a negative int (defaulting to index analyzed offset).
*/
@SuppressWarnings("unchecked")
public HB maxAnalyzedOffset(Integer maxAnalyzedOffset) {
if (maxAnalyzedOffset != null && maxAnalyzedOffset <= 0) {
throw new IllegalArgumentException("[" + MAX_ANALYZED_OFFSET_FIELD + "] must be a positive integer");
if (maxAnalyzedOffset != null && (maxAnalyzedOffset < -1 || maxAnalyzedOffset == 0)) {
throw new IllegalArgumentException("[" + MAX_ANALYZED_OFFSET_FIELD + "] must be a positive integer, or -1");
}
this.maxAnalyzedOffset = maxAnalyzedOffset;
return (HB) this;

View File

@ -31,6 +31,7 @@ import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.lucene.search.uhighlight.BoundedBreakIteratorScanner;
import org.elasticsearch.lucene.search.uhighlight.CustomPassageFormatter;
import org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter;
import org.elasticsearch.lucene.search.uhighlight.QueryMaxAnalyzedOffset;
import org.elasticsearch.lucene.search.uhighlight.Snippet;
import org.elasticsearch.search.fetch.FetchContext;
import org.elasticsearch.search.fetch.FetchSubPhase;
@ -121,7 +122,10 @@ public class DefaultHighlighter implements Highlighter {
int maxAnalyzedOffset = indexSettings.getHighlightMaxAnalyzedOffset();
boolean weightMatchesEnabled = indexSettings.isWeightMatchesEnabled();
int numberOfFragments = fieldContext.field.fieldOptions().numberOfFragments();
Integer queryMaxAnalyzedOffset = fieldContext.field.fieldOptions().maxAnalyzedOffset();
QueryMaxAnalyzedOffset queryMaxAnalyzedOffset = QueryMaxAnalyzedOffset.create(
fieldContext.field.fieldOptions().maxAnalyzedOffset(),
maxAnalyzedOffset
);
Analyzer analyzer = wrapAnalyzer(
fieldContext.context.getSearchExecutionContext().getIndexAnalyzer(f -> Lucene.KEYWORD_ANALYZER),
queryMaxAnalyzedOffset
@ -171,7 +175,7 @@ public class DefaultHighlighter implements Highlighter {
fieldContext.field.fieldOptions().noMatchSize(),
highlighterNumberOfFragments,
maxAnalyzedOffset,
fieldContext.field.fieldOptions().maxAnalyzedOffset(),
queryMaxAnalyzedOffset,
fieldContext.field.fieldOptions().requireFieldMatch(),
weightMatchesEnabled
);
@ -186,9 +190,9 @@ public class DefaultHighlighter implements Highlighter {
);
}
protected Analyzer wrapAnalyzer(Analyzer analyzer, Integer maxAnalyzedOffset) {
protected Analyzer wrapAnalyzer(Analyzer analyzer, QueryMaxAnalyzedOffset maxAnalyzedOffset) {
if (maxAnalyzedOffset != null) {
analyzer = new LimitTokenOffsetAnalyzer(analyzer, maxAnalyzedOffset);
analyzer = new LimitTokenOffsetAnalyzer(analyzer, maxAnalyzedOffset.getNotNull());
}
return analyzer;
}

View File

@ -27,6 +27,7 @@ import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.text.Text;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.lucene.search.uhighlight.QueryMaxAnalyzedOffset;
import org.elasticsearch.search.fetch.FetchContext;
import org.elasticsearch.search.fetch.FetchSubPhase;
@ -107,7 +108,10 @@ public class PlainHighlighter implements Highlighter {
ArrayList<OrderedTextFragment> fragsList = new ArrayList<>();
List<Object> textsToHighlight;
final int maxAnalyzedOffset = context.getSearchExecutionContext().getIndexSettings().getHighlightMaxAnalyzedOffset();
Integer queryMaxAnalyzedOffset = fieldContext.field.fieldOptions().maxAnalyzedOffset();
QueryMaxAnalyzedOffset queryMaxAnalyzedOffset = QueryMaxAnalyzedOffset.create(
fieldContext.field.fieldOptions().maxAnalyzedOffset(),
maxAnalyzedOffset
);
Analyzer analyzer = wrapAnalyzer(
context.getSearchExecutionContext().getIndexAnalyzer(f -> Lucene.KEYWORD_ANALYZER),
queryMaxAnalyzedOffset
@ -119,7 +123,8 @@ public class PlainHighlighter implements Highlighter {
for (Object textToHighlight : textsToHighlight) {
String text = convertFieldValue(fieldType, textToHighlight);
int textLength = text.length();
if ((queryMaxAnalyzedOffset == null || queryMaxAnalyzedOffset > maxAnalyzedOffset) && (textLength > maxAnalyzedOffset)) {
if ((queryMaxAnalyzedOffset == null || queryMaxAnalyzedOffset.getNotNull() > maxAnalyzedOffset)
&& (textLength > maxAnalyzedOffset)) {
throw new IllegalArgumentException(
"The length ["
+ textLength
@ -241,9 +246,9 @@ public class PlainHighlighter implements Highlighter {
}
}
private static Analyzer wrapAnalyzer(Analyzer analyzer, Integer maxAnalyzedOffset) {
private static Analyzer wrapAnalyzer(Analyzer analyzer, QueryMaxAnalyzedOffset maxAnalyzedOffset) {
if (maxAnalyzedOffset != null) {
return new LimitTokenOffsetAnalyzer(analyzer, maxAnalyzedOffset);
return new LimitTokenOffsetAnalyzer(analyzer, maxAnalyzedOffset.getNotNull());
}
return analyzer;
}

View File

@ -157,7 +157,7 @@ public class CustomUnifiedHighlighterTests extends ESTestCase {
noMatchSize,
expectedPassages.length,
maxAnalyzedOffset,
queryMaxAnalyzedOffset,
QueryMaxAnalyzedOffset.create(queryMaxAnalyzedOffset, maxAnalyzedOffset),
true,
true
);

View File

@ -577,10 +577,10 @@ public class HighlightBuilderTests extends ESTestCase {
public void testInvalidMaxAnalyzedOffset() throws IOException {
XContentParseException e = expectParseThrows(
XContentParseException.class,
"{ \"max_analyzed_offset\" : " + randomIntBetween(-100, 0) + "}"
"{ \"max_analyzed_offset\" : " + randomIntBetween(-100, -1) + "}"
);
assertThat(e.getMessage(), containsString("[highlight] failed to parse field [" + MAX_ANALYZED_OFFSET_FIELD.toString() + "]"));
assertThat(e.getCause().getMessage(), containsString("[max_analyzed_offset] must be a positive integer"));
assertThat(e.getCause().getMessage(), containsString("[max_analyzed_offset] must be a positive integer, or -1"));
}
/**