Use FallbackSyntheticSourceBlockLoader for text fields (#126237)
@@ -0,0 +1,5 @@
pr: 126237
summary: Use `FallbackSyntheticSourceBlockLoader` for text fields
area: Mapping
type: enhancement
issues: []
@@ -74,6 +74,7 @@ import org.elasticsearch.script.field.TextDocValuesField;
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
import org.elasticsearch.xcontent.ToXContent;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentParser;

import java.io.IOException;
import java.util.ArrayList;

@@ -1016,10 +1017,53 @@ public final class TextFieldMapper extends FieldMapper {
        if (isStored()) {
            return new BlockStoredFieldsReader.BytesFromStringsBlockLoader(name());
        }

        // _ignored_source field will only be present if text field is not stored
        // and there is no syntheticSourceDelegate
        if (isSyntheticSource && syntheticSourceDelegate == null) {
            return fallbackSyntheticSourceBlockLoader();
        }

        SourceValueFetcher fetcher = SourceValueFetcher.toString(blContext.sourcePaths(name()));
        return new BlockSourceReader.BytesRefsBlockLoader(fetcher, blockReaderDisiLookup(blContext));
    }

    FallbackSyntheticSourceBlockLoader fallbackSyntheticSourceBlockLoader() {
        var reader = new FallbackSyntheticSourceBlockLoader.SingleValueReader<BytesRef>(null) {
            @Override
            public void convertValue(Object value, List<BytesRef> accumulator) {
                if (value != null) {
                    accumulator.add(new BytesRef(value.toString()));
                }
            }

            @Override
            protected void parseNonNullValue(XContentParser parser, List<BytesRef> accumulator) throws IOException {
                var text = parser.textOrNull();

                if (text != null) {
                    accumulator.add(new BytesRef(text));
                }
            }

            @Override
            public void writeToBlock(List<BytesRef> values, BlockLoader.Builder blockBuilder) {
                var bytesRefBuilder = (BlockLoader.BytesRefBuilder) blockBuilder;

                for (var value : values) {
                    bytesRefBuilder.appendBytesRef(value);
                }
            }
        };

        return new FallbackSyntheticSourceBlockLoader(reader, name()) {
            @Override
            public Builder builder(BlockFactory factory, int expectedCount) {
                return factory.bytesRefs(expectedCount);
            }
        };
    }

    /**
     * Build an iterator of documents that have the field. This mirrors parseCreateField,
     * using whatever

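Note: the anonymous SingleValueReader above renders any non-null value it is handed to a string and stores it as a BytesRef. A minimal standalone sketch of that conversion rule (plain Java plus Lucene's BytesRef; the class name and sample values are illustrative, not part of the change):

import org.apache.lucene.util.BytesRef;

import java.util.ArrayList;
import java.util.List;

class TextFallbackConversionSketch {
    // Mirrors the convertValue override above: any non-null value is rendered via toString().
    static void convertValue(Object value, List<BytesRef> accumulator) {
        if (value != null) {
            accumulator.add(new BytesRef(value.toString()));
        }
    }

    public static void main(String[] args) {
        List<BytesRef> accumulator = new ArrayList<>();
        convertValue("foo", accumulator); // a plain string value
        convertValue(42, accumulator);    // non-string values become their string form ("42")
        convertValue(null, accumulator);  // nulls are skipped
        System.out.println(accumulator.size()); // prints 2
    }
}
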
@@ -25,9 +25,13 @@ public class KeywordFieldBlockLoaderTests extends BlockLoaderTestCase {
        super(FieldType.KEYWORD.toString(), params);
    }

    @SuppressWarnings("unchecked")
    @Override
    protected Object expected(Map<String, Object> fieldMapping, Object value, TestContext testContext) {
        return expectedValue(fieldMapping, value, params, testContext);
    }

    @SuppressWarnings("unchecked")
    public static Object expectedValue(Map<String, Object> fieldMapping, Object value, Params params, TestContext testContext) {
        var nullValue = (String) fieldMapping.get("null_value");

        var ignoreAbove = fieldMapping.get("ignore_above") == null

@@ -45,7 +49,7 @@
        Function<Stream<String>, Stream<BytesRef>> convertValues = s -> s.map(v -> convert(v, nullValue, ignoreAbove))
            .filter(Objects::nonNull);

-        boolean hasDocValues = hasDocValues(fieldMapping, false);
+        boolean hasDocValues = hasDocValues(fieldMapping, true);
        boolean useDocValues = params.preference() == MappedFieldType.FieldExtractPreference.NONE
            || params.preference() == MappedFieldType.FieldExtractPreference.DOC_VALUES
            || params.syntheticSource();

@@ -63,7 +67,7 @@
        return maybeFoldList(resultList);
    }

-    private BytesRef convert(String value, String nullValue, int ignoreAbove) {
+    private static BytesRef convert(String value, String nullValue, int ignoreAbove) {
        if (value == null) {
            if (nullValue != null) {
                value = nullValue;

@@ -0,0 +1,135 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the "Elastic License
 * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
 * Public License v 1"; you may not use this file except in compliance with, at
 * your election, the "Elastic License 2.0", the "GNU Affero General Public
 * License v3.0 only", or the "Server Side Public License, v 1".
 */

package org.elasticsearch.index.mapper.blockloader;

import org.apache.lucene.util.BytesRef;
import org.elasticsearch.index.mapper.BlockLoaderTestCase;
import org.elasticsearch.logsdb.datageneration.FieldType;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;

public class TextFieldBlockLoaderTests extends BlockLoaderTestCase {
    public TextFieldBlockLoaderTests(Params params) {
        super(FieldType.TEXT.toString(), params);
    }

    @SuppressWarnings("unchecked")
    @Override
    protected Object expected(Map<String, Object> fieldMapping, Object value, TestContext testContext) {
        if (fieldMapping.getOrDefault("store", false).equals(true)) {
            return valuesInSourceOrder(value);
        }

        var fields = (Map<String, Object>) fieldMapping.get("fields");
        if (fields != null) {
            var keywordMultiFieldMapping = (Map<String, Object>) fields.get("kwd");
            boolean docValues = hasDocValues(keywordMultiFieldMapping, true);
            boolean index = keywordMultiFieldMapping.getOrDefault("index", true).equals(true);
            boolean store = keywordMultiFieldMapping.getOrDefault("store", false).equals(true);
            Object ignoreAbove = keywordMultiFieldMapping.get("ignore_above");

            // See TextFieldMapper.SyntheticSourceHelper#usingSyntheticSourceDelegate
            // and TextFieldMapper#canUseSyntheticSourceDelegateForLoading().
            boolean usingSyntheticSourceDelegate = docValues || store;
            boolean canUseSyntheticSourceDelegateForLoading = usingSyntheticSourceDelegate && ignoreAbove == null && (index || store);
            if (canUseSyntheticSourceDelegateForLoading) {
                return KeywordFieldBlockLoaderTests.expectedValue(keywordMultiFieldMapping, value, params, testContext);
            }

            // Even if multi-field is not eligible for loading it can still be used to produce synthetic source
            // and then we load from the synthetic source.
            // Synthetic source is actually different from keyword field block loader results
            // because synthetic source includes values exceeding ignore_above and block loader doesn't.
            // TODO ideally this logic should be in some kind of KeywordFieldSyntheticSourceTest that uses same infra as
            // KeywordFieldBlockLoaderTest
            // It is here since KeywordFieldBlockLoaderTest does not really need it
            if (params.syntheticSource() && testContext.forceFallbackSyntheticSource() == false && usingSyntheticSourceDelegate) {
                var nullValue = (String) keywordMultiFieldMapping.get("null_value");

                // Due to how TextFieldMapper#blockReaderDisiLookup works this is complicated.
                // If we are using lookupMatchingAll() then we'll see all docs, generate synthetic source using syntheticSourceDelegate,
                // parse it and see null_value inside.
                // But if we are using lookupFromNorms() we will skip the document (since the text field itself does not exist).
                // Same goes for lookupFromFieldNames().
                boolean textFieldIndexed = (boolean) fieldMapping.getOrDefault("index", true);

                if (value == null) {
                    if (textFieldIndexed == false
                        && nullValue != null
                        && (ignoreAbove == null || nullValue.length() <= (int) ignoreAbove)) {
                        return new BytesRef(nullValue);
                    }

                    return null;
                }

                if (value instanceof String s) {
                    return new BytesRef(s);
                }

                var values = (List<String>) value;

                // See note above about TextFieldMapper#blockReaderDisiLookup.
                if (textFieldIndexed && values.stream().allMatch(Objects::isNull)) {
                    return null;
                }

                var indexed = values.stream()
                    .map(s -> s == null ? nullValue : s)
                    .filter(Objects::nonNull)
                    .filter(s -> ignoreAbove == null || s.length() <= (int) ignoreAbove)
                    .map(BytesRef::new)
                    .collect(Collectors.toList());

                if (store == false) {
                    // using doc_values for synthetic source
                    indexed = new ArrayList<>(new HashSet<>(indexed));
                    indexed.sort(BytesRef::compareTo);
                }

                // ignored values always come last
                List<BytesRef> ignored = ignoreAbove == null
                    ? List.of()
                    : values.stream()
                        .map(s -> s == null ? nullValue : s)
                        .filter(Objects::nonNull)
                        .filter(s -> s.length() > (int) ignoreAbove)
                        .map(BytesRef::new)
                        .toList();

                indexed.addAll(ignored);

                return maybeFoldList(indexed);
            }
        }

        // Loading from _ignored_source or stored _source
        return valuesInSourceOrder(value);
    }

    @SuppressWarnings("unchecked")
    private Object valuesInSourceOrder(Object value) {
        if (value == null) {
            return null;
        }

        if (value instanceof String s) {
            return new BytesRef(s);
        }

        var resultList = ((List<String>) value).stream().filter(Objects::nonNull).map(BytesRef::new).toList();
        return maybeFoldList(resultList);
    }
}

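Note: one rule the expected values above encode is that when the keyword multi-field backs synthetic source through doc_values (store == false), values are deduplicated and sorted, while values exceeding ignore_above live in _ignored_source and are appended last. A small standalone sketch of that ordering (illustrative values; requires Lucene's BytesRef and is not part of the test suite):

import org.apache.lucene.util.BytesRef;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;

class DocValuesOrderingSketch {
    public static void main(String[] args) {
        // values as they appear in the document, with a duplicate
        List<BytesRef> indexed = new ArrayList<>(List.of(new BytesRef("b"), new BytesRef("a"), new BytesRef("b")));

        // doc_values-backed synthetic source drops duplicates and sorts
        indexed = new ArrayList<>(new HashSet<>(indexed));
        indexed.sort(BytesRef::compareTo);

        // a value over ignore_above is kept in _ignored_source and is appended last, regardless of sort order
        indexed.add(new BytesRef("a value that exceeded ignore_above"));

        indexed.forEach(v -> System.out.println(v.utf8ToString())); // a, b, a value that exceeded ignore_above
    }
}
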
@@ -23,6 +23,7 @@ import org.elasticsearch.logsdb.datageneration.fields.leaf.KeywordFieldDataGener
import org.elasticsearch.logsdb.datageneration.fields.leaf.LongFieldDataGenerator;
import org.elasticsearch.logsdb.datageneration.fields.leaf.ScaledFloatFieldDataGenerator;
import org.elasticsearch.logsdb.datageneration.fields.leaf.ShortFieldDataGenerator;
import org.elasticsearch.logsdb.datageneration.fields.leaf.TextFieldDataGenerator;
import org.elasticsearch.logsdb.datageneration.fields.leaf.UnsignedLongFieldDataGenerator;

/**

@@ -42,7 +43,8 @@
    COUNTED_KEYWORD("counted_keyword"),
    BOOLEAN("boolean"),
    DATE("date"),
-    GEO_POINT("geo_point");
+    GEO_POINT("geo_point"),
+    TEXT("text");

    private final String name;

@@ -66,6 +68,7 @@
            case BOOLEAN -> new BooleanFieldDataGenerator(dataSource);
            case DATE -> new DateFieldDataGenerator(dataSource);
            case GEO_POINT -> new GeoPointFieldDataGenerator(dataSource);
            case TEXT -> new TextFieldDataGenerator(dataSource);
        };
    }

@@ -85,6 +88,7 @@
            case "boolean" -> FieldType.BOOLEAN;
            case "date" -> FieldType.DATE;
            case "geo_point" -> FieldType.GEO_POINT;
            case "text" -> FieldType.TEXT;
            default -> null;
        };
    }

@@ -35,10 +35,7 @@ public class DefaultMappingParametersHandler implements DataSourceHandler {
            return null;
        }

-        var map = new HashMap<String, Object>();
-        map.put("store", ESTestCase.randomBoolean());
-        map.put("index", ESTestCase.randomBoolean());
-        map.put("doc_values", ESTestCase.randomBoolean());
+        var map = commonMappingParameters();
        if (ESTestCase.randomBoolean()) {
            map.put(Mapper.SYNTHETIC_SOURCE_KEEP_PARAM, ESTestCase.randomFrom("none", "arrays", "all"));
        }

@@ -51,6 +48,7 @@
            case BOOLEAN -> booleanMapping(map);
            case DATE -> dateMapping(map);
            case GEO_POINT -> geoPointMapping(map);
            case TEXT -> textMapping(request, new HashMap<>());
        });
    }

@@ -190,6 +188,35 @@ public class DefaultMappingParametersHandler implements DataSourceHandler {
        };
    }

    private Supplier<Map<String, Object>> textMapping(
        DataSourceRequest.LeafMappingParametersGenerator request,
        Map<String, Object> injected
    ) {
        return () -> {
            injected.put("store", ESTestCase.randomBoolean());
            injected.put("index", ESTestCase.randomBoolean());

            if (ESTestCase.randomDouble() <= 0.1) {
                var keywordMultiFieldMapping = keywordMapping(request, commonMappingParameters()).get();
                keywordMultiFieldMapping.put("type", "keyword");
                keywordMultiFieldMapping.remove("copy_to");

                injected.put("fields", Map.of("kwd", keywordMultiFieldMapping));
            }

            return injected;
        };
    }

    private static HashMap<String, Object> commonMappingParameters() {
        var map = new HashMap<String, Object>();
        map.put("store", ESTestCase.randomBoolean());
        map.put("index", ESTestCase.randomBoolean());
        map.put("doc_values", ESTestCase.randomBoolean());
        return map;
    }

    @Override
    public DataSourceResponse.ObjectMappingParametersGenerator handle(DataSourceRequest.ObjectMappingParametersGenerator request) {
        if (request.isNested()) {

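Note: for orientation, one mapping shape the textMapping generator above can produce is sketched below (values are randomized in the real generator, the "kwd" keyword multi-field is only injected roughly 10% of the time, and the keyword sub-mapping may carry extra parameters such as null_value or ignore_above):

import java.util.Map;

class GeneratedTextMappingExample {
    public static void main(String[] args) {
        Map<String, Object> textMapping = Map.of(
            "store", false,
            "index", true,
            "fields", Map.of("kwd", Map.of("type", "keyword", "store", false, "index", true, "doc_values", true))
        );
        System.out.println(textMapping);
    }
}
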
@@ -0,0 +1,34 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the "Elastic License
 * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
 * Public License v 1"; you may not use this file except in compliance with, at
 * your election, the "Elastic License 2.0", the "GNU Affero General Public
 * License v3.0 only", or the "Server Side Public License, v 1".
 */

package org.elasticsearch.logsdb.datageneration.fields.leaf;

import org.elasticsearch.logsdb.datageneration.FieldDataGenerator;
import org.elasticsearch.logsdb.datageneration.datasource.DataSource;
import org.elasticsearch.logsdb.datageneration.datasource.DataSourceRequest;

import java.util.Map;
import java.util.function.Supplier;

public class TextFieldDataGenerator implements FieldDataGenerator {
    private final Supplier<Object> valueGenerator;

    public TextFieldDataGenerator(DataSource dataSource) {
        var strings = dataSource.get(new DataSourceRequest.StringGenerator());
        var nulls = dataSource.get(new DataSourceRequest.NullWrapper());
        var arrays = dataSource.get(new DataSourceRequest.ArrayWrapper());

        this.valueGenerator = arrays.wrapper().compose(nulls.wrapper()).apply(() -> strings.generator().get());
    }

    @Override
    public Object generateValue(Map<String, Object> fieldMapping) {
        return valueGenerator.get();
    }
}

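Note: the value generator above is built by decorating a plain string supplier; Function#compose applies the null wrapper first and the array wrapper second, so a generated value may be null, a single string, or an array of possibly-null strings. A self-contained sketch of that composition pattern (plain Java; the names and probabilities are illustrative, not the datageneration API):

import java.util.ArrayList;
import java.util.Random;
import java.util.function.Function;
import java.util.function.Supplier;

class WrapperCompositionSketch {
    public static void main(String[] args) {
        Random random = new Random();

        // base generator: plain string values
        Supplier<Object> strings = () -> "value-" + random.nextInt(100);

        // decorator: sometimes replace the value with null
        Function<Supplier<Object>, Supplier<Object>> nulls = s -> () -> random.nextInt(4) == 0 ? null : s.get();

        // decorator: sometimes wrap several generated values in a list
        Function<Supplier<Object>, Supplier<Object>> arrays = s -> () -> {
            if (random.nextBoolean()) {
                var list = new ArrayList<Object>();
                for (int i = 0; i < 3; i++) {
                    list.add(s.get());
                }
                return list;
            }
            return s.get();
        };

        // compose: arrays(nulls(strings)) -- nulls apply to elements, arrays wrap the result
        Supplier<Object> generator = arrays.compose(nulls).apply(strings);
        for (int i = 0; i < 5; i++) {
            System.out.println(generator.get());
        }
    }
}
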
@@ -61,6 +61,7 @@ interface FieldSpecificMatcher {
                put("geo_shape", new ExactMatcher("geo_shape", actualMappings, actualSettings, expectedMappings, expectedSettings));
                put("shape", new ExactMatcher("shape", actualMappings, actualSettings, expectedMappings, expectedSettings));
                put("geo_point", new GeoPointMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings));
                put("text", new TextMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings));
            }
        };
    }

@@ -598,6 +599,73 @@ interface FieldSpecificMatcher {
        }
    }

    class TextMatcher implements FieldSpecificMatcher {
        private final XContentBuilder actualMappings;
        private final Settings.Builder actualSettings;
        private final XContentBuilder expectedMappings;
        private final Settings.Builder expectedSettings;

        TextMatcher(
            XContentBuilder actualMappings,
            Settings.Builder actualSettings,
            XContentBuilder expectedMappings,
            Settings.Builder expectedSettings
        ) {
            this.actualMappings = actualMappings;
            this.actualSettings = actualSettings;
            this.expectedMappings = expectedMappings;
            this.expectedSettings = expectedSettings;
        }

        @Override
        @SuppressWarnings("unchecked")
        public MatchResult match(
            List<Object> actual,
            List<Object> expected,
            Map<String, Object> actualMapping,
            Map<String, Object> expectedMapping
        ) {
            var expectedNormalized = normalize(expected);
            var actualNormalized = normalize(actual);

            // Match simply as text first.
            if (actualNormalized.equals(expectedNormalized)) {
                return MatchResult.match();
            }

            // In some cases synthetic source for text fields is synthesized using the keyword multi field.
            // So in this case it's appropriate to match it using keyword matching logic (mainly to cover `null_value`).
            var multiFields = (Map<String, Object>) getMappingParameter("fields", actualMapping, expectedMapping);
            if (multiFields != null) {
                var keywordMatcher = new KeywordMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings);

                var keywordFieldMapping = (Map<String, Object>) multiFields.get("kwd");
                var keywordMatchResult = keywordMatcher.match(actual, expected, keywordFieldMapping, keywordFieldMapping);
                if (keywordMatchResult.isMatch()) {
                    return MatchResult.match();
                }
            }

            return MatchResult.noMatch(
                formatErrorMessage(
                    actualMappings,
                    actualSettings,
                    expectedMappings,
                    expectedSettings,
                    "Values of type [text] don't match, " + prettyPrintCollections(actual, expected)
                )
            );
        }

        private Set<Object> normalize(List<Object> values) {
            if (values == null) {
                return Set.of();
            }

            return values.stream().filter(Objects::nonNull).collect(Collectors.toSet());
        }
    }

    /**
     * Generic matcher that supports common matching logic like null values.
     */

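Note: normalize above compares values as sets, so ordering and duplicates are ignored and nulls are dropped before comparison. A tiny standalone illustration of that comparison (not part of the matcher):

import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;

class NormalizeComparisonSketch {
    static Set<Object> normalize(List<Object> values) {
        return values == null ? Set.of() : values.stream().filter(Objects::nonNull).collect(Collectors.toSet());
    }

    public static void main(String[] args) {
        List<Object> actual = Arrays.asList("a", "b", null, "a");
        List<Object> expected = Arrays.asList("b", "a");
        System.out.println(normalize(actual).equals(normalize(expected))); // true
    }
}
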
@@ -46,7 +46,7 @@ class MappingTransforms {
            if (entry.getKey().equals("_doc") || entry.getKey().equals("properties")) {
                descend(pathFromRoot, (Map<String, Object>) entry.getValue(), flattened);
            } else {
-                if (entry.getValue() instanceof Map<?, ?> map) {
+                if (entry.getKey().equals("fields") == false && entry.getValue() instanceof Map<?, ?> map) {
                    var pathToField = pathFromRoot == null ? entry.getKey() : pathFromRoot + "." + entry.getKey();

                    // Descending to subobject, we need to remember parent mapping