Store array offsets for keyword fields natively with synthetic source (#113757)

The keyword doc values field gets an extra sorted doc values field that encodes the order in which array values were specified at index time; this also captures duplicate values. The ordering is stored as an offset-to-ordinal array that gets zigzag vint encoded into the sorted doc values field.

For example, given the following string array for a keyword field: ["c", "b", "a", "c"], the sorted set doc values are ["a", "b", "c"] with ordinals 0, 1 and 2, and the offset array is [2, 1, 0, 2].

Null values are also supported. For example, ["c", "b", null, "c"] results in sorted set doc values ["b", "c"] with ordinals 0 and 1, and an offset array of [1, 0, -1, 1].
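At synthesis time this process is reversed: the offset array is walked in order, each ordinal is looked up in the sorted set, and -1 entries become nulls. A minimal sketch of that lookup in plain Java (independent of the Lucene doc values APIs the actual loader uses):

import java.util.ArrayList;
import java.util.List;

class OffsetReconstructionSketch {
    // Rebuild the array as indexed, from the document's sorted set values and
    // the decoded offset-to-ordinal array; -1 marks a null element.
    static List<String> synthesize(String[] sortedValues, int[] offsetToOrd) {
        List<String> result = new ArrayList<>(offsetToOrd.length);
        for (int ord : offsetToOrd) {
            result.add(ord == -1 ? null : sortedValues[ord]);
        }
        return result;
    }

    public static void main(String[] args) {
        // Doc values hold ["b", "c"]; offsets [1, 0, -1, 1] restore the
        // original array: prints [c, b, null, c]
        System.out.println(synthesize(new String[] { "b", "c" }, new int[] { 1, 0, -1, 1 }));
    }
}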

Empty arrays are also supported, by encoding an offset array of zero elements (a single zigzag vint length of 0).
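Concretely, the writer emits the zigzag-encoded array length as a vint, followed by one zigzag vint per element, so -1 (null) stays a single byte and an empty array is a single zero byte. A self-contained sketch of the encoding, mirroring FieldArrayContext's addToLuceneDocument/parseOffsetArray but using plain java.io streams instead of Elasticsearch's BytesStreamOutput/StreamInput (an assumption made purely for illustration):

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Arrays;

class OffsetArrayCodecSketch {

    // Zigzag maps signed to unsigned so small negative values stay small: -1 -> 1.
    static int zigZagEncode(int i) {
        return (i >> 31) ^ (i << 1);
    }

    static int zigZagDecode(int i) {
        return (i >>> 1) ^ -(i & 1);
    }

    static void writeVInt(DataOutputStream out, int i) throws IOException {
        while ((i & ~0x7F) != 0) {
            out.writeByte((i & 0x7F) | 0x80);
            i >>>= 7;
        }
        out.writeByte(i);
    }

    static int readVInt(DataInputStream in) throws IOException {
        int value = 0, shift = 0, b;
        do {
            b = in.readUnsignedByte();
            value |= (b & 0x7F) << shift;
            shift += 7;
        } while ((b & 0x80) != 0);
        return value;
    }

    // Length first, then one entry per array slot; -1 encodes null.
    static byte[] encode(int[] offsetToOrd) throws IOException {
        var bytes = new ByteArrayOutputStream();
        var out = new DataOutputStream(bytes);
        writeVInt(out, zigZagEncode(offsetToOrd.length));
        for (int ord : offsetToOrd) {
            writeVInt(out, zigZagEncode(ord));
        }
        return bytes.toByteArray();
    }

    static int[] decode(byte[] encoded) throws IOException {
        var in = new DataInputStream(new ByteArrayInputStream(encoded));
        int[] offsetToOrd = new int[zigZagDecode(readVInt(in))];
        for (int i = 0; i < offsetToOrd.length; i++) {
            offsetToOrd[i] = zigZagDecode(readVInt(in));
        }
        return offsetToOrd;
    }

    public static void main(String[] args) throws IOException {
        int[] offsets = { 1, 0, -1, 1 }; // ["c", "b", null, "c"] over values ["b", "c"]
        System.out.println(Arrays.equals(offsets, decode(encode(offsets)))); // true
        System.out.println(encode(new int[0]).length); // 1: the empty-array case
    }
}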

Limitations:

Currently, arrays are only supported via doc values, and only for the keyword field mapper.
Multi-level leaf arrays are flattened. For example: [[b], [c]] -> [b, c]
Arrays are always synthesized as one type. In case of a keyword field, [1, 2] gets synthesized as ["1", "2"].
These limitations can be addressed, but some require more complexity and/or additional storage.

With this PR, keyword field arrays will no longer be stored in ignored source; instead, array offsets are tracked in an adjacent sorted doc values field. This only applies if index.mapping.synthetic_source_keep is set to arrays (the default for logsdb).
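For reference, these are the two index settings that gate the behavior, as used throughout the tests below; a minimal sketch of building them (the wrapper class is illustrative only):

import org.elasticsearch.common.settings.Settings;

class NativeArrayOffsetsSettingsSketch {
    // Under these settings the keyword mapper creates the adjacent ".offsets"
    // doc values field instead of falling back to ignored source; logsdb
    // applies them by default.
    static Settings offsetsEnabled() {
        return Settings.builder()
            .put("index.mapping.source.mode", "synthetic")
            .put("index.mapping.synthetic_source_keep", "arrays")
            .build();
    }
}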
Martijn van Groningen 2025-02-20 09:20:49 +01:00 committed by GitHub
parent 18df4d0c89
commit 43665f0a35
19 changed files with 1106 additions and 33 deletions

View File

@ -0,0 +1,5 @@
pr: 113757
summary: Store array offsets for keyword fields natively with synthetic source instead of falling back to ignored source.
area: Mapping
type: enhancement
issues: []

View File

@ -83,4 +83,8 @@ tasks.named("yamlRestCompatTestTransform").configure ({ task ->
"node_version warning is removed in 9.0"
)
task.skipTest("tsdb/20_mapping/nested fields", "nested field support in tsdb indices is now supported")
task.skipTest("logsdb/10_settings/routing path allowed in logs mode with routing on sort fields", "Unknown feature routing.logsb_route_on_sort_fields")
task.skipTest("indices.create/21_synthetic_source_stored/index param - field ordering", "Synthetic source keep arrays now stores leaf arrays natively")
task.skipTest("indices.create/21_synthetic_source_stored/field param - keep nested array", "Synthetic source keep arrays now stores leaf arrays natively")
task.skipTest("indices.create/21_synthetic_source_stored/field param - keep root array", "Synthetic source keep arrays now stores leaf arrays natively")
})

View File

@ -922,7 +922,7 @@ subobjects auto:
- match: { hits.hits.0._source.foo: 10 }
- match: { hits.hits.0._source.foo\.bar: 100 }
- match: { hits.hits.0._source.regular.span.id: "1" }
- match: { hits.hits.0._source.regular.trace.id: [ "a", "b" ] }
- match: { hits.hits.0._source.regular.trace.id: ["a", "b" ] }
- match: { hits.hits.1._source.id: 2 }
- match: { hits.hits.1._source.foo: 20 }
- match: { hits.hits.1._source.foo\.bar: 200 }

View File

@ -1024,7 +1024,7 @@ index param - field ordering:
index: test
- length: { hits.hits.0._source: 4 }
- match: { hits.hits.0._source: { "a": "2", "b": [ { "bb": 100, "aa": 200 }, { "aa": 300, "bb": 400 } ], "c": [30, 20, 10], "d": [ { "bb": 10, "aa": 20 }, { "aa": 30, "bb": 40 } ] } }
- match: { hits.hits.0._source: { "a": "2", "b": [ { "bb": 100, "aa": 200 }, { "aa": 300, "bb": 400 } ], "c": ["30", "20", "10"], "d": [ { "bb": 10, "aa": 20 }, { "aa": 30, "bb": 40 } ] } }
---

View File

@ -148,6 +148,7 @@ public class IndexVersions {
public static final IndexVersion USE_SYNTHETIC_SOURCE_FOR_RECOVERY_BY_DEFAULT = def(9_010_00_0, Version.LUCENE_10_1_0);
public static final IndexVersion TIMESTAMP_DOC_VALUES_SPARSE_INDEX = def(9_011_0_00, Version.LUCENE_10_1_0);
public static final IndexVersion TIME_SERIES_ID_DOC_VALUES_SPARSE_INDEX = def(9_012_0_00, Version.LUCENE_10_1_0);
public static final IndexVersion SYNTHETIC_SOURCE_STORE_ARRAYS_NATIVELY_KEYWORD = def(9_013_0_00, Version.LUCENE_10_1_0);
/*
* STOP! READ THIS FIRST! No, really,
* ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _

View File

@ -154,6 +154,7 @@ public final class DocumentParser {
executeIndexTimeScripts(context);
context.processArrayOffsets(context);
for (MetadataFieldMapper metadataMapper : metadataFieldsMappers) {
metadataMapper.postParse(context);
}
@ -519,6 +520,7 @@ public final class DocumentParser {
private static void parseObject(final DocumentParserContext context, String currentFieldName) throws IOException {
assert currentFieldName != null;
context.setImmediateXContentParent(context.parser().currentToken());
Mapper objectMapper = context.getMapper(currentFieldName);
if (objectMapper != null) {
doParseObject(context, currentFieldName, objectMapper);
@ -611,6 +613,12 @@ public final class DocumentParser {
}
private static void parseArray(DocumentParserContext context, String lastFieldName) throws IOException {
// Record the previous immediate parent, so that it can be restored after the array has been parsed.
// This is for recording array offsets with synthetic source. Offsets can only be accounted for
// accurately if the immediate parent is an array.
var prev = context.getImmediateXContentParent();
context.setImmediateXContentParent(context.parser().currentToken());
Mapper mapper = getLeafMapper(context, lastFieldName);
if (mapper != null) {
// There is a concrete mapper for this field already. Need to check if the mapper
@ -624,6 +632,8 @@ public final class DocumentParser {
} else {
parseArrayDynamic(context, lastFieldName);
}
// Reset previous immediate parent
context.setImmediateXContentParent(prev);
}
private static void parseArrayDynamic(DocumentParserContext context, String currentFieldName) throws IOException {
@ -688,11 +698,12 @@ public final class DocumentParser {
final String lastFieldName,
String arrayFieldName
) throws IOException {
boolean supportStoringArrayOffsets = mapper != null && mapper.supportStoringArrayOffsets();
String fullPath = context.path().pathAsText(arrayFieldName);
// Check if we need to record the array source. This only applies to synthetic source.
boolean canRemoveSingleLeafElement = false;
if (context.canAddIgnoredField()) {
if (context.canAddIgnoredField() && supportStoringArrayOffsets == false) {
Mapper.SourceKeepMode mode = Mapper.SourceKeepMode.NONE;
boolean objectWithFallbackSyntheticSource = false;
if (mapper instanceof ObjectMapper objectMapper) {
@ -736,6 +747,7 @@ public final class DocumentParser {
XContentParser parser = context.parser();
XContentParser.Token token;
XContentParser.Token previousToken = parser.currentToken();
int elements = 0;
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
if (token == XContentParser.Token.START_OBJECT) {
@ -754,6 +766,14 @@ public final class DocumentParser {
elements++;
parseValue(context, lastFieldName);
}
previousToken = token;
}
if (mapper != null
&& context.canAddIgnoredField()
&& mapper.supportStoringArrayOffsets()
&& previousToken == XContentParser.Token.START_ARRAY
&& context.isImmediateParentAnArray()) {
context.getOffSetContext().maybeRecordEmptyArray(mapper.getOffsetFieldName());
}
if (elements <= 1 && canRemoveSingleLeafElement) {
context.removeLastIgnoredField(fullPath);

View File

@ -91,6 +91,31 @@ public abstract class DocumentParserContext {
protected void addDoc(LuceneDocument doc) {
in.addDoc(doc);
}
@Override
public void processArrayOffsets(DocumentParserContext context) throws IOException {
in.processArrayOffsets(context);
}
@Override
public FieldArrayContext getOffSetContext() {
return in.getOffSetContext();
}
@Override
public void setImmediateXContentParent(XContentParser.Token token) {
in.setImmediateXContentParent(token);
}
@Override
public XContentParser.Token getImmediateXContentParent() {
return in.getImmediateXContentParent();
}
@Override
public boolean isImmediateParentAnArray() {
return in.isImmediateParentAnArray();
}
}
/**
@ -141,6 +166,8 @@ public abstract class DocumentParserContext {
private final SeqNoFieldMapper.SequenceIDFields seqID;
private final Set<String> fieldsAppliedFromTemplates;
private FieldArrayContext fieldArrayContext;
/**
* Fields that are copied from values of other fields via copy_to.
* This per-document state is needed since it is possible
@ -460,6 +487,33 @@ public abstract class DocumentParserContext {
return copyToFields.contains(name);
}
public void processArrayOffsets(DocumentParserContext context) throws IOException {
if (fieldArrayContext != null) {
fieldArrayContext.addToLuceneDocument(context);
}
}
public FieldArrayContext getOffSetContext() {
if (fieldArrayContext == null) {
fieldArrayContext = new FieldArrayContext();
}
return fieldArrayContext;
}
private XContentParser.Token lastSetToken;
public void setImmediateXContentParent(XContentParser.Token token) {
this.lastSetToken = token;
}
public XContentParser.Token getImmediateXContentParent() {
return lastSetToken;
}
public boolean isImmediateParentAnArray() {
return lastSetToken == XContentParser.Token.START_ARRAY;
}
/**
* Add a new mapper dynamically created while parsing.
*

View File

@ -0,0 +1,93 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.index.mapper;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.util.BitUtil;
import org.elasticsearch.common.io.stream.BytesStreamOutput;
import org.elasticsearch.common.io.stream.StreamInput;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
public class FieldArrayContext {
private final Map<String, Offsets> offsetsPerField = new HashMap<>();
void recordOffset(String field, String value) {
Offsets arrayOffsets = offsetsPerField.computeIfAbsent(field, k -> new Offsets());
int nextOffset = arrayOffsets.currentOffset++;
var offsets = arrayOffsets.valueToOffsets.computeIfAbsent(value, s -> new ArrayList<>(2));
offsets.add(nextOffset);
}
void recordNull(String field) {
Offsets arrayOffsets = offsetsPerField.computeIfAbsent(field, k -> new Offsets());
int nextOffset = arrayOffsets.currentOffset++;
arrayOffsets.nullValueOffsets.add(nextOffset);
}
void maybeRecordEmptyArray(String field) {
offsetsPerField.computeIfAbsent(field, k -> new Offsets());
}
void addToLuceneDocument(DocumentParserContext context) throws IOException {
for (var entry : offsetsPerField.entrySet()) {
var fieldName = entry.getKey();
var offset = entry.getValue();
int currentOrd = 0;
// This array makes it possible to retain the original ordering of elements in leaf arrays, including duplicates.
int[] offsetToOrd = new int[offset.currentOffset];
for (var offsetEntry : offset.valueToOffsets.entrySet()) {
for (var offsetAndLevel : offsetEntry.getValue()) {
offsetToOrd[offsetAndLevel] = currentOrd;
}
currentOrd++;
}
for (var nullOffset : offset.nullValueOffsets) {
offsetToOrd[nullOffset] = -1;
}
try (var streamOutput = new BytesStreamOutput()) {
// Could just use vint for array length, but this allows for decoding my_field: null as -1
streamOutput.writeVInt(BitUtil.zigZagEncode(offsetToOrd.length));
for (int ord : offsetToOrd) {
streamOutput.writeVInt(BitUtil.zigZagEncode(ord));
}
context.doc().add(new SortedDocValuesField(fieldName, streamOutput.bytes().toBytesRef()));
}
}
}
static int[] parseOffsetArray(StreamInput in) throws IOException {
int[] offsetToOrd = new int[BitUtil.zigZagDecode(in.readVInt())];
for (int i = 0; i < offsetToOrd.length; i++) {
offsetToOrd[i] = BitUtil.zigZagDecode(in.readVInt());
}
return offsetToOrd;
}
private static class Offsets {
int currentOffset;
// A TreeMap is needed here, so that values are iterated in sorted order, which is the same order in which
// SortedSetDocValues stores them. This way the ordinals assigned while writing the offsets match the
// ordinals of the values in SortedSetDocValues.
final Map<String, List<Integer>> valueToOffsets = new TreeMap<>();
final List<Integer> nullValueOffsets = new ArrayList<>(2);
}
}

View File

@ -200,7 +200,7 @@ public abstract class FieldMapper extends Mapper {
}
}
private void doParseMultiFields(DocumentParserContext context) throws IOException {
protected void doParseMultiFields(DocumentParserContext context) throws IOException {
context.path().add(leafName());
for (FieldMapper mapper : builderParams.multiFields.mappers) {
mapper.parse(context);
@ -208,7 +208,7 @@ public abstract class FieldMapper extends Mapper {
context.path().remove();
}
private static void throwIndexingWithScriptParam() {
protected static void throwIndexingWithScriptParam() {
throw new IllegalArgumentException("Cannot index data directly into a field with a [script] parameter");
}

View File

@ -95,6 +95,7 @@ public final class KeywordFieldMapper extends FieldMapper {
public static final String CONTENT_TYPE = "keyword";
private static final String HOST_NAME = "host.name";
public static final String OFFSETS_FIELD_NAME_SUFFIX = ".offsets";
public static class Defaults {
public static final FieldType FIELD_TYPE;
@ -203,6 +204,7 @@ public final class KeywordFieldMapper extends FieldMapper {
private final ScriptCompiler scriptCompiler;
private final IndexVersion indexCreatedVersion;
private final boolean useDocValuesSkipper;
private final SourceKeepMode indexSourceKeepMode;
public Builder(final String name, final MappingParserContext mappingParserContext) {
this(
@ -213,7 +215,8 @@ public final class KeywordFieldMapper extends FieldMapper {
mappingParserContext.getIndexSettings().getIndexVersionCreated(),
mappingParserContext.getIndexSettings().getMode(),
mappingParserContext.getIndexSettings().getIndexSortConfig(),
USE_DOC_VALUES_SKIPPER.get(mappingParserContext.getSettings())
USE_DOC_VALUES_SKIPPER.get(mappingParserContext.getSettings()),
mappingParserContext.getIndexSettings().sourceKeepMode()
);
}
@ -222,9 +225,20 @@ public final class KeywordFieldMapper extends FieldMapper {
IndexAnalyzers indexAnalyzers,
ScriptCompiler scriptCompiler,
int ignoreAboveDefault,
IndexVersion indexCreatedVersion
IndexVersion indexCreatedVersion,
SourceKeepMode sourceKeepMode
) {
this(name, indexAnalyzers, scriptCompiler, ignoreAboveDefault, indexCreatedVersion, IndexMode.STANDARD, null, false);
this(
name,
indexAnalyzers,
scriptCompiler,
ignoreAboveDefault,
indexCreatedVersion,
IndexMode.STANDARD,
null,
false,
sourceKeepMode
);
}
private Builder(
@ -235,7 +249,8 @@ public final class KeywordFieldMapper extends FieldMapper {
IndexVersion indexCreatedVersion,
IndexMode indexMode,
IndexSortConfig indexSortConfig,
boolean useDocValuesSkipper
boolean useDocValuesSkipper,
SourceKeepMode indexSourceKeepMode
) {
super(name);
this.indexAnalyzers = indexAnalyzers;
@ -273,10 +288,11 @@ public final class KeywordFieldMapper extends FieldMapper {
this.indexSortConfig = indexSortConfig;
this.indexMode = indexMode;
this.useDocValuesSkipper = useDocValuesSkipper;
this.indexSourceKeepMode = indexSourceKeepMode;
}
public Builder(String name, IndexVersion indexCreatedVersion) {
this(name, null, ScriptCompiler.NONE, Integer.MAX_VALUE, indexCreatedVersion);
this(name, null, ScriptCompiler.NONE, Integer.MAX_VALUE, indexCreatedVersion, SourceKeepMode.NONE);
}
public Builder ignoreAbove(int ignoreAbove) {
@ -422,6 +438,27 @@ public final class KeywordFieldMapper extends FieldMapper {
}
super.hasScript = script.get() != null;
super.onScriptError = onScriptError.getValue();
var sourceKeepMode = this.sourceKeepMode.orElse(indexSourceKeepMode);
String offsetsFieldName;
if (context.isSourceSynthetic()
&& sourceKeepMode == SourceKeepMode.ARRAYS
&& hasDocValues()
&& fieldtype.stored() == false
&& copyTo.copyToFields().isEmpty()
&& multiFieldsBuilder.hasMultiFields() == false
&& indexCreatedVersion.onOrAfter(IndexVersions.SYNTHETIC_SOURCE_STORE_ARRAYS_NATIVELY_KEYWORD)) {
// Skip stored fields: we will be synthesizing from stored fields, so there is no point in keeping track of offsets.
// Skip copy_to and multi fields: supporting them requires more work. However, copy_to usage is rare in metrics
// and logging use cases.
// Keep track of value offsets so that arrays can be reconstructed from doc values in the order in which they
// were specified at index time (if the field is stored, there is no point in doing this).
offsetsFieldName = context.buildFullName(leafName() + OFFSETS_FIELD_NAME_SUFFIX);
} else {
offsetsFieldName = null;
}
return new KeywordFieldMapper(
leafName(),
fieldtype,
@ -429,7 +466,9 @@ public final class KeywordFieldMapper extends FieldMapper {
builderParams(this, context),
context.isSourceSynthetic(),
useDocValuesSkipper,
this
this,
offsetsFieldName,
indexSourceKeepMode
);
}
@ -1028,6 +1067,8 @@ public final class KeywordFieldMapper extends FieldMapper {
private final IndexMode indexMode;
private final IndexSortConfig indexSortConfig;
private final boolean useDocValuesSkipper;
private final String offsetsFieldName;
private final SourceKeepMode indexSourceKeepMode;
private KeywordFieldMapper(
String simpleName,
@ -1036,7 +1077,9 @@ public final class KeywordFieldMapper extends FieldMapper {
BuilderParams builderParams,
boolean isSyntheticSource,
boolean useDocValuesSkipper,
Builder builder
Builder builder,
String offsetsFieldName,
SourceKeepMode indexSourceKeepMode
) {
super(simpleName, mappedFieldType, builderParams);
assert fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) <= 0;
@ -1055,6 +1098,8 @@ public final class KeywordFieldMapper extends FieldMapper {
this.indexMode = builder.indexMode;
this.indexSortConfig = builder.indexSortConfig;
this.useDocValuesSkipper = useDocValuesSkipper;
this.offsetsFieldName = offsetsFieldName;
this.indexSourceKeepMode = indexSourceKeepMode;
}
@Override
@ -1063,9 +1108,24 @@ public final class KeywordFieldMapper extends FieldMapper {
}
@Override
public String getOffsetFieldName() {
return offsetsFieldName;
}
protected void parseCreateField(DocumentParserContext context) throws IOException {
final String value = context.parser().textOrNull();
indexValue(context, value == null ? fieldType().nullValue : value);
String value = context.parser().textOrNull();
if (value == null) {
value = fieldType().nullValue;
}
boolean indexed = indexValue(context, value);
if (offsetsFieldName != null && context.isImmediateParentAnArray() && context.getRecordedSource() == false) {
if (indexed) {
context.getOffSetContext().recordOffset(offsetsFieldName, value);
} else if (value == null) {
context.getOffSetContext().recordNull(offsetsFieldName);
}
}
}
@Override
@ -1078,13 +1138,13 @@ public final class KeywordFieldMapper extends FieldMapper {
this.fieldType().scriptValues.valuesForDoc(searchLookup, readerContext, doc, value -> indexValue(documentParserContext, value));
}
private void indexValue(DocumentParserContext context, String value) {
private boolean indexValue(DocumentParserContext context, String value) {
if (value == null) {
return;
return false;
}
// if field is disabled, skip indexing
if ((fieldType.indexOptions() == IndexOptions.NONE) && (fieldType.stored() == false) && (fieldType().hasDocValues() == false)) {
return;
return false;
}
if (value.length() > fieldType().ignoreAbove()) {
@ -1093,7 +1153,7 @@ public final class KeywordFieldMapper extends FieldMapper {
// Save a copy of the field so synthetic source can load it
context.doc().add(new StoredField(originalName(), new BytesRef(value)));
}
return;
return false;
}
value = normalizeValue(fieldType().normalizer(), fullPath(), value);
@ -1131,6 +1191,8 @@ public final class KeywordFieldMapper extends FieldMapper {
if (fieldType().hasDocValues() == false && fieldType.omitNorms()) {
context.addToFieldNames(fieldType().name());
}
return true;
}
private static String normalizeValue(NamedAnalyzer normalizer, String field, String value) {
@ -1180,7 +1242,8 @@ public final class KeywordFieldMapper extends FieldMapper {
indexCreatedVersion,
indexMode,
indexSortConfig,
useDocValuesSkipper
useDocValuesSkipper,
indexSourceKeepMode
).dimension(fieldType().isDimension()).init(this);
}
@ -1234,19 +1297,23 @@ public final class KeywordFieldMapper extends FieldMapper {
}
});
} else if (hasDocValues) {
layers.add(new SortedSetDocValuesSyntheticFieldLoaderLayer(fullPath()) {
if (offsetsFieldName != null) {
layers.add(new SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer(fullPath(), offsetsFieldName));
} else {
layers.add(new SortedSetDocValuesSyntheticFieldLoaderLayer(fullPath()) {
@Override
protected BytesRef convert(BytesRef value) {
return value;
}
@Override
protected BytesRef convert(BytesRef value) {
return value;
}
@Override
protected BytesRef preserve(BytesRef value) {
// Preserve must make a deep copy because convert gets a shallow copy from the iterator
return BytesRef.deepCopyOf(value);
}
});
@Override
protected BytesRef preserve(BytesRef value) {
// Preserve must make a deep copy because convert gets a shallow copy from the iterator
return BytesRef.deepCopyOf(value);
}
});
}
}
if (fieldType().ignoreAbove != Integer.MAX_VALUE) {

View File

@ -212,4 +212,19 @@ public abstract class Mapper implements ToXContentFragment, Iterable<Mapper> {
* Defines how this mapper counts towards {@link MapperService#INDEX_MAPPING_TOTAL_FIELDS_LIMIT_SETTING}.
*/
public abstract int getTotalFieldsCount();
/**
* @return whether this mapper supports storing leaf array elements natively when synthetic source is enabled.
*/
public final boolean supportStoringArrayOffsets() {
return getOffsetFieldName() != null;
}
/**
* @return the offset field name used to store offsets iff {@link #supportStoringArrayOffsets()} returns
* <code>true</code>.
*/
public String getOffsetFieldName() {
return null;
}
}

View File

@ -0,0 +1,167 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.index.mapper;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.io.stream.ByteArrayStreamInput;
import org.elasticsearch.xcontent.XContentBuilder;
import java.io.IOException;
import java.util.Objects;
/**
* Load {@code _source} fields from {@link SortedSetDocValues} and an associated {@link SortedDocValues} offsets field. The former contains
* the unique values in sorted order and the latter the offsets for each instance of the values. This allows synthesizing array elements
* in the order in which they were specified at index time. Note that this works only for leaf arrays.
*/
final class SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer implements CompositeSyntheticFieldLoader.DocValuesLayer {
private final String name;
private final String offsetsFieldName;
private DocValuesWithOffsetsLoader docValues;
SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer(String name, String offsetsFieldName) {
this.name = Objects.requireNonNull(name);
this.offsetsFieldName = Objects.requireNonNull(offsetsFieldName);
}
@Override
public String fieldName() {
return name;
}
@Override
public DocValuesLoader docValuesLoader(LeafReader leafReader, int[] docIdsInLeaf) throws IOException {
SortedSetDocValues valueDocValues = DocValues.getSortedSet(leafReader, name);
SortedDocValues offsetDocValues = DocValues.getSorted(leafReader, offsetsFieldName);
return docValues = new DocValuesWithOffsetsLoader(valueDocValues, offsetDocValues);
}
@Override
public boolean hasValue() {
if (docValues != null) {
return docValues.count() > 0;
} else {
return false;
}
}
@Override
public long valueCount() {
if (docValues != null) {
return docValues.count();
} else {
return 0;
}
}
@Override
public void write(XContentBuilder b) throws IOException {
if (docValues != null) {
docValues.write(b);
}
}
static final class DocValuesWithOffsetsLoader implements DocValuesLoader {
private final SortedDocValues offsetDocValues;
private final SortedSetDocValues valueDocValues;
private final ByteArrayStreamInput scratch = new ByteArrayStreamInput();
private boolean hasValue;
private boolean hasOffset;
private int[] offsetToOrd;
DocValuesWithOffsetsLoader(SortedSetDocValues valueDocValues, SortedDocValues offsetDocValues) {
this.valueDocValues = valueDocValues;
this.offsetDocValues = offsetDocValues;
}
@Override
public boolean advanceToDoc(int docId) throws IOException {
hasValue = valueDocValues.advanceExact(docId);
hasOffset = offsetDocValues.advanceExact(docId);
if (hasValue || hasOffset) {
if (hasOffset) {
int offsetOrd = offsetDocValues.ordValue();
var encodedValue = offsetDocValues.lookupOrd(offsetOrd);
scratch.reset(encodedValue.bytes, encodedValue.offset, encodedValue.length);
offsetToOrd = FieldArrayContext.parseOffsetArray(scratch);
} else {
offsetToOrd = null;
}
return true;
} else {
offsetToOrd = null;
return false;
}
}
public int count() {
if (hasValue) {
if (offsetToOrd != null) {
// HACK: trick CompositeSyntheticFieldLoader into serializing this layer as an array.
// (if offsetToOrd is not null, then an array was specified at index time, even if it contains just one value)
return offsetToOrd.length + 1;
} else {
return valueDocValues.docValueCount();
}
} else {
if (hasOffset) {
// trick CompositeSyntheticFieldLoader into serializing this layer as an empty array.
return 2;
} else {
return 0;
}
}
}
public void write(XContentBuilder b) throws IOException {
if (hasValue == false && hasOffset == false) {
return;
}
if (offsetToOrd != null && hasValue) {
long[] ords = new long[valueDocValues.docValueCount()];
for (int i = 0; i < valueDocValues.docValueCount(); i++) {
ords[i] = valueDocValues.nextOrd();
}
for (int offset : offsetToOrd) {
if (offset == -1) {
b.nullValue();
continue;
}
long ord = ords[offset];
BytesRef c = valueDocValues.lookupOrd(ord);
// This is keyword specific and needs to be updated once support is added for other field types:
b.utf8Value(c.bytes, c.offset, c.length);
}
} else if (offsetToOrd != null) {
// in case all values are NULLs
for (int offset : offsetToOrd) {
assert offset == -1;
b.nullValue();
}
} else {
for (int i = 0; i < valueDocValues.docValueCount(); i++) {
BytesRef c = valueDocValues.lookupOrd(valueDocValues.nextOrd());
b.utf8Value(c.bytes, c.offset, c.length);
}
}
}
}
}

View File

@ -0,0 +1,67 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.index.mapper;
import org.elasticsearch.common.io.stream.ByteArrayStreamInput;
import org.elasticsearch.test.ESTestCase;
import java.io.IOException;
import static org.elasticsearch.index.mapper.FieldArrayContext.parseOffsetArray;
public class FieldArrayContextTests extends ESTestCase {
public void testOffsets() throws IOException {
var context = new FieldArrayContext();
context.recordOffset("field", "a");
context.recordOffset("field", "a");
context.recordOffset("field", "b");
context.recordOffset("field", "z");
context.recordOffset("field", "a");
context.recordOffset("field", "b");
var parserContext = new TestDocumentParserContext();
context.addToLuceneDocument(parserContext);
var binaryDocValues = parserContext.doc().getField("field");
int[] offsetToOrd = parseOffsetArray(new ByteArrayStreamInput(binaryDocValues.binaryValue().bytes));
assertArrayEquals(new int[] { 0, 0, 1, 2, 0, 1 }, offsetToOrd);
}
public void testOffsetsWithNull() throws IOException {
var context = new FieldArrayContext();
context.recordNull("field");
context.recordOffset("field", "a");
context.recordOffset("field", "b");
context.recordOffset("field", "z");
context.recordNull("field");
context.recordOffset("field", "b");
var parserContext = new TestDocumentParserContext();
context.addToLuceneDocument(parserContext);
var binaryDocValues = parserContext.doc().getField("field");
int[] offsetToOrd = parseOffsetArray(new ByteArrayStreamInput(binaryDocValues.binaryValue().bytes));
assertArrayEquals(new int[] { -1, 0, 1, 2, -1, 1 }, offsetToOrd);
}
public void testEmptyOffset() throws IOException {
var context = new FieldArrayContext();
context.maybeRecordEmptyArray("field");
var parserContext = new TestDocumentParserContext();
context.addToLuceneDocument(parserContext);
var binaryDocValues = parserContext.doc().getField("field");
int[] offsetToOrd = parseOffsetArray(new ByteArrayStreamInput(binaryDocValues.binaryValue().bytes));
assertArrayEquals(new int[] {}, offsetToOrd);
}
}

View File

@ -971,4 +971,10 @@ public class KeywordFieldMapperTests extends MapperTestCase {
assertFalse(mapper.fieldType().isIndexed());
assertFalse(mapper.fieldType().hasDocValuesSkipper());
}
@Override
protected String randomSyntheticSourceKeep() {
// Only option "all" keeps array source in ignored source.
return randomFrom("all");
}
}

View File

@ -244,7 +244,8 @@ public class KeywordFieldTypeTests extends FieldTypeTestCase {
createIndexAnalyzers(),
ScriptCompiler.NONE,
Integer.MAX_VALUE,
IndexVersion.current()
IndexVersion.current(),
randomFrom(Mapper.SourceKeepMode.values())
).normalizer("lowercase").build(MapperBuilderContext.root(false, false)).fieldType();
assertEquals(List.of("value"), fetchSourceValue(normalizerMapper, "VALUE"));
assertEquals(List.of("42"), fetchSourceValue(normalizerMapper, 42L));

View File

@ -0,0 +1,237 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.index.mapper;
import org.apache.lucene.index.DirectoryReader;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.mapper.SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer.DocValuesWithOffsetsLoader;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentType;
import java.io.IOException;
import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder;
import static org.hamcrest.Matchers.nullValue;
public class KeywordOffsetDocValuesLoaderTests extends MapperServiceTestCase {
@Override
protected Settings getIndexSettings() {
return Settings.builder()
.put("index.mapping.source.mode", "synthetic")
.put("index.mapping.synthetic_source_keep", "arrays")
.build();
}
public void testOffsetArrayNoDocValues() throws Exception {
String mapping = """
{
"_doc": {
"properties": {
"field": {
"type": "keyword",
"doc_values": false
}
}
}
}
""";
try (var mapperService = createMapperService(mapping)) {
var fieldMapper = mapperService.mappingLookup().getMapper("field");
assertThat(fieldMapper.getOffsetFieldName(), nullValue());
}
}
public void testOffsetArrayStored() throws Exception {
String mapping = """
{
"_doc": {
"properties": {
"field": {
"type": "keyword",
"store": true
}
}
}
}
""";
try (var mapperService = createMapperService(mapping)) {
var fieldMapper = mapperService.mappingLookup().getMapper("field");
assertThat(fieldMapper.getOffsetFieldName(), nullValue());
}
}
public void testOffsetMultiFields() throws Exception {
String mapping = """
{
"_doc": {
"properties": {
"field": {
"type": "keyword",
"fields": {
"sub": {
"type": "text"
}
}
}
}
}
}
""";
try (var mapperService = createMapperService(mapping)) {
var fieldMapper = mapperService.mappingLookup().getMapper("field");
assertThat(fieldMapper.getOffsetFieldName(), nullValue());
}
}
public void testOffsetArrayNoSyntheticSource() throws Exception {
String mapping = """
{
"_doc": {
"properties": {
"field": {
"type": "keyword"
}
}
}
}
""";
try (var mapperService = createMapperService(Settings.EMPTY, mapping)) {
var fieldMapper = mapperService.mappingLookup().getMapper("field");
assertThat(fieldMapper.getOffsetFieldName(), nullValue());
}
}
public void testOffsetArrayNoSourceArrayKeep() throws Exception {
var settingsBuilder = Settings.builder().put("index.mapping.source.mode", "synthetic");
String mapping;
if (randomBoolean()) {
mapping = """
{
"_doc": {
"properties": {
"field": {
"type": "keyword",
"synthetic_source_keep": "{{synthetic_source_keep}}"
}
}
}
}
""".replace("{{synthetic_source_keep}}", randomBoolean() ? "none" : "all");
} else {
mapping = """
{
"_doc": {
"properties": {
"field": {
"type": "keyword"
}
}
}
}
""";
if (randomBoolean()) {
settingsBuilder.put("index.mapping.synthetic_source_keep", "none");
}
}
try (var mapperService = createMapperService(settingsBuilder.build(), mapping)) {
var fieldMapper = mapperService.mappingLookup().getMapper("field");
assertThat(fieldMapper.getOffsetFieldName(), nullValue());
}
}
public void testOffsetArray() throws Exception {
verifyOffsets("{\"field\":[\"z\",\"x\",\"y\",\"c\",\"b\",\"a\"]}");
verifyOffsets("{\"field\":[\"z\",null,\"y\",\"c\",null,\"a\"]}");
}
public void testOffsetNestedArray() throws Exception {
verifyOffsets("{\"field\":[\"z\",[\"y\"],[\"c\"],null,\"a\"]}", "{\"field\":[\"z\",\"y\",\"c\",null,\"a\"]}");
verifyOffsets(
"{\"field\":[\"z\",[\"y\", [\"k\"]],[\"c\", [\"l\"]],null,\"a\"]}",
"{\"field\":[\"z\",\"y\",\"k\",\"c\",\"l\",null,\"a\"]}"
);
}
public void testOffsetEmptyArray() throws Exception {
verifyOffsets("{\"field\":[]}");
}
public void testOffsetArrayWithNulls() throws Exception {
verifyOffsets("{\"field\":[null,null,null]}");
}
public void testOffsetArrayRandom() throws Exception {
StringBuilder values = new StringBuilder();
int numValues = randomIntBetween(0, 256);
for (int i = 0; i < numValues; i++) {
if (randomInt(10) == 1) {
values.append("null");
} else {
values.append('"').append(randomAlphanumericOfLength(2)).append('"');
}
if (i != (numValues - 1)) {
values.append(',');
}
}
verifyOffsets("{\"field\":[" + values + "]}");
}
private void verifyOffsets(String source) throws IOException {
verifyOffsets(source, source);
}
private void verifyOffsets(String source, String expectedSource) throws IOException {
String mapping = """
{
"_doc": {
"properties": {
"field": {
"type": "keyword"
}
}
}
}
""";
verifyOffsets(mapping, source, expectedSource);
}
private void verifyOffsets(String mapping, String source, String expectedSource) throws IOException {
try (var mapperService = createMapperService(mapping)) {
var mapper = mapperService.documentMapper();
try (var directory = newDirectory()) {
var iw = indexWriterForSyntheticSource(directory);
var doc = mapper.parse(new SourceToParse("_id", new BytesArray(source), XContentType.JSON));
doc.updateSeqID(0, 0);
doc.version().setLongValue(0);
iw.addDocuments(doc.docs());
iw.close();
try (var indexReader = wrapInMockESDirectoryReader(DirectoryReader.open(directory))) {
var layer = new SortedSetWithOffsetsDocValuesSyntheticFieldLoaderLayer("field", "field.offsets");
var leafReader = indexReader.leaves().getFirst().reader();
var loader = (DocValuesWithOffsetsLoader) layer.docValuesLoader(leafReader, new int[] { 0 });
assertTrue(loader.advanceToDoc(0));
assertTrue(loader.count() > 0);
XContentBuilder builder = jsonBuilder().startObject();
builder.startArray("field");
loader.write(builder);
builder.endArray().endObject();
var actual = Strings.toString(builder);
assertEquals(expectedSource, actual);
}
}
}
}
}

View File

@ -0,0 +1,331 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/
package org.elasticsearch.index.mapper;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReader;
import org.elasticsearch.action.admin.indices.forcemerge.ForceMergeRequest;
import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.support.WriteRequest;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.query.IdsQueryBuilder;
import org.elasticsearch.test.ESSingleNodeTestCase;
import org.elasticsearch.xcontent.XContentBuilder;
import org.hamcrest.Matchers;
import java.io.IOException;
import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder;
import static org.hamcrest.Matchers.contains;
import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.hasKey;
import static org.hamcrest.Matchers.nullValue;
public class KeywordSyntheticSourceNativeArrayIntegrationTests extends ESSingleNodeTestCase {
public void testSynthesizeArray() throws Exception {
var arrayValues = new Object[][] {
new Object[] { "z", "y", null, "x", null, "v" },
new Object[] { null, "b", null, "a" },
new Object[] { null },
new Object[] { null, null, null },
new Object[] { "c", "b", "a" } };
verifySyntheticArray(arrayValues);
}
public void testSynthesizeEmptyArray() throws Exception {
var arrayValues = new Object[][] { new Object[] {} };
verifySyntheticArray(arrayValues);
}
public void testSynthesizeArrayRandom() throws Exception {
var arrayValues = new Object[][] { generateRandomStringArray(64, 8, false, true) };
verifySyntheticArray(arrayValues);
}
public void testSynthesizeArrayIgnoreAbove() throws Exception {
var mapping = jsonBuilder().startObject()
.startObject("properties")
.startObject("field")
.field("type", "keyword")
.field("ignore_above", 4)
.endObject()
.endObject()
.endObject();
// Note that values that would be ignored are added at the end of arrays;
// this makes testing easier, as ignored values are always synthesized after regular values:
var arrayValues = new Object[][] {
new Object[] { null, "a", "ab", "abc", "abcd", null, "abcde" },
new Object[] { "12345", "12345", "12345" },
new Object[] { "123", "1234", "12345" },
new Object[] { null, null, null, "blabla" },
new Object[] { "1", "2", "3", "blabla" } };
verifySyntheticArray(arrayValues, mapping, 4, "_id", "field._original");
}
public void testSynthesizeObjectArray() throws Exception {
List<List<Object[]>> documents = new ArrayList<>();
{
List<Object[]> document = new ArrayList<>();
document.add(new Object[] { "z", "y", "x" });
document.add(new Object[] { "m", "l", "m" });
document.add(new Object[] { "c", "b", "a" });
documents.add(document);
}
{
List<Object[]> document = new ArrayList<>();
document.add(new Object[] { "9", "7", "5" });
document.add(new Object[] { "2", "4", "6" });
document.add(new Object[] { "7", "6", "5" });
documents.add(document);
}
verifySyntheticObjectArray(documents);
}
public void testSynthesizeArrayInObjectField() throws Exception {
List<Object[]> documents = new ArrayList<>();
documents.add(new Object[] { "z", "y", "x" });
documents.add(new Object[] { "m", "l", "m" });
documents.add(new Object[] { "c", "b", "a" });
documents.add(new Object[] { "9", "7", "5" });
documents.add(new Object[] { "2", "4", "6" });
documents.add(new Object[] { "7", "6", "5" });
verifySyntheticArrayInObject(documents);
}
public void testSynthesizeArrayInObjectFieldRandom() throws Exception {
List<Object[]> documents = new ArrayList<>();
int numDocs = randomIntBetween(8, 256);
for (int i = 0; i < numDocs; i++) {
documents.add(generateRandomStringArray(64, 8, false, true));
}
verifySyntheticArrayInObject(documents);
}
private void verifySyntheticArray(Object[][] arrays) throws IOException {
var mapping = jsonBuilder().startObject()
.startObject("properties")
.startObject("field")
.field("type", "keyword")
.endObject()
.endObject()
.endObject();
verifySyntheticArray(arrays, mapping, null, "_id");
}
private void verifySyntheticArray(Object[][] arrays, XContentBuilder mapping, Integer ignoreAbove, String... expectedStoredFields)
throws IOException {
var indexService = createIndex(
"test-index",
Settings.builder().put("index.mapping.source.mode", "synthetic").put("index.mapping.synthetic_source_keep", "arrays").build(),
mapping
);
for (int i = 0; i < arrays.length; i++) {
var array = arrays[i];
var indexRequest = new IndexRequest("test-index");
indexRequest.id("my-id-" + i);
var source = jsonBuilder().startObject();
if (array != null) {
source.startArray("field");
for (Object arrayValue : array) {
source.value(arrayValue);
}
source.endArray();
} else {
source.field("field").nullValue();
}
indexRequest.source(source.endObject());
indexRequest.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
client().index(indexRequest).actionGet();
var searchRequest = new SearchRequest("test-index");
searchRequest.source().query(new IdsQueryBuilder().addIds("my-id-" + i));
var searchResponse = client().search(searchRequest).actionGet();
try {
var hit = searchResponse.getHits().getHits()[0];
assertThat(hit.getId(), equalTo("my-id-" + i));
var sourceAsMap = hit.getSourceAsMap();
assertThat(sourceAsMap, hasKey("field"));
var actualArray = (List<?>) sourceAsMap.get("field");
if (array == null) {
assertThat(actualArray, nullValue());
} else if (array.length == 0) {
assertThat(actualArray, empty());
} else {
assertThat(actualArray, Matchers.contains(array));
}
} finally {
searchResponse.decRef();
}
}
try (var searcher = indexService.getShard(0).acquireSearcher(getTestName())) {
var reader = searcher.getDirectoryReader();
for (int i = 0; i < arrays.length; i++) {
var document = reader.storedFields().document(i);
// Verify that there is no ignored source:
Set<String> storedFieldNames = new LinkedHashSet<>(document.getFields().stream().map(IndexableField::name).toList());
assertThat(storedFieldNames, contains(expectedStoredFields));
}
var fieldInfo = FieldInfos.getMergedFieldInfos(reader).fieldInfo("field.offsets");
assertThat(fieldInfo.getDocValuesType(), equalTo(DocValuesType.SORTED));
}
}
private void verifySyntheticObjectArray(List<List<Object[]>> documents) throws IOException {
var indexService = createIndex(
"test-index",
Settings.builder().put("index.mapping.source.mode", "synthetic").put("index.mapping.synthetic_source_keep", "arrays").build(),
jsonBuilder().startObject()
.startObject("properties")
.startObject("object")
.startObject("properties")
.startObject("field")
.field("type", "keyword")
.endObject()
.endObject()
.endObject()
.endObject()
.endObject()
);
for (int i = 0; i < documents.size(); i++) {
var document = documents.get(i);
var indexRequest = new IndexRequest("test-index");
indexRequest.id("my-id-" + i);
var source = jsonBuilder().startObject();
source.startArray("object");
for (Object[] arrayValue : document) {
source.startObject();
source.array("field", arrayValue);
source.endObject();
}
source.endArray();
indexRequest.source(source.endObject());
indexRequest.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
client().index(indexRequest).actionGet();
var searchRequest = new SearchRequest("test-index");
searchRequest.source().query(new IdsQueryBuilder().addIds("my-id-" + i));
var searchResponse = client().search(searchRequest).actionGet();
try {
var hit = searchResponse.getHits().getHits()[0];
assertThat(hit.getId(), equalTo("my-id-" + i));
var sourceAsMap = hit.getSourceAsMap();
var objectArray = (List<?>) sourceAsMap.get("object");
for (int j = 0; j < document.size(); j++) {
var expected = document.get(j);
List<?> actual = (List<?>) ((Map<?, ?>) objectArray.get(j)).get("field");
assertThat(actual, Matchers.contains(expected));
}
} finally {
searchResponse.decRef();
}
}
indexService.getShard(0).forceMerge(new ForceMergeRequest("test-index").maxNumSegments(1));
try (var searcher = indexService.getShard(0).acquireSearcher(getTestName())) {
var reader = searcher.getDirectoryReader();
for (int i = 0; i < documents.size(); i++) {
var document = reader.storedFields().document(i);
// Verify that there is ignored source because the leaf array is wrapped by an object array:
List<String> storedFieldNames = document.getFields().stream().map(IndexableField::name).toList();
assertThat(storedFieldNames, contains("_id", "_ignored_source"));
// Verify that there is no offset field:
LeafReader leafReader = reader.leaves().get(0).reader();
for (FieldInfo fieldInfo : leafReader.getFieldInfos()) {
String name = fieldInfo.getName();
assertFalse("expected no field that contains [offsets] in name, but found [" + name + "]", name.contains("offsets"));
}
var binaryDocValues = leafReader.getBinaryDocValues("object.field.offsets");
assertThat(binaryDocValues, nullValue());
}
}
}
private void verifySyntheticArrayInObject(List<Object[]> documents) throws IOException {
var indexService = createIndex(
"test-index",
Settings.builder().put("index.mapping.source.mode", "synthetic").put("index.mapping.synthetic_source_keep", "arrays").build(),
jsonBuilder().startObject()
.startObject("properties")
.startObject("object")
.startObject("properties")
.startObject("field")
.field("type", "keyword")
.endObject()
.endObject()
.endObject()
.endObject()
.endObject()
);
for (int i = 0; i < documents.size(); i++) {
var arrayValue = documents.get(i);
var indexRequest = new IndexRequest("test-index");
indexRequest.id("my-id-" + i);
var source = jsonBuilder().startObject();
source.startObject("object");
source.array("field", arrayValue);
source.endObject();
indexRequest.source(source.endObject());
indexRequest.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE);
client().index(indexRequest).actionGet();
var searchRequest = new SearchRequest("test-index");
searchRequest.source().query(new IdsQueryBuilder().addIds("my-id-" + i));
var searchResponse = client().search(searchRequest).actionGet();
try {
var hit = searchResponse.getHits().getHits()[0];
assertThat(hit.getId(), equalTo("my-id-" + i));
var sourceAsMap = hit.getSourceAsMap();
var objectArray = (Map<?, ?>) sourceAsMap.get("object");
List<?> actual = (List<?>) objectArray.get("field");
if (arrayValue == null) {
assertThat(actual, nullValue());
} else if (arrayValue.length == 0) {
assertThat(actual, empty());
} else {
assertThat(actual, Matchers.contains(arrayValue));
}
} finally {
searchResponse.decRef();
}
}
indexService.getShard(0).forceMerge(new ForceMergeRequest("test-index").maxNumSegments(1));
try (var searcher = indexService.getShard(0).acquireSearcher(getTestName())) {
var reader = searcher.getDirectoryReader();
for (int i = 0; i < documents.size(); i++) {
var document = reader.storedFields().document(i);
// Verify that there is no ignored source:
Set<String> storedFieldNames = new LinkedHashSet<>(document.getFields().stream().map(IndexableField::name).toList());
assertThat(storedFieldNames, contains("_id"));
}
var fieldInfo = FieldInfos.getMergedFieldInfos(reader).fieldInfo("object.field.offsets");
assertThat(fieldInfo.getDocValuesType(), equalTo(DocValuesType.SORTED));
}
}
}

View File

@ -64,7 +64,8 @@ public class MultiFieldsTests extends ESTestCase {
IndexAnalyzers.of(Map.of(), Map.of("normalizer", Lucene.STANDARD_ANALYZER), Map.of()),
ScriptCompiler.NONE,
Integer.MAX_VALUE,
IndexVersion.current()
IndexVersion.current(),
Mapper.SourceKeepMode.NONE
);
if (isStored) {
keywordFieldMapperBuilder.stored(true);

View File

@ -1707,7 +1707,7 @@ public abstract class MapperTestCase extends MapperServiceTestCase {
SyntheticSourceExample example = syntheticSourceSupportForKeepTests(shouldUseIgnoreMalformed()).example(1);
DocumentMapper mapperAll = createSytheticSourceMapperService(mapping(b -> {
b.startObject("field");
b.field("synthetic_source_keep", randomFrom("arrays", "all")); // Both options keep array source.
b.field("synthetic_source_keep", randomSyntheticSourceKeep());
example.mapping().accept(b);
b.endObject();
})).documentMapper();
@ -1726,6 +1726,10 @@ public abstract class MapperTestCase extends MapperServiceTestCase {
assertThat(actual, equalTo(expected));
}
protected String randomSyntheticSourceKeep() {
return randomFrom("all", "arrays");
}
@Override
protected final <T> T compileScript(Script script, ScriptContext<T> context) {
return ingestScriptSupport().compileScript(script, context);