ESQL: Fix sneaky bug in single value query (#127146)

Fixes a sneaky bug in single value query that happens when run against
a `keyword` field that:
* Is defined on every field
* Contains the same number of distinct values as documents

The simplest way to reproduce this is to build a single shard index
with two documents:
```
{"a": "foo"}
{"a": ["foo", "bar"]}
```

I don't think this is particularly likely in production, but it's quite
likely in tests. Which is where I hit this - in the serverless tests we
index an index with four documents into three shards and two of the
documents look just like this. So about 1/3 or the time we triggered
this bug.

Mechanically this is triggered by the `SingleValueMatchQuery`
incorrectly rewriting itself to `MatchAll` in the scenario above. This
fixes that.
This commit is contained in:
Nik Everett 2025-04-22 14:23:50 -04:00 committed by GitHub
parent a5f935a352
commit c2fdc06465
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 72 additions and 45 deletions

View File

@ -0,0 +1,5 @@
pr: 127146
summary: Fix sneaky bug in single value query
area: ES|QL
type: bug
issues: []

View File

@ -219,7 +219,7 @@ public final class SingleValueMatchQuery extends Query {
}
} else if (lfd instanceof LeafOrdinalsFieldData) {
final Terms terms = context.reader().terms(fieldData.getFieldName());
if (terms == null || terms.getDocCount() != context.reader().maxDoc() || terms.size() != terms.getDocCount()) {
if (terms == null || terms.getDocCount() != context.reader().maxDoc() || terms.getSumDocFreq() != terms.getDocCount()) {
return super.rewrite(indexSearcher);
}
} else {

View File

@ -48,10 +48,11 @@ public class SingleValueMathQueryTests extends MapperServiceTestCase {
void assertRewrite(IndexSearcher indexSearcher, Query query) throws IOException;
}
@ParametersFactory
@ParametersFactory(argumentFormatting = "%s")
public static List<Object[]> params() {
List<Object[]> params = new ArrayList<>();
for (String fieldType : new String[] { "long", "integer", "short", "byte", "double", "float", "keyword" }) {
params.add(new Object[] { new SneakyTwo(fieldType) });
for (boolean multivaluedField : new boolean[] { true, false }) {
for (boolean allowEmpty : new boolean[] { true, false }) {
params.add(new Object[] { new StandardSetup(fieldType, multivaluedField, allowEmpty, 100) });
@ -129,13 +130,13 @@ public class SingleValueMathQueryTests extends MapperServiceTestCase {
@Override
public List<List<Object>> build(RandomIndexWriter iw) throws IOException {
List<List<Object>> fieldValues = new ArrayList<>(100);
List<List<Object>> docs = new ArrayList<>(count);
for (int i = 0; i < count; i++) {
List<Object> values = values(i);
fieldValues.add(values);
docs.add(values);
iw.addDocument(docFor(values));
}
return fieldValues;
return docs;
}
@Override
@ -154,7 +155,7 @@ public class SingleValueMathQueryTests extends MapperServiceTestCase {
int count = between(2, 10);
Set<Object> set = new HashSet<>(count);
while (set.size() < count) {
set.add(randomValue());
set.add(randomValue(fieldType));
}
return List.copyOf(set);
}
@ -162,45 +163,66 @@ public class SingleValueMathQueryTests extends MapperServiceTestCase {
if (empty && (i == 0 || randomBoolean())) {
return List.of();
}
return List.of(randomValue());
}
private Object randomValue() {
return switch (fieldType) {
case "long" -> randomLong();
case "integer" -> randomInt();
case "short" -> randomShort();
case "byte" -> randomByte();
case "double" -> randomDouble();
case "float" -> randomFloat();
case "keyword" -> randomAlphaOfLength(5);
default -> throw new UnsupportedOperationException();
};
}
private List<IndexableField> docFor(Iterable<Object> values) {
List<IndexableField> fields = new ArrayList<>();
switch (fieldType) {
case "long", "integer", "short", "byte" -> {
for (Object v : values) {
long l = ((Number) v).longValue();
fields.add(new LongField("foo", l, Field.Store.NO));
}
}
case "double", "float" -> {
for (Object v : values) {
double d = ((Number) v).doubleValue();
fields.add(new DoubleField("foo", d, Field.Store.NO));
}
}
case "keyword" -> {
for (Object v : values) {
fields.add(new KeywordField("foo", v.toString(), Field.Store.NO));
}
}
default -> throw new UnsupportedOperationException();
}
return fields;
return List.of(randomValue(fieldType));
}
}
/**
* Tests a scenario where we were incorrectly rewriting {@code keyword} fields to
* {@link MatchAllDocsQuery} when:
* <ul>
* <li>Is defined on every field</li>
* <li>Contains the same number of distinct values as documents</li>
* </ul>
*/
private record SneakyTwo(String fieldType) implements Setup {
@Override
public XContentBuilder mapping(XContentBuilder builder) throws IOException {
return builder.startObject("foo").field("type", fieldType).endObject();
}
@Override
public List<List<Object>> build(RandomIndexWriter iw) throws IOException {
Object first = randomValue(fieldType);
Object second = randomValue(fieldType);
List<Object> justFirst = List.of(first);
List<Object> both = List.of(first, second);
iw.addDocument(docFor(justFirst));
iw.addDocument(docFor(both));
return List.of(justFirst, both);
}
@Override
public void assertRewrite(IndexSearcher indexSearcher, Query query) throws IOException {
// There are multivalued fields
assertThat(query.rewrite(indexSearcher), sameInstance(query));
}
}
private static Object randomValue(String fieldType) {
return switch (fieldType) {
case "long" -> randomLong();
case "integer" -> randomInt();
case "short" -> randomShort();
case "byte" -> randomByte();
case "double" -> randomDouble();
case "float" -> randomFloat();
case "keyword" -> randomAlphaOfLength(5);
default -> throw new UnsupportedOperationException();
};
}
private static List<IndexableField> docFor(Iterable<Object> values) {
List<IndexableField> fields = new ArrayList<>();
for (Object v : values) {
fields.add(switch (v) {
case Double n -> new DoubleField("foo", n, Field.Store.NO);
case Float n -> new DoubleField("foo", n, Field.Store.NO);
case Number n -> new LongField("foo", n.longValue(), Field.Store.NO);
case String s -> new KeywordField("foo", s, Field.Store.NO);
default -> throw new UnsupportedOperationException();
});
}
return fields;
}
}