[ES|QL] Add suggested_cast (#127139)

Adds a `suggested_cast` for unsupported fields based on `original_types`
for specific cases:
- when a field is mapped to `datetime` and `date_nanos`, suggests `date_nanos`
- when a field is mapped to `aggregate_metric_double` and other numerics,
suggests `aggregate_metric_double`
- when a field is unsupported anywhere, suggests nothing
- suggests `keyword` in all other cases

For example, a field that is mapped to `aggregate_metric_double` in one index,
but `double` in another will have a response like:
```
    {
      "name" : "agg_metric",
      "type" : "unsupported",
      "original_types" : [
        "aggregate_metric_double",
        "double"
      ],
      "suggested_cast" : "aggregate_metric_double"
    }
```
A field that is completely unsupported will have a response like:
```
    {
      "name" : "binary_field",
      "type" : "unsupported",
      "original_types" : [
        "binary"
      ]
    }
```
This commit is contained in:
Larisa Motova 2025-04-28 06:51:53 -10:00 committed by GitHub
parent cd4fcbff21
commit 12962583c2
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
10 changed files with 333 additions and 7 deletions

View File

@ -0,0 +1,5 @@
pr: 127139
summary: Add `suggested_cast`
area: ES|QL
type: enhancement
issues: []

View File

@ -708,6 +708,29 @@ public enum DataType {
};
}
public static DataType suggestedCast(Set<DataType> originalTypes) {
if (originalTypes.isEmpty() || originalTypes.contains(UNSUPPORTED)) {
return null;
}
if (originalTypes.contains(DATE_NANOS) && originalTypes.contains(DATETIME) && originalTypes.size() == 2) {
return DATE_NANOS;
}
if (originalTypes.contains(AGGREGATE_METRIC_DOUBLE)) {
boolean allNumeric = true;
for (DataType type : originalTypes) {
if (type.isNumeric() == false && type != AGGREGATE_METRIC_DOUBLE) {
allNumeric = false;
break;
}
}
if (allNumeric) {
return AGGREGATE_METRIC_DOUBLE;
}
}
return KEYWORD;
}
/**
* Named parameters with default values. It's just easier to do this with
* a builder in java....

View File

@ -27,6 +27,7 @@ import org.elasticsearch.test.cluster.LogType;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentType;
import org.elasticsearch.xcontent.json.JsonXContent;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.qa.rest.RestEsqlTestCase;
import org.elasticsearch.xpack.esql.tools.ProfileParser;
import org.hamcrest.Matchers;
@ -40,11 +41,14 @@ import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static org.elasticsearch.test.ListMatcher.matchesList;
import static org.elasticsearch.test.MapMatcher.assertMap;
@ -648,6 +652,120 @@ public class RestEsqlIT extends RestEsqlTestCase {
}
}
public void testSuggestedCast() throws IOException {
// TODO: Figure out how best to make sure we don't leave out new types
Map<DataType, String> typesAndValues = Map.ofEntries(
Map.entry(DataType.BOOLEAN, "\"true\""),
Map.entry(DataType.LONG, "-1234567890234567"),
Map.entry(DataType.INTEGER, "123"),
Map.entry(DataType.UNSIGNED_LONG, "1234567890234567"),
Map.entry(DataType.DOUBLE, "12.4"),
Map.entry(DataType.KEYWORD, "\"keyword\""),
Map.entry(DataType.TEXT, "\"some text\""),
Map.entry(DataType.DATE_NANOS, "\"2015-01-01T12:10:30.123456789Z\""),
Map.entry(DataType.DATETIME, "\"2015-01-01T12:10:30Z\""),
Map.entry(DataType.IP, "\"192.168.30.1\""),
Map.entry(DataType.VERSION, "\"8.19.0\""),
Map.entry(DataType.GEO_POINT, "[-71.34, 41.12]"),
Map.entry(DataType.GEO_SHAPE, """
{
"type": "Point",
"coordinates": [-77.03653, 38.897676]
}
"""),
Map.entry(DataType.AGGREGATE_METRIC_DOUBLE, """
{
"max": 14983.1
}
""")
);
Set<DataType> shouldBeSupported = Stream.of(DataType.values()).filter(DataType::isRepresentable).collect(Collectors.toSet());
shouldBeSupported.remove(DataType.CARTESIAN_POINT);
shouldBeSupported.remove(DataType.CARTESIAN_SHAPE);
shouldBeSupported.remove(DataType.NULL);
shouldBeSupported.remove(DataType.DOC_DATA_TYPE);
shouldBeSupported.remove(DataType.TSID_DATA_TYPE);
for (DataType type : shouldBeSupported) {
assertTrue(typesAndValues.containsKey(type));
}
assertThat(typesAndValues.size(), equalTo(shouldBeSupported.size()));
for (DataType type : typesAndValues.keySet()) {
String additionalProperties = "";
if (type == DataType.AGGREGATE_METRIC_DOUBLE) {
additionalProperties += """
,
"metrics": ["max"],
"default_metric": "max"
""";
}
createIndex("index-" + type.esType(), null, """
"properties": {
"my_field": {
"type": "%s" %s
}
}
""".formatted(type.esType(), additionalProperties));
Request doc = new Request("PUT", "index-" + type.esType() + "/_doc/1");
doc.setJsonEntity("{\"my_field\": " + typesAndValues.get(type) + "}");
client().performRequest(doc);
}
List<DataType> listOfTypes = new ArrayList<>(typesAndValues.keySet());
listOfTypes.sort(Comparator.comparing(DataType::typeName));
for (int i = 0; i < listOfTypes.size(); i++) {
for (int j = i + 1; j < listOfTypes.size(); j++) {
String query = """
{
"query": "FROM index-%s,index-%s | LIMIT 100 | KEEP my_field"
}
""".formatted(listOfTypes.get(i).esType(), listOfTypes.get(j).esType());
Request request = new Request("POST", "/_query");
request.setJsonEntity(query);
Response resp = client().performRequest(request);
Map<String, Object> results = entityAsMap(resp);
List<?> columns = (List<?>) results.get("columns");
DataType suggestedCast = DataType.suggestedCast(Set.of(listOfTypes.get(i), listOfTypes.get(j)));
assertThat(
columns,
equalTo(
List.of(
Map.ofEntries(
Map.entry("name", "my_field"),
Map.entry("type", "unsupported"),
Map.entry("original_types", List.of(listOfTypes.get(i).typeName(), listOfTypes.get(j).typeName())),
Map.entry("suggested_cast", suggestedCast.typeName())
)
)
)
);
String castedQuery = """
{
"query": "FROM index-%s,index-%s | LIMIT 100 | EVAL my_field = my_field::%s"
}
""".formatted(
listOfTypes.get(i).esType(),
listOfTypes.get(j).esType(),
suggestedCast == DataType.KEYWORD ? "STRING" : suggestedCast.nameUpper()
);
Request castedRequest = new Request("POST", "/_query");
castedRequest.setJsonEntity(castedQuery);
Response castedResponse = client().performRequest(castedRequest);
Map<String, Object> castedResults = entityAsMap(castedResponse);
List<?> castedColumns = (List<?>) castedResults.get("columns");
assertThat(
castedColumns,
equalTo(List.of(Map.ofEntries(Map.entry("name", "my_field"), Map.entry("type", suggestedCast.typeName()))))
);
}
}
for (DataType type : typesAndValues.keySet()) {
deleteIndex("index-" + type.esType());
}
}
static MapMatcher commonProfile() {
return matchesMap() //
.entry("description", any(String.class))

View File

@ -32,6 +32,7 @@ import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentType;
import org.elasticsearch.xcontent.json.JsonXContent;
import org.elasticsearch.xpack.esql.action.EsqlCapabilities;
import org.elasticsearch.xpack.esql.core.type.DataType;
import org.elasticsearch.xpack.esql.plugin.QueryPragmas;
import org.hamcrest.Matcher;
import org.junit.Before;
@ -45,8 +46,10 @@ import java.util.Comparator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.TreeMap;
import java.util.function.Function;
import java.util.stream.Collectors;
import static org.elasticsearch.test.ListMatcher.matchesList;
import static org.elasticsearch.test.MapMatcher.assertMap;
@ -690,7 +693,7 @@ public abstract class FieldExtractorTestCase extends ESRestTestCase {
* </pre>.
*/
public void testIncompatibleTypes() throws IOException {
assumeOriginalTypesReported();
assumeSuggestedCastReported();
keywordTest().createIndex("test1", "f");
index("test1", """
{"f": "f1"}""");
@ -764,7 +767,7 @@ public abstract class FieldExtractorTestCase extends ESRestTestCase {
* </pre>.
*/
public void testMergeKeywordAndObject() throws IOException {
assumeOriginalTypesReported();
assumeSuggestedCastReported();
keywordTest().createIndex("test1", "file");
index("test1", """
{"file": "f1"}""");
@ -959,7 +962,7 @@ public abstract class FieldExtractorTestCase extends ESRestTestCase {
* In an ideal world we'd promote the {@code integer} to an {@code long} and just go.
*/
public void testLongIntegerConflict() throws IOException {
assumeOriginalTypesReported();
assumeSuggestedCastReported();
longTest().sourceMode(SourceMode.DEFAULT).createIndex("test1", "emp_no");
index("test1", """
{"emp_no": 1}""");
@ -1002,7 +1005,7 @@ public abstract class FieldExtractorTestCase extends ESRestTestCase {
* In an ideal world we'd promote the {@code short} to an {@code integer} and just go.
*/
public void testIntegerShortConflict() throws IOException {
assumeOriginalTypesReported();
assumeSuggestedCastReported();
intTest().sourceMode(SourceMode.DEFAULT).createIndex("test1", "emp_no");
index("test1", """
{"emp_no": 1}""");
@ -1051,7 +1054,7 @@ public abstract class FieldExtractorTestCase extends ESRestTestCase {
* </pre>.
*/
public void testTypeConflictInObject() throws IOException {
assumeOriginalTypesReported();
assumeSuggestedCastReported();
createIndex("test1", empNoInObject("integer"));
index("test1", """
{"foo": {"emp_no": 1}}""");
@ -1379,6 +1382,12 @@ public abstract class FieldExtractorTestCase extends ESRestTestCase {
assumeTrue("This test makes sense for versions that report original types", requiredClusterCapability);
}
private void assumeSuggestedCastReported() throws IOException {
var capsName = EsqlCapabilities.Cap.SUGGESTED_CAST.name().toLowerCase(Locale.ROOT);
boolean requiredClusterCapability = clusterHasCapability("POST", "/_query", List.of(), List.of(capsName)).orElse(false);
assumeTrue("This test makes sense for versions that report suggested casts", requiredClusterCapability);
}
private CheckedConsumer<XContentBuilder, IOException> empNoInObject(String empNoType) {
return index -> {
index.startObject("properties");
@ -1715,7 +1724,19 @@ public abstract class FieldExtractorTestCase extends ESRestTestCase {
}
private static Map<String, Object> unsupportedColumnInfo(String name, String... originalTypes) {
return Map.of("name", name, "type", "unsupported", "original_types", List.of(originalTypes));
DataType suggested = DataType.suggestedCast(
List.of(originalTypes).stream().map(DataType::fromTypeName).filter(Objects::nonNull).collect(Collectors.toSet())
);
if (suggested == null) {
return Map.of("name", name, "type", "unsupported", "original_types", List.of(originalTypes));
} else {
return Map.ofEntries(
Map.entry("name", name),
Map.entry("type", "unsupported"),
Map.entry("original_types", List.of(originalTypes)),
Map.entry("suggested_cast", suggested.typeName())
);
}
}
private static void index(String name, String... docs) throws IOException {

View File

@ -481,3 +481,23 @@ x:integer | agg_metric:aggregate_metric_double
[5032, 11111, 40814] | {"min":5032.0,"max":40814.0,"sum":56957.0,"value_count":3}
//end::toAggregateMetricDoubleMv-result[]
;
convertToAggregateMetricDoubleCastingOperatorFromDouble
required_capability: suggested_cast
ROW x = 29384.1256
| EVAL agg_metric = x::aggregate_metric_double
;
x:double | agg_metric:aggregate_metric_double
29384.1256 | {"min":29384.1256,"max":29384.1256,"sum":29384.1256,"value_count":1}
;
convertToAggregateMetricDoubleCastingOperatorFromInt
required_capability: suggested_cast
ROW x = 55555
| EVAL agg_metric = x::aggregate_metric_double
;
x:integer | agg_metric:aggregate_metric_double
55555 | {"min":55555,"max":55555,"sum":55555,"value_count":1}
;

View File

@ -23,6 +23,7 @@ import org.elasticsearch.xpack.esql.core.type.DataType;
import java.io.IOException;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg;
import static org.elasticsearch.xcontent.ConstructingObjectParser.optionalConstructorArg;
@ -72,6 +73,9 @@ public class ColumnInfoImpl implements ColumnInfo {
@Nullable
private final List<String> originalTypes;
@Nullable
private final DataType suggestedCast;
@ParserConstructor
public ColumnInfoImpl(String name, String type, @Nullable List<String> originalTypes) {
this(name, DataType.fromEs(type), originalTypes);
@ -81,6 +85,16 @@ public class ColumnInfoImpl implements ColumnInfo {
this.name = name;
this.type = type;
this.originalTypes = originalTypes;
this.suggestedCast = calculateSuggestedCast(this.originalTypes);
}
private static DataType calculateSuggestedCast(List<String> originalTypes) {
if (originalTypes == null) {
return null;
}
return DataType.suggestedCast(
originalTypes.stream().map(DataType::fromTypeName).filter(Objects::nonNull).collect(Collectors.toSet())
);
}
public ColumnInfoImpl(StreamInput in) throws IOException {
@ -88,8 +102,10 @@ public class ColumnInfoImpl implements ColumnInfo {
this.type = DataType.fromEs(in.readString());
if (in.getTransportVersion().onOrAfter(TransportVersions.ESQL_REPORT_ORIGINAL_TYPES)) {
this.originalTypes = in.readOptionalStringCollectionAsList();
this.suggestedCast = calculateSuggestedCast(this.originalTypes);
} else {
this.originalTypes = null;
this.suggestedCast = null;
}
}
@ -110,6 +126,9 @@ public class ColumnInfoImpl implements ColumnInfo {
if (originalTypes != null) {
builder.field("original_types", originalTypes);
}
if (suggestedCast != null) {
builder.field("suggested_cast", suggestedCast.typeName());
}
builder.endObject();
return builder;
}

View File

@ -1044,7 +1044,12 @@ public class EsqlCapabilities {
/**
* Support for the SAMPLE command
*/
SAMPLE(Build.current().isSnapshot());
SAMPLE(Build.current().isSnapshot()),
/**
* The {@code _query} API now gives a cast recommendation if multiple types are found in certain instances.
*/
SUGGESTED_CAST;
private final boolean enabled;

View File

@ -36,6 +36,7 @@ import org.elasticsearch.xpack.esql.core.type.DataTypeConverter;
import org.elasticsearch.xpack.esql.core.util.NumericUtils;
import org.elasticsearch.xpack.esql.core.util.StringUtils;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.AbstractConvertFunction;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToAggregateMetricDouble;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToBoolean;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToCartesianPoint;
import org.elasticsearch.xpack.esql.expression.function.scalar.convert.ToCartesianShape;
@ -70,6 +71,7 @@ import java.util.function.BiFunction;
import java.util.function.Function;
import static java.util.Map.entry;
import static org.elasticsearch.xpack.esql.core.type.DataType.AGGREGATE_METRIC_DOUBLE;
import static org.elasticsearch.xpack.esql.core.type.DataType.BOOLEAN;
import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_POINT;
import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_SHAPE;
@ -112,6 +114,7 @@ public class EsqlDataTypeConverter {
public static final DateFormatter HOUR_MINUTE_SECOND = DateFormatter.forPattern("strict_hour_minute_second_fraction");
private static final Map<DataType, BiFunction<Source, Expression, AbstractConvertFunction>> TYPE_TO_CONVERTER_FUNCTION = Map.ofEntries(
entry(AGGREGATE_METRIC_DOUBLE, ToAggregateMetricDouble::new),
entry(BOOLEAN, ToBoolean::new),
entry(CARTESIAN_POINT, ToCartesianPoint::new),
entry(CARTESIAN_SHAPE, ToCartesianShape::new),

View File

@ -12,9 +12,14 @@ import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.xpack.esql.core.type.DataType;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import static org.elasticsearch.xpack.esql.core.type.DataType.AGGREGATE_METRIC_DOUBLE;
import static org.elasticsearch.xpack.esql.core.type.DataType.BOOLEAN;
import static org.elasticsearch.xpack.esql.core.type.DataType.BYTE;
import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_POINT;
@ -47,6 +52,7 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.VERSION;
import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTime;
import static org.elasticsearch.xpack.esql.core.type.DataType.isDateTimeOrNanosOrTemporal;
import static org.elasticsearch.xpack.esql.core.type.DataType.isString;
import static org.elasticsearch.xpack.esql.core.type.DataType.suggestedCast;
import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.commonType;
public class EsqlDataTypeConverterTests extends ESTestCase {
@ -186,4 +192,33 @@ public class EsqlDataTypeConverterTests extends ESTestCase {
assertNull("Expected null for " + dataType1 + " and " + dataType2, commonType(dataType1, dataType2));
assertNull("Expected null for " + dataType1 + " and " + dataType2, commonType(dataType2, dataType1));
}
public void testSuggestedCast() {
// date
{
assertEquals(DATE_NANOS, DataType.suggestedCast(Set.of(DATETIME, DATE_NANOS)));
DataType randomType = DataType.values()[random().nextInt(DataType.values().length)];
DataType suggested = DataType.suggestedCast(Set.of(DATETIME, DATE_NANOS, randomType));
if (randomType != DATETIME && randomType != DATE_NANOS) {
assertEquals(KEYWORD, suggested);
} else {
assertEquals(DATE_NANOS, suggested);
}
}
// aggregate metric double
{
List<DataType> NUMERICS = new ArrayList<>(Arrays.stream(DataType.values()).filter(DataType::isNumeric).toList());
Collections.shuffle(NUMERICS, random());
Set<DataType> subset = new HashSet<>(NUMERICS.subList(0, random().nextInt(NUMERICS.size())));
subset.add(AGGREGATE_METRIC_DOUBLE);
assertEquals(AGGREGATE_METRIC_DOUBLE, suggestedCast(subset));
}
// unsupported tests
{
assertNull(DataType.suggestedCast(Set.of()));
assertNull(DataType.suggestedCast(Set.of(UNSUPPORTED, DataType.values()[random().nextInt(DataType.values().length)])));
}
}
}

View File

@ -905,3 +905,80 @@ CASE:
- match: { values.2.0: 3 }
- match: { values.2.1: null }
- match: { values.2.2: "a" }
---
suggested_type:
- requires:
capabilities:
- method: POST
path: /_query
parameters: []
capabilities: [recommended_cast]
reason: "uses recommended_cast"
- do:
indices.create:
index: metrics_1
body:
mappings:
properties:
my_metric:
type: aggregate_metric_double
metrics: [ min, max, sum, value_count ]
default_metric: max
my_date:
type: date
my_double:
type: double
some_other_field:
type: geo_point
- do:
indices.create:
index: metrics_2
body:
mappings:
properties:
my_metric:
type: long
my_date:
type: date_nanos
my_double:
type: double
some_other_field:
type: ip
- do:
bulk:
refresh: true
body:
- { "index" : { "_index": "metrics_1" } }
- { "my_metric": { "min": 1.0, "max": 3.0, "sum": 10.1, "value_count": 5 }, "my_date": "2021-04-28T18:50:04.467Z", "my_double": 105.2, "some_other_field": "52.374081,4.912350" }
- { "index" : { "_index": "metrics_2" } }
- { "my_metric": 5, "my_date": "2021-04-28T19:34:00.000Z", "my_double": 843205.9, "some_other_field": 192.168.30.1 }
- do:
allowed_warnings_regex:
- "No limit defined, adding default limit of \\[.*\\]"
esql.query:
body:
query: 'FROM metrics_* | KEEP my_metric, my_date, my_double, some_other_field'
- match: { columns.0.name: "my_metric" }
- match: { columns.0.type: "unsupported" }
- match: { columns.0.original_types: ["aggregate_metric_double", "long"] }
- match: { columns.0.suggested_cast: "aggregate_metric_double" }
- match: { columns.1.name: "my_date" }
- match: { columns.1.type: "unsupported" }
- match: { columns.1.original_types: ["date_nanos", "datetime"] }
- match: { columns.1.suggested_cast: "date_nanos" }
- match: { columns.2.name: "my_double" }
- match: { columns.2.type: "double" }
- is_false: columns.2.original_types
- is_false: columns.2.suggested_cast
- match: { columns.3.name: "some_other_field" }
- match: { columns.3.type: "unsupported" }
- match: { columns.3.original_types: ["geo_point", "ip"] }
- match: { columns.3.suggested_cast: "keyword" }
- length: { values: 2 }