Add support to VALUES aggregation for spatial types (#122886)

The original work at https://github.com/elastic/elasticsearch/pull/106065 did not support geospatial types, and explained the omission with this comment:

> I made this work for everything but geo_point and cartesian_point because I'm not 100% sure how to integrate with those. We can grab those in a follow up.

It should be possible to collect geospatial types with the VALUES aggregation, with behavior similar to the `ST_COLLECT` OGC function, following the Elasticsearch convention of treating a multi-value geospatial field much like a geometry collection. This implementation is therefore a trivial addition to the types that VALUES already supports.
This commit is contained in:
Craig Taverner 2025-02-25 11:38:51 +01:00 committed by GitHub
parent 12fcdd8633
commit ec82c24a87
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 189 additions and 36 deletions

View File

@ -0,0 +1,6 @@
pr: 122886
summary: Add support to VALUES aggregation for spatial types
area: ES|QL
type: bug
issues:
- 122413

View File

@ -16,6 +16,30 @@
"variadic" : false,
"returnType" : "boolean"
},
{
"params" : [
{
"name" : "field",
"type" : "cartesian_point",
"optional" : false,
"description" : ""
}
],
"variadic" : false,
"returnType" : "cartesian_point"
},
{
"params" : [
{
"name" : "field",
"type" : "cartesian_shape",
"optional" : false,
"description" : ""
}
],
"variadic" : false,
"returnType" : "cartesian_shape"
},
{
"params" : [
{
@ -52,6 +76,30 @@
"variadic" : false,
"returnType" : "double"
},
{
"params" : [
{
"name" : "field",
"type" : "geo_point",
"optional" : false,
"description" : ""
}
],
"variadic" : false,
"returnType" : "geo_point"
},
{
"params" : [
{
"name" : "field",
"type" : "geo_shape",
"optional" : false,
"description" : ""
}
],
"variadic" : false,
"returnType" : "geo_shape"
},
{
"params" : [
{

View File

@ -102,6 +102,8 @@ tasks.named("yamlRestCompatTestTransform").configure({ task ->
task.skipTest("esql/190_lookup_join/alias-pattern-multiple", "LOOKUP JOIN does not support index aliases for now")
task.skipTest("esql/190_lookup_join/alias-pattern-single", "LOOKUP JOIN does not support index aliases for now")
task.skipTest("esql/180_match_operator/match with disjunctions", "Disjunctions in full text functions work now")
task.skipTest("esql/130_spatial/values unsupported for geo_point", "Spatial types are now supported in VALUES aggregation")
task.skipTest("esql/130_spatial/values unsupported for geo_point status code", "Spatial types are now supported in VALUES aggregation")
// Expected deprecation warning to compat yaml tests:
task.addAllowedWarningRegex(".*rollup functionality will be removed in Elasticsearch.*")
task.skipTest("esql/40_tsdb/from doc with aggregate_metric_double", "TODO: support for subset of metric fields")

View File

@ -144,6 +144,70 @@ c:long | x:double | y:double
19 | null | null
;
values
required_capability: agg_values_spatial
FROM airports
| WHERE scalerank == 9
| STATS locations=VALUES(location)
| EVAL locations = MV_SORT(TO_STRING(locations))
;
locations:keyword
[POINT (101.446569298441 0.464600872998505), POINT (105.176060419161 -5.242566777132), POINT (112.711418617258 -7.92998002840567), POINT (126.810839481226 35.1400051390198), POINT (127.495916124681 36.7220227766673), POINT (128.637537699933 35.8999277969087), POINT (129.355731047528 35.5928957527107), POINT (145.243980298582 14.1717712971216), POINT (35.3018728575279 47.8732635579023), POINT (42.97109630194 14.7552534413725), POINT (48.7471065435931 31.3431585560757), POINT (60.900708564915 29.4752941956573), POINT (61.5122589740201 55.2977919496055), POINT (63.0279333519181 25.988794590011), POINT (66.9487311480949 30.249043186181), POINT (72.9878190922305 31.3627435480862), POINT (73.0320498392002 33.5614146278861), POINT (73.3163595376585 54.9576482934059), POINT (73.4084964764375 61.3401672194481), POINT (73.8105674924689 19.9660205672806), POINT (75.3958432922005 19.8672969621082), POINT (75.7584828456005 31.4329422397715), POINT (75.8092915005895 22.727749187571), POINT (75.9330597710755 17.625415183635), POINT (75.9570722403652 30.8503598561702), POINT (76.8017261105242 30.6707248949667), POINT (78.2172186546348 26.285487697937), POINT (78.7089578747476 10.7603571306554), POINT (79.452002687657 28.4218087161144), POINT (81.7317271462187 25.443522027821), POINT (82.6671524525865 55.0095847136264), POINT (83.5504532124038 53.3633850813046), POINT (85.3235970368767 23.3177245989962)]
;
valuesGrouped
required_capability: agg_values_spatial
FROM airports
| WHERE scalerank == 9
| EVAL first_letter = SUBSTRING(abbrev, 0, 1)
| STATS locations=VALUES(location) BY first_letter
| EVAL locations = MV_SORT(TO_STRING(locations))
| SORT first_letter
| KEEP first_letter, locations
;
first_letter:keyword | locations:keyword
A | POINT (48.7471065435931 31.3431585560757)
B | POINT (83.5504532124038 53.3633850813046)
C | [POINT (127.495916124681 36.7220227766673), POINT (61.5122589740201 55.2977919496055)]
G | POINT (78.2172186546348 26.285487697937)
H | POINT (42.97109630194 14.7552534413725)
I | [POINT (73.8105674924689 19.9660205672806), POINT (75.3958432922005 19.8672969621082), POINT (75.8092915005895 22.727749187571), POINT (76.8017261105242 30.6707248949667), POINT (81.7317271462187 25.443522027821), POINT (85.3235970368767 23.3177245989962)]
K | POINT (126.810839481226 35.1400051390198)
L | [POINT (72.9878190922305 31.3627435480862), POINT (75.9570722403652 30.8503598561702)]
M | POINT (112.711418617258 -7.92998002840567)
O | [POINT (35.3018728575279 47.8732635579023), POINT (73.0320498392002 33.5614146278861), POINT (73.3163595376585 54.9576482934059), POINT (82.6671524525865 55.0095847136264)]
P | POINT (101.446569298441 0.464600872998505)
R | POINT (145.243980298582 14.1717712971216)
S | [POINT (73.4084964764375 61.3401672194481), POINT (75.9330597710755 17.625415183635)]
T | [POINT (128.637537699933 35.8999277969087), POINT (63.0279333519181 25.988794590011), POINT (78.7089578747476 10.7603571306554)]
U | [POINT (129.355731047528 35.5928957527107), POINT (66.9487311480949 30.249043186181)]
V | [POINT (75.7584828456005 31.4329422397715), POINT (79.452002687657 28.4218087161144)]
W | POINT (105.176060419161 -5.242566777132)
Z | POINT (60.900708564915 29.4752941956573)
;
valuesGroupedByOrdinals
required_capability: agg_values_spatial
FROM airports
| WHERE scalerank == 9
| STATS locations=VALUES(location) BY type
| EVAL locations = MV_SORT(TO_STRING(locations))
| SORT type
| KEEP type, locations
;
type:keyword | locations:keyword
major | [POINT (127.495916124681 36.7220227766673), POINT (76.8017261105242 30.6707248949667)]
mid | [POINT (101.446569298441 0.464600872998505), POINT (105.176060419161 -5.242566777132), POINT (112.711418617258 -7.92998002840567), POINT (126.810839481226 35.1400051390198), POINT (128.637537699933 35.8999277969087), POINT (129.355731047528 35.5928957527107), POINT (145.243980298582 14.1717712971216), POINT (35.3018728575279 47.8732635579023), POINT (42.97109630194 14.7552534413725), POINT (48.7471065435931 31.3431585560757), POINT (60.900708564915 29.4752941956573), POINT (61.5122589740201 55.2977919496055), POINT (63.0279333519181 25.988794590011), POINT (66.9487311480949 30.249043186181), POINT (72.9878190922305 31.3627435480862), POINT (73.3163595376585 54.9576482934059), POINT (73.4084964764375 61.3401672194481), POINT (73.8105674924689 19.9660205672806), POINT (75.3958432922005 19.8672969621082), POINT (75.7584828456005 31.4329422397715), POINT (75.8092915005895 22.727749187571), POINT (75.9330597710755 17.625415183635), POINT (78.2172186546348 26.285487697937), POINT (78.7089578747476 10.7603571306554), POINT (82.6671524525865 55.0095847136264), POINT (83.5504532124038 53.3633850813046), POINT (85.3235970368767 23.3177245989962)]
military | [POINT (112.711418617258 -7.92998002840567), POINT (126.810839481226 35.1400051390198), POINT (35.3018728575279 47.8732635579023), POINT (72.9878190922305 31.3627435480862), POINT (75.7584828456005 31.4329422397715), POINT (76.8017261105242 30.6707248949667), POINT (78.2172186546348 26.285487697937), POINT (79.452002687657 28.4218087161144), POINT (81.7317271462187 25.443522027821)]
small | [POINT (73.0320498392002 33.5614146278861), POINT (75.9570722403652 30.8503598561702)]
;
###############################################
# Tests for ST_CENTROID_AGG on GEO_POINT type

View File

@ -81,6 +81,11 @@ public class EsqlCapabilities {
*/
AGG_VALUES,
/**
* Expand the {@code VALUES} agg to cover spatial types.
*/
AGG_VALUES_SPATIAL,
/**
* Does ESQL support async queries.
*/

View File

@ -63,12 +63,6 @@ public class EsqlTypeResolutions {
GEO_SHAPE.typeName(),
CARTESIAN_SHAPE.typeName() };
private static final String[] POINT_TYPE_NAMES = new String[] { GEO_POINT.typeName(), CARTESIAN_POINT.typeName() };
private static final String[] NON_SPATIAL_TYPE_NAMES = DataType.types()
.stream()
.filter(DataType::isRepresentable)
.filter(t -> DataType.isSpatial(t) == false)
.map(DataType::esType)
.toArray(String[]::new);
public static Expression.TypeResolution isSpatialPoint(Expression e, String operationName, TypeResolutions.ParamOrdinal paramOrd) {
return isType(e, DataType::isSpatialPoint, operationName, paramOrd, POINT_TYPE_NAMES);
@ -77,9 +71,4 @@ public class EsqlTypeResolutions {
public static Expression.TypeResolution isSpatial(Expression e, String operationName, TypeResolutions.ParamOrdinal paramOrd) {
return isType(e, DataType::isSpatial, operationName, paramOrd, SPATIAL_TYPE_NAMES);
}
public static Expression.TypeResolution isNotSpatial(Expression e, String operationName, TypeResolutions.ParamOrdinal paramOrd) {
return isType(e, t -> DataType.isSpatial(t) == false, operationName, paramOrd, NON_SPATIAL_TYPE_NAMES);
}
}

View File

@ -50,11 +50,28 @@ public class Values extends AggregateFunction implements ToAggregator {
Map.entry(DataType.SEMANTIC_TEXT, ValuesBytesRefAggregatorFunctionSupplier::new),
Map.entry(DataType.IP, ValuesBytesRefAggregatorFunctionSupplier::new),
Map.entry(DataType.VERSION, ValuesBytesRefAggregatorFunctionSupplier::new),
Map.entry(DataType.GEO_POINT, ValuesBytesRefAggregatorFunctionSupplier::new),
Map.entry(DataType.CARTESIAN_POINT, ValuesBytesRefAggregatorFunctionSupplier::new),
Map.entry(DataType.GEO_SHAPE, ValuesBytesRefAggregatorFunctionSupplier::new),
Map.entry(DataType.CARTESIAN_SHAPE, ValuesBytesRefAggregatorFunctionSupplier::new),
Map.entry(DataType.BOOLEAN, ValuesBooleanAggregatorFunctionSupplier::new)
);
@FunctionInfo(
returnType = { "boolean", "date", "date_nanos", "double", "integer", "ip", "keyword", "long", "version" },
returnType = {
"boolean",
"cartesian_point",
"cartesian_shape",
"date",
"date_nanos",
"double",
"geo_point",
"geo_shape",
"integer",
"ip",
"keyword",
"long",
"version" },
preview = true,
description = "Returns all values in a group as a multivalued field. The order of the returned values isn't guaranteed. "
+ "If you need the values returned in order use <<esql-mv_sort>>.",
@ -74,7 +91,21 @@ public class Values extends AggregateFunction implements ToAggregator {
Source source,
@Param(
name = "field",
type = { "boolean", "date", "date_nanos", "double", "integer", "ip", "keyword", "long", "text", "version" }
type = {
"boolean",
"cartesian_point",
"cartesian_shape",
"date",
"date_nanos",
"double",
"geo_point",
"geo_shape",
"integer",
"ip",
"keyword",
"long",
"text",
"version" }
) Expression v
) {
this(source, v, Literal.TRUE);
@ -115,13 +146,7 @@ public class Values extends AggregateFunction implements ToAggregator {
@Override
protected TypeResolution resolveType() {
return TypeResolutions.isType(
field(),
SUPPLIERS::containsKey,
sourceText(),
DEFAULT,
"any type except unsigned_long and spatial types"
);
return TypeResolutions.isType(field(), SUPPLIERS::containsKey, sourceText(), DEFAULT, "any type except unsigned_long");
}
@Override

View File

@ -858,7 +858,9 @@ public abstract class AbstractFunctionTestCase extends ESTestCase {
@AfterClass
public static void renderSignature() throws IOException {
if (System.getProperty("generateDocs") == null) {
// Temporarily turn off docs generation during docs freeze
// TODO: Only turn this back on once this generates the correct MD files
if (System.getProperty("generateDocs") == null || true) {
return;
}
String name = functionName();
@ -933,7 +935,9 @@ public abstract class AbstractFunctionTestCase extends ESTestCase {
}
protected static void renderDocs(String name) throws IOException {
if (System.getProperty("generateDocs") == null) {
// Temporarily turn off docs generation during docs freeze
// TODO: Only turn this back on once this generates the correct MD files
if (System.getProperty("generateDocs") == null || true) {
return;
}
if (binaryOperator(name) != null || unaryOperator(name) != null || searchOperator(name) != null || likeOrInOperator(name)) {

View File

@ -32,6 +32,6 @@ public class ValuesErrorTests extends ErrorsForCasesWithoutExamplesTestCase {
@Override
protected Matcher<String> expectedTypeErrorMatcher(List<Set<DataType>> validPerPosition, List<DataType> signature) {
return equalTo(typeErrorMessage(false, validPerPosition, signature, (v, p) -> "any type except unsigned_long and spatial types"));
return equalTo(typeErrorMessage(false, validPerPosition, signature, (v, p) -> "any type except unsigned_long"));
}
}

View File

@ -52,7 +52,12 @@ public class ValuesTests extends AbstractAggregationTestCase {
// Lower values for strings, as they take more space and may trigger the circuit breaker
MultiRowTestCaseSupplier.stringCases(1, 20, DataType.KEYWORD),
MultiRowTestCaseSupplier.stringCases(1, 20, DataType.TEXT),
MultiRowTestCaseSupplier.stringCases(1, 20, DataType.SEMANTIC_TEXT)
MultiRowTestCaseSupplier.stringCases(1, 20, DataType.SEMANTIC_TEXT),
// For spatial types, we can have many rows for points, but reduce rows for shapes to avoid circuit breaker
MultiRowTestCaseSupplier.geoPointCases(1, 1000, MultiRowTestCaseSupplier.IncludingAltitude.NO),
MultiRowTestCaseSupplier.cartesianPointCases(1, 1000, MultiRowTestCaseSupplier.IncludingAltitude.NO),
MultiRowTestCaseSupplier.geoShapeCasesWithoutCircle(1, 100, MultiRowTestCaseSupplier.IncludingAltitude.NO),
MultiRowTestCaseSupplier.cartesianShapeCasesWithoutCircle(1, 100, MultiRowTestCaseSupplier.IncludingAltitude.NO)
).flatMap(List::stream).map(ValuesTests::makeSupplier).collect(Collectors.toCollection(() -> suppliers));
return parameterSuppliersFromTypedDataWithDefaultChecksNoErrors(suppliers, false);

View File

@ -3,7 +3,7 @@ setup:
- requires:
cluster_features: ["gte_v8.14.0"]
reason: "Mixed cluster tests don't work with the changed error message from sort"
test_runner_features: allowed_warnings_regex
test_runner_features: [ capabilities, allowed_warnings_regex ]
- do:
indices.create:
@ -148,20 +148,25 @@ geo_point unsortable with limit from row:
query: 'ROW wkt = ["POINT(42.9711 -14.7553)", "POINT(75.8093 22.7277)"] | MV_EXPAND wkt | EVAL pt = TO_GEOPOINT(wkt) | limit 5 | sort pt'
---
values unsupported for geo_point:
values supported for geo_point:
- requires:
capabilities:
- method: POST
path: /_query
parameters: [ method, path, parameters, capabilities ]
capabilities: [ agg_values_spatial ]
reason: "Spatial types added to values aggregation in 8.19.0"
- do:
catch: '/.+argument of \[VALUES\(location\)\] must be .+/'
allowed_warnings_regex:
- "No limit defined, adding default limit of \\[.*\\]"
esql.query:
body:
query: 'FROM geo_points | STATS VALUES(location)'
---
values unsupported for geo_point status code:
- do:
catch: bad_request
esql.query:
body:
query: 'FROM geo_points | STATS VALUES(location)'
query: 'FROM geo_points | STATS locations = VALUES(location) | EVAL locations = MV_SORT(TO_STRING(locations))'
- length: { columns: 1 }
- match: { columns.0.name: locations }
- match: { columns.0.type: keyword }
- length: { values: 1 }
- match: { values.0.0: ["POINT (-1.0 1.0)", "POINT (1.0 -1.0)"] }
---
cartesian_point: