[ES|QL] Allow lookup join on mixed numeric fields (#128263)

* allow lookup join on mixed numeric fields
This commit is contained in:
Fang Xing 2025-05-25 14:56:07 -04:00 committed by GitHub
parent 844ee68f0f
commit dfe1357e26
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 2947 additions and 51 deletions

View File

@ -0,0 +1,5 @@
pr: 128263
summary: Allow lookup join on mixed numeric fields
area: ES|QL
type: enhancement
issues: []

View File

@ -71,6 +71,9 @@ public class CsvTestsDataLoader {
"mapping-languages_nested_fields.json",
"languages_nested_fields.csv"
).withSetting("lookup-settings.json");
// Test dataset "languages_mixed_numerics", configured with the settings from lookup-settings.json
// (the same settings file applied to the dataset declared just above).
private static final TestDataset LANGUAGES_MIX_NUMERICS = new TestDataset("languages_mixed_numerics").withSetting(
"lookup-settings.json"
);
private static final TestDataset ALERTS = new TestDataset("alerts");
private static final TestDataset UL_LOGS = new TestDataset("ul_logs");
private static final TestDataset SAMPLE_DATA = new TestDataset("sample_data");
@ -151,6 +154,7 @@ public class CsvTestsDataLoader {
Map.entry(LANGUAGES_LOOKUP.indexName, LANGUAGES_LOOKUP),
Map.entry(LANGUAGES_LOOKUP_NON_UNIQUE_KEY.indexName, LANGUAGES_LOOKUP_NON_UNIQUE_KEY),
Map.entry(LANGUAGES_NESTED_FIELDS.indexName, LANGUAGES_NESTED_FIELDS),
Map.entry(LANGUAGES_MIX_NUMERICS.indexName, LANGUAGES_MIX_NUMERICS),
Map.entry(UL_LOGS.indexName, UL_LOGS),
Map.entry(SAMPLE_DATA.indexName, SAMPLE_DATA),
Map.entry(SAMPLE_DATA_PARTIAL_MAPPING.indexName, SAMPLE_DATA_PARTIAL_MAPPING),

View File

@ -0,0 +1,30 @@
language_code_byte:byte,language_code_short:short,language_code_integer:integer,language_code_long:long,language_code_half_float:half_float,language_code_scaled_float:scaled_float,language_code_float:float,language_code_double:double,language_name:keyword
1,1,1,1,1.0,1.0,1.0,1.0,English
2,2,2,2,2.0,2.0,2.0,2.0,French
3,3,3,3,3.0,3.0,3.0,3.0,Spanish
4,4,4,4,4.0,4.0,4.0,4.0,German
-128,-128,-128,-128,-128.0,-128.0,-128.0,-128.0,min_byte
,-129,-129,-129,-129.0,-129.0,-129.0,-129.0,min_byte_minus_1
127,127,127,127,127.0,127.0,127.0,127.0,max_byte
,128,128,128,128.0,128.0,128.0,128.0,max_byte_plus_1
,-32768,-32768,-32768,-32768.0,-32768.0,-32768.0,-32768.0,min_short
,,-32769,-32769,-32769.0,-32769.0,-32769.0,-32769.0,min_short_minus_1
,32767,32767,32767,32767.0,32767.0,32767.0,32767.0,max_short
,,32768,32768,32768.0,32768.0,32768.0,32768.0,max_short_plus_1
,,-2147483648,-2147483648,,-2147483648.0,-2147483648.0,-2147483648.0,min_int
,,,-2147483649,,-2147483649.0,-2147483649.0,-2147483649.0,min_int_minus_1
,,2147483646,2147483646,,2147483646.0,2147483646.0,2147483646.0,max_int_minus_1
,,2147483647,2147483647,,2147483647.0,2147483647.0,2147483647.0,max_int
,,,2147483648,,2147483648.0,2147483648.0,2147483648.0,max_int_plus_1
,,,-9223372036854775808,,-9223372036854775808.0,-9223372036854775808.0,-9223372036854775808.0,min_long
,,,,,-9223372036854775809.0,-9223372036854775809.0,-9223372036854775809.0,min_long_minus_1
,,,9223372036854775806,,9223372036854775806.0,9223372036854775806.0,9223372036854775806.0,max_long_minus_1
,,,9223372036854775807,,9223372036854775807.0,9223372036854775807.0,9223372036854775807.0,max_long
,,,,,9223372036854775808.0,9223372036854775808.0,9223372036854775808.0,max_long_plus_1
,,65504,65504,65504.0,65504.0,65504.0,65504.0,max_half_float
,,65505,65505,65505.0,65505.0,65505.0,65505.0,max_half_float_plus_1
,,-65504,-65504,-65504.0,-65504.0,-65504.0,-65504.0,min_half_float
,,-65505,-65505,-65505.0,-65505.0,-65505.0,-65505.0,min_half_float_minus_1
,,,,,,3.40282346638528860e+38,3.40282346638528860e+38,max_float
,,,,,,-3.40282346638528860e+38,-3.40282346638528860e+38,min_float
,,,,,,,3.40282346638528860e+39,double
1 language_code_byte:byte language_code_short:short language_code_integer:integer language_code_long:long language_code_half_float:half_float language_code_scaled_float:scaled_float language_code_float:float language_code_double:double language_name:keyword
2 1 1 1 1 1.0 1.0 1.0 1.0 English
3 2 2 2 2 2.0 2.0 2.0 2.0 French
4 3 3 3 3 3.0 3.0 3.0 3.0 Spanish
5 4 4 4 4 4.0 4.0 4.0 4.0 German
6 -128 -128 -128 -128 -128.0 -128.0 -128.0 -128.0 min_byte
7 -129 -129 -129 -129.0 -129.0 -129.0 -129.0 min_byte_minus_1
8 127 127 127 127 127.0 127.0 127.0 127.0 max_byte
9 128 128 128 128.0 128.0 128.0 128.0 max_byte_plus_1
10 -32768 -32768 -32768 -32768.0 -32768.0 -32768.0 -32768.0 min_short
11 -32769 -32769 -32769.0 -32769.0 -32769.0 -32769.0 min_short_minus_1
12 32767 32767 32767 32767.0 32767.0 32767.0 32767.0 max_short
13 32768 32768 32768.0 32768.0 32768.0 32768.0 max_short_plus_1
14 -2147483648 -2147483648 -2147483648.0 -2147483648.0 -2147483648.0 min_int
15 -2147483649 -2147483649.0 -2147483649.0 -2147483649.0 min_int_minus_1
16 2147483646 2147483646 2147483646.0 2147483646.0 2147483646.0 max_int_minus_1
17 2147483647 2147483647 2147483647.0 2147483647.0 2147483647.0 max_int
18 2147483648 2147483648.0 2147483648.0 2147483648.0 max_int_plus_1
19 -9223372036854775808 -9223372036854775808.0 -9223372036854775808.0 -9223372036854775808.0 min_long
20 -9223372036854775809.0 -9223372036854775809.0 -9223372036854775809.0 min_long_minus_1
21 9223372036854775806 9223372036854775806.0 9223372036854775806.0 9223372036854775806.0 max_long_minus_1
22 9223372036854775807 9223372036854775807.0 9223372036854775807.0 9223372036854775807.0 max_long
23 9223372036854775808.0 9223372036854775808.0 9223372036854775808.0 max_long_plus_1
24 65504 65504 65504.0 65504.0 65504.0 65504.0 max_half_float
25 65505 65505 65505.0 65505.0 65505.0 65505.0 max_half_float_plus_1
26 -65504 -65504 -65504.0 -65504.0 -65504.0 -65504.0 min_half_float
27 -65505 -65505 -65505.0 -65505.0 -65505.0 -65505.0 min_half_float_minus_1
28 3.40282346638528860e+38 3.40282346638528860e+38 max_float
29 -3.40282346638528860e+38 -3.40282346638528860e+38 min_float
30 3.40282346638528860e+39 double

View File

@ -0,0 +1,32 @@
{
"properties" : {
"language_code_byte" : {
"type" : "byte"
},
"language_code_short" : {
"type" : "short"
},
"language_code_integer" : {
"type" : "integer"
},
"language_code_long" : {
"type" : "long"
},
"language_code_half_float" : {
"type" : "half_float"
},
"language_code_scaled_float" : {
"type" : "scaled_float",
"scaling_factor" : 100
},
"language_code_float" : {
"type" : "float"
},
"language_code_double" : {
"type" : "double"
},
"language_name" : {
"type" : "keyword"
}
}
}

View File

@ -138,16 +138,13 @@ public class LookupJoinTypesIT extends ESIntegTestCase {
}
// Test integer types
var integerTypes = List.of(BYTE, SHORT, INTEGER);
var integerTypes = List.of(BYTE, SHORT, INTEGER, LONG);
{
TestConfigs configs = testConfigurations.computeIfAbsent("integers", TestConfigs::new);
for (DataType mainType : integerTypes) {
for (DataType lookupType : integerTypes) {
configs.addPasses(mainType, lookupType);
}
// Long is currently treated differently in the validation, but we could consider changing that
configs.addFails(mainType, LONG);
configs.addFails(LONG, mainType);
}
}
@ -167,9 +164,8 @@ public class LookupJoinTypesIT extends ESIntegTestCase {
TestConfigs configs = testConfigurations.computeIfAbsent("mixed-numerical", TestConfigs::new);
for (DataType mainType : integerTypes) {
for (DataType lookupType : floatTypes) {
// TODO: We should probably allow this, but we need to change the validation code in Join.java
configs.addFails(mainType, lookupType);
configs.addFails(lookupType, mainType);
configs.addPasses(mainType, lookupType);
configs.addPasses(lookupType, mainType);
}
}
}

View File

@ -1112,7 +1112,12 @@ public class EsqlCapabilities {
/**
* The {@code ROUND_TO} function.
*/
ROUND_TO;
ROUND_TO,
/**
* Allow lookup join on mixed numeric fields, among byte, short, int, long, half_float, scaled_float, float and double.
*/
LOOKUP_JOIN_ON_MIXED_NUMERIC_FIELDS;
private final boolean enabled;

View File

@ -54,6 +54,7 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.UNSUPPORTED;
import static org.elasticsearch.xpack.esql.core.type.DataType.VERSION;
import static org.elasticsearch.xpack.esql.expression.NamedExpressions.mergeOutputAttributes;
import static org.elasticsearch.xpack.esql.plan.logical.join.JoinTypes.LEFT;
import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.commonType;
public class Join extends BinaryPlan implements PostAnalysisVerificationAware, SortAgnostic {
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(LogicalPlan.class, "Join", Join::new);
@ -283,8 +284,12 @@ public class Join extends BinaryPlan implements PostAnalysisVerificationAware, S
}
/**
 * Returns whether the data types of two attributes are considered comparable by this check.
 * <p>
 * When both types are numeric, the pair is accepted if {@code commonType} yields a non-null
 * common type for them. Every other pair is accepted only if both types are identical after
 * {@code noText()} has been applied to each side.
 *
 * @param left  the first attribute of the pair being checked
 * @param right the second attribute of the pair being checked
 * @return {@code true} if the two attribute types are comparable, {@code false} otherwise
 */
private static boolean comparableTypes(Attribute left, Attribute right) {
    DataType leftType = left.dataType();
    DataType rightType = right.dataType();
    if (leftType.isNumeric() && rightType.isNumeric()) {
        // Allow byte, short, integer, long, half_float, scaled_float, float and double to join against each other
        return commonType(leftType, rightType) != null;
    }
    return leftType.noText() == rightType.noText();
}
}