[ES|QL] Allow lookup join on mixed numeric fields (#128263)
* allow lookup join on mixed numeric fields
This commit is contained in:
parent
844ee68f0f
commit
dfe1357e26
|
@ -0,0 +1,5 @@
|
|||
pr: 128263
|
||||
summary: Allow lookup join on mixed numeric fields
|
||||
area: ES|QL
|
||||
type: enhancement
|
||||
issues: []
|
|
@ -71,6 +71,9 @@ public class CsvTestsDataLoader {
|
|||
"mapping-languages_nested_fields.json",
|
||||
"languages_nested_fields.csv"
|
||||
).withSetting("lookup-settings.json");
|
||||
private static final TestDataset LANGUAGES_MIX_NUMERICS = new TestDataset("languages_mixed_numerics").withSetting(
|
||||
"lookup-settings.json"
|
||||
);
|
||||
private static final TestDataset ALERTS = new TestDataset("alerts");
|
||||
private static final TestDataset UL_LOGS = new TestDataset("ul_logs");
|
||||
private static final TestDataset SAMPLE_DATA = new TestDataset("sample_data");
|
||||
|
@ -151,6 +154,7 @@ public class CsvTestsDataLoader {
|
|||
Map.entry(LANGUAGES_LOOKUP.indexName, LANGUAGES_LOOKUP),
|
||||
Map.entry(LANGUAGES_LOOKUP_NON_UNIQUE_KEY.indexName, LANGUAGES_LOOKUP_NON_UNIQUE_KEY),
|
||||
Map.entry(LANGUAGES_NESTED_FIELDS.indexName, LANGUAGES_NESTED_FIELDS),
|
||||
Map.entry(LANGUAGES_MIX_NUMERICS.indexName, LANGUAGES_MIX_NUMERICS),
|
||||
Map.entry(UL_LOGS.indexName, UL_LOGS),
|
||||
Map.entry(SAMPLE_DATA.indexName, SAMPLE_DATA),
|
||||
Map.entry(SAMPLE_DATA_PARTIAL_MAPPING.indexName, SAMPLE_DATA_PARTIAL_MAPPING),
|
||||
|
|
|
@ -0,0 +1,30 @@
|
|||
language_code_byte:byte,language_code_short:short,language_code_integer:integer,language_code_long:long,language_code_half_float:half_float,language_code_scaled_float:scaled_float,language_code_float:float,language_code_double:double,language_name:keyword
|
||||
1,1,1,1,1.0,1.0,1.0,1.0,English
|
||||
2,2,2,2,2.0,2.0,2.0,2.0,French
|
||||
3,3,3,3,3.0,3.0,3.0,3.0,Spanish
|
||||
4,4,4,4,4.0,4.0,4.0,4.0,German
|
||||
-128,-128,-128,-128,-128.0,-128.0,-128.0,-128.0,min_byte
|
||||
,-129,-129,-129,-129.0,-129.0,-129.0,-129.0,min_byte_minus_1
|
||||
127,127,127,127,127.0,127.0,127.0,127.0,max_byte
|
||||
,128,128,128,128.0,128.0,128.0,128.0,max_byte_plus_1
|
||||
,-32768,-32768,-32768,-32768.0,-32768.0,-32768.0,-32768.0,min_short
|
||||
,,-32769,-32769,-32769.0,-32769.0,-32769.0,-32769.0,min_short_minus_1
|
||||
,32767,32767,32767,32767.0,32767.0,32767.0,32767.0,max_short
|
||||
,,32768,32768,32768.0,32768.0,32768.0,32768.0,max_short_plus_1
|
||||
,,-2147483648,-2147483648,,-2147483648.0,-2147483648.0,-2147483648.0,min_int
|
||||
,,,-2147483649,,-2147483649.0,-2147483649.0,-2147483649.0,min_int_minus_1
|
||||
,,2147483646,2147483646,,2147483646.0,2147483646.0,2147483646.0,max_int_minus_1
|
||||
,,2147483647,2147483647,,2147483647.0,2147483647.0,2147483647.0,max_int
|
||||
,,,2147483648,,2147483648.0,2147483648.0,2147483648.0,max_int_plus_1
|
||||
,,,-9223372036854775808,,-9223372036854775808.0,-9223372036854775808.0,-9223372036854775808.0,min_long
|
||||
,,,,,-9223372036854775809.0,-9223372036854775809.0,-9223372036854775809.0,min_long_minus_1
|
||||
,,,9223372036854775806,,9223372036854775806.0,9223372036854775806.0,9223372036854775806.0,max_long_minus_1
|
||||
,,,9223372036854775807,,9223372036854775807.0,9223372036854775807.0,9223372036854775807.0,max_long
|
||||
,,,,,9223372036854775808.0,9223372036854775808.0,9223372036854775808.0,max_long_plus_1
|
||||
,,65504,65504,65504.0,65504.0,65504.0,65504.0,max_half_float
|
||||
,,65505,65505,65505.0,65505.0,65505.0,65505.0,max_half_float_plus_1
|
||||
,,-65504,-65504,-65504.0,-65504.0,-65504.0,-65504.0,min_half_float
|
||||
,,-65505,-65505,-65505.0,-65505.0,-65505.0,-65505.0,min_half_float_minus_1
|
||||
,,,,,,3.40282346638528860e+38,3.40282346638528860e+38,max_float
|
||||
,,,,,,-3.40282346638528860e+38,-3.40282346638528860e+38,min_float
|
||||
,,,,,,,3.40282346638528860e+39,double
|
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,32 @@
|
|||
{
|
||||
"properties" : {
|
||||
"language_code_byte" : {
|
||||
"type" : "byte"
|
||||
},
|
||||
"language_code_short" : {
|
||||
"type" : "short"
|
||||
},
|
||||
"language_code_integer" : {
|
||||
"type" : "integer"
|
||||
},
|
||||
"language_code_long" : {
|
||||
"type" : "long"
|
||||
},
|
||||
"language_code_half_float" : {
|
||||
"type" : "half_float"
|
||||
},
|
||||
"language_code_scaled_float" : {
|
||||
"type" : "scaled_float",
|
||||
"scaling_factor" : 100
|
||||
},
|
||||
"language_code_float" : {
|
||||
"type" : "float"
|
||||
},
|
||||
"language_code_double" : {
|
||||
"type" : "double"
|
||||
},
|
||||
"language_name" : {
|
||||
"type" : "keyword"
|
||||
}
|
||||
}
|
||||
}
|
|
@ -138,16 +138,13 @@ public class LookupJoinTypesIT extends ESIntegTestCase {
|
|||
}
|
||||
|
||||
// Test integer types
|
||||
var integerTypes = List.of(BYTE, SHORT, INTEGER);
|
||||
var integerTypes = List.of(BYTE, SHORT, INTEGER, LONG);
|
||||
{
|
||||
TestConfigs configs = testConfigurations.computeIfAbsent("integers", TestConfigs::new);
|
||||
for (DataType mainType : integerTypes) {
|
||||
for (DataType lookupType : integerTypes) {
|
||||
configs.addPasses(mainType, lookupType);
|
||||
}
|
||||
// Long is currently treated differently in the validation, but we could consider changing that
|
||||
configs.addFails(mainType, LONG);
|
||||
configs.addFails(LONG, mainType);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -167,9 +164,8 @@ public class LookupJoinTypesIT extends ESIntegTestCase {
|
|||
TestConfigs configs = testConfigurations.computeIfAbsent("mixed-numerical", TestConfigs::new);
|
||||
for (DataType mainType : integerTypes) {
|
||||
for (DataType lookupType : floatTypes) {
|
||||
// TODO: We should probably allow this, but we need to change the validation code in Join.java
|
||||
configs.addFails(mainType, lookupType);
|
||||
configs.addFails(lookupType, mainType);
|
||||
configs.addPasses(mainType, lookupType);
|
||||
configs.addPasses(lookupType, mainType);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1112,7 +1112,12 @@ public class EsqlCapabilities {
|
|||
/**
|
||||
* The {@code ROUND_TO} function.
|
||||
*/
|
||||
ROUND_TO;
|
||||
ROUND_TO,
|
||||
|
||||
/**
|
||||
* Allow lookup join on mixed numeric fields, among byte, short, int, long, half_float, scaled_float, float and double.
|
||||
*/
|
||||
LOOKUP_JOIN_ON_MIXED_NUMERIC_FIELDS;
|
||||
|
||||
private final boolean enabled;
|
||||
|
||||
|
|
|
@ -54,6 +54,7 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.UNSUPPORTED;
|
|||
import static org.elasticsearch.xpack.esql.core.type.DataType.VERSION;
|
||||
import static org.elasticsearch.xpack.esql.expression.NamedExpressions.mergeOutputAttributes;
|
||||
import static org.elasticsearch.xpack.esql.plan.logical.join.JoinTypes.LEFT;
|
||||
import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.commonType;
|
||||
|
||||
public class Join extends BinaryPlan implements PostAnalysisVerificationAware, SortAgnostic {
|
||||
public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(LogicalPlan.class, "Join", Join::new);
|
||||
|
@ -283,8 +284,12 @@ public class Join extends BinaryPlan implements PostAnalysisVerificationAware, S
|
|||
}
|
||||
|
||||
private static boolean comparableTypes(Attribute left, Attribute right) {
|
||||
// TODO: Consider allowing more valid types
|
||||
// return left.dataType().noText() == right.dataType().noText() || left.dataType().isNumeric() == right.dataType().isNumeric();
|
||||
return left.dataType().noText() == right.dataType().noText();
|
||||
DataType leftType = left.dataType();
|
||||
DataType rightType = right.dataType();
|
||||
if (leftType.isNumeric() && rightType.isNumeric()) {
|
||||
// Allow byte, short, integer, long, half_float, scaled_float, float and double to join against each other
|
||||
return commonType(leftType, rightType) != null;
|
||||
}
|
||||
return leftType.noText() == rightType.noText();
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue