ESQL: Avoid unintended attribute removal (#127563)
--------- Co-authored-by: Andrei Stefan <astefan@users.noreply.github.com>
This commit is contained in:
parent
288f47eef6
commit
88b61e3621
|
@ -0,0 +1,6 @@
|
|||
pr: 127563
|
||||
summary: "ESQL: Avoid unintended attribute removal"
|
||||
area: ES|QL
|
||||
type: bug
|
||||
issues:
|
||||
- 127468
|
|
@ -51,7 +51,6 @@ public abstract class GenerativeRestTest extends ESRestTestCase {
|
|||
"Unknown column \\[<all-fields-projected>\\]", // https://github.com/elastic/elasticsearch/issues/121741,
|
||||
"Plan \\[ProjectExec\\[\\[<no-fields>.* optimized incorrectly due to missing references", // https://github.com/elastic/elasticsearch/issues/125866
|
||||
"optimized incorrectly due to missing references", // https://github.com/elastic/elasticsearch/issues/116781
|
||||
"only supports KEYWORD or TEXT values", // https://github.com/elastic/elasticsearch/issues/127468
|
||||
"The incoming YAML document exceeds the limit:" // still to investigate, but it seems to be specific to the test framework
|
||||
);
|
||||
|
||||
|
|
|
@ -331,3 +331,43 @@ ROW a="b c d x"| DISSECT a "%{b} %{} %{d} %{}";
|
|||
a:keyword | b:keyword | d:keyword
|
||||
b c d x | b | d
|
||||
;
|
||||
|
||||
avoidAttributesRemoval
|
||||
// https://github.com/elastic/elasticsearch/issues/127468
|
||||
required_capability: keep_regex_extract_attributes
|
||||
required_capability: join_lookup_v12
|
||||
from message_types
|
||||
| eval type = 1
|
||||
| lookup join message_types_lookup on message
|
||||
| drop message
|
||||
| dissect type "%{b}"
|
||||
| stats x = max(b)
|
||||
| keep x
|
||||
;
|
||||
|
||||
x:keyword
|
||||
Success
|
||||
;
|
||||
|
||||
avoidAttributesRemoval2
|
||||
// https://github.com/elastic/elasticsearch/issues/127468
|
||||
required_capability: keep_regex_extract_attributes
|
||||
required_capability: join_lookup_v12
|
||||
FROM sample_data, employees
|
||||
| EVAL client_ip = client_ip::keyword
|
||||
| RENAME languages AS language_code
|
||||
| LOOKUP JOIN clientips_lookup ON client_ip
|
||||
| EVAL type = 1::keyword
|
||||
| EVAL type = 2
|
||||
| LOOKUP JOIN message_types_lookup ON message
|
||||
| LOOKUP JOIN languages_lookup ON language_code
|
||||
| DISSECT type "%{type_as_text}"
|
||||
| KEEP message
|
||||
| WHERE message IS NOT NULL
|
||||
| SORT message DESC
|
||||
| LIMIT 1
|
||||
;
|
||||
|
||||
message:keyword
|
||||
Disconnected
|
||||
;
|
||||
|
|
|
@ -297,3 +297,35 @@ row text = "123 abc", int = 5 | sort int asc | grok text "%{NUMBER:text:int} %{W
|
|||
text:integer | int:integer | description:keyword
|
||||
123 | 5 | abc
|
||||
;
|
||||
|
||||
avoidAttributesRemoval
|
||||
// https://github.com/elastic/elasticsearch/issues/127468
|
||||
required_capability: union_types
|
||||
required_capability: join_lookup_v12
|
||||
required_capability: keep_regex_extract_attributes
|
||||
from multivalue_points,h*,messa*
|
||||
| eval `card` = true, PbehoQUqKSF = "VLGjhcgNkQiEVyCLo", DsxMWtGL = true, qSxTIvUorMim = true, `location` = 8593178066470220111, type = -446161601, FSkGQkgmS = false
|
||||
| eval PbehoQUqKSF = 753987034, HLNMQfQj = true, `within` = true, `id` = "JDKKkYwhhh", lk = null, aecuvjTkgZza = 510616700, aDAMpuVtNX = null, qCopgNZPt = "AjhJUtZefqKdJYH", BxHHlFoA = "isBrmhKLc"
|
||||
| rename message as message
|
||||
| lookup join message_types_lookup on message
|
||||
| sort PbehoQUqKSF DESC, ip1 DESC NULLS LAST
|
||||
| limit 5845
|
||||
| drop `subset`, ip*, `card`, `within`, host.v*, description, `aecuvjTkgZza`, host.version, `ip0`, height_range, DsxMWtGL, host_group, `aDAMpuVtNX`, PbehoQUqKSF, `intersects`, `host.os`, aDAMpuVtNX, *ight_range, HLNMQfQj, `FSkGQkgmS`, BxHHlFoA, card
|
||||
| grok type "%{WORD:GknCxQFo}"
|
||||
| eval `location` = null, ZjWUUvGusyyz = null, HeeKIpzgh = false, `id` = 4325287503714500302, host = false, `lk` = null, HvTQdOqFajpH = false, fKNlsYoT = true, `location` = -1158449473, `qCopgNZPt` = 1219986202615280617
|
||||
| drop HeeKIpzg*, `ZjWUUvGusyyz`, `message`, `type`, `lk`
|
||||
| grok GknCxQFo "%{WORD:location} %{WORD:HvTQdOqFajpH}"
|
||||
| drop HvTQdOqFajpH, `location`, centroid
|
||||
| mv_expand GknCxQFo
|
||||
| limit 410
|
||||
| limit 3815
|
||||
| rename `id` AS `GknCxQFo`
|
||||
| grok host.name "%{WORD:oGQQZHxQHj} %{WORD:qCopgNZPt} %{WORD:vHKOmmocPcTO}"
|
||||
| stats BkQXJRMeAM = min(GknCxQFo)
|
||||
| keep `BkQXJRMeAM`
|
||||
;
|
||||
|
||||
BkQXJRMeAM:long
|
||||
4325287503714500302
|
||||
;
|
||||
|
||||
|
|
|
@ -1091,7 +1091,13 @@ public class EsqlCapabilities {
|
|||
* During resolution (pre-analysis) we have to consider that joins can override regex extracted values
|
||||
* see <a href="https://github.com/elastic/elasticsearch/issues/127467"> ES|QL: pruning of JOINs leads to missing fields #127467 </a>
|
||||
*/
|
||||
FIX_JOIN_MASKING_REGEX_EXTRACT;
|
||||
FIX_JOIN_MASKING_REGEX_EXTRACT,
|
||||
|
||||
/**
|
||||
* Avoid GROK and DISSECT attributes being removed when resolving fields.
|
||||
* see <a href="https://github.com/elastic/elasticsearch/issues/127468"> ES|QL: Grok only supports KEYWORD or TEXT values, found expression [type] type [INTEGER] #127468 </a>
|
||||
*/
|
||||
KEEP_REGEX_EXTRACT_ATTRIBUTES;
|
||||
|
||||
private final boolean enabled;
|
||||
|
||||
|
|
|
@ -656,7 +656,7 @@ public class EsqlSession {
|
|||
//
|
||||
// and ips_policy enriches the results with the same name ip field),
|
||||
// these aliases should be kept in the list of fields.
|
||||
if (canRemoveAliases[0] && couldOverrideAliases(p)) {
|
||||
if (canRemoveAliases[0] && p.anyMatch(EsqlSession::couldOverrideAliases)) {
|
||||
canRemoveAliases[0] = false;
|
||||
}
|
||||
if (canRemoveAliases[0]) {
|
||||
|
@ -726,7 +726,8 @@ public class EsqlSession {
|
|||
|| p instanceof Project
|
||||
|| p instanceof RegexExtract
|
||||
|| p instanceof Rename
|
||||
|| p instanceof TopN) == false;
|
||||
|| p instanceof TopN
|
||||
|| p instanceof UnresolvedRelation) == false;
|
||||
}
|
||||
|
||||
private static boolean matchByName(Attribute attr, String other, boolean skipIfPattern) {
|
||||
|
|
|
@ -604,7 +604,20 @@ public class IndexResolverFieldNamesTests extends ESTestCase {
|
|||
| eval language = concat(x, "-", lang)
|
||||
| keep emp_no, x, lang, language
|
||||
| sort emp_no desc | limit 3""",
|
||||
Set.of("languages", "languages.*", "emp_no", "emp_no.*", "language_name", "language_name.*", "x", "x.*", "lang", "lang.*")
|
||||
Set.of(
|
||||
"emp_no",
|
||||
"x",
|
||||
"lang",
|
||||
"language",
|
||||
"language_name",
|
||||
"languages",
|
||||
"x.*",
|
||||
"language_name.*",
|
||||
"languages.*",
|
||||
"emp_no.*",
|
||||
"lang.*",
|
||||
"language.*"
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -1355,7 +1368,7 @@ public class IndexResolverFieldNamesTests extends ESTestCase {
|
|||
| grok type "%{WORD:b}"
|
||||
| stats x = max(b)
|
||||
| keep x""", Set.of());
|
||||
assertThat(fieldNames, equalTo(Set.of("message", "x", "x.*", "message.*")));
|
||||
assertThat(fieldNames, equalTo(Set.of("x", "b", "type", "message", "x.*", "message.*", "type.*", "b.*")));
|
||||
}
|
||||
|
||||
public void testAvoidGrokAttributesRemoval2() {
|
||||
|
@ -1388,6 +1401,60 @@ public class IndexResolverFieldNamesTests extends ESTestCase {
|
|||
|
||||
}
|
||||
|
||||
/**
|
||||
* @see <a href="https://github.com/elastic/elasticsearch/issues/127468">ES|QL: Grok only supports KEYWORD or TEXT values, found expression [type] type [INTEGER]</a>
|
||||
*/
|
||||
public void testAvoidGrokAttributesRemoval4() {
|
||||
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
|
||||
Set<String> fieldNames = fieldNames("""
|
||||
from message_types
|
||||
| eval type = 1
|
||||
| lookup join message_types_lookup on message
|
||||
| drop message
|
||||
| grok type "%{WORD:b}"
|
||||
| stats x = max(b)
|
||||
| keep x""", Set.of());
|
||||
assertThat(fieldNames, equalTo(Set.of("x", "b", "type", "message", "x.*", "message.*", "type.*", "b.*")));
|
||||
}
|
||||
|
||||
/**
|
||||
* @see <a href="https://github.com/elastic/elasticsearch/issues/127468">ES|QL: Grok only supports KEYWORD or TEXT values, found expression [type] type [INTEGER]</a>
|
||||
*/
|
||||
public void testAvoidGrokAttributesRemoval5() {
|
||||
assumeTrue("LOOKUP JOIN available as snapshot only", EsqlCapabilities.Cap.JOIN_LOOKUP_V12.isEnabled());
|
||||
Set<String> fieldNames = fieldNames("""
|
||||
FROM sample_data, employees
|
||||
| EVAL client_ip = client_ip::keyword
|
||||
| RENAME languages AS language_code
|
||||
| LOOKUP JOIN clientips_lookup ON client_ip
|
||||
| EVAL type = 1::keyword
|
||||
| EVAL type = 2
|
||||
| LOOKUP JOIN message_types_lookup ON message
|
||||
| LOOKUP JOIN languages_lookup ON language_code
|
||||
| DISSECT type "%{type_as_text}"
|
||||
| KEEP message
|
||||
| WHERE message IS NOT NULL
|
||||
| SORT message DESC
|
||||
| LIMIT 1""", Set.of());
|
||||
assertThat(
|
||||
fieldNames,
|
||||
equalTo(
|
||||
Set.of(
|
||||
"message",
|
||||
"type",
|
||||
"languages",
|
||||
"client_ip",
|
||||
"language_code",
|
||||
"language_code.*",
|
||||
"client_ip.*",
|
||||
"message.*",
|
||||
"type.*",
|
||||
"languages.*"
|
||||
)
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
public void testEnrichOnDefaultField() {
|
||||
Set<String> fieldNames = fieldNames("""
|
||||
from employees
|
||||
|
|
Loading…
Reference in New Issue