ESQL: Workaround for RLike handling of empty lang pattern (#128895)
Lucene's `org.apache.lucene.util.automaton.Operations#getSingleton` fails when given the Automaton built from a `REGEXP_EMPTY` `RegExp`. This adds a workaround for that: check whether the automaton is empty (has no states) before calling into that failing method, and report no exact match in that case. Closes #128813
This commit is contained in:
parent
e24fd32c35
commit
1a76bc2dc8
|
@ -0,0 +1,6 @@
|
|||
pr: 128895
|
||||
summary: Workaround for RLike handling of empty lang pattern
|
||||
area: ES|QL
|
||||
type: bug
|
||||
issues:
|
||||
- 128813
|
|
@ -32,7 +32,11 @@ public abstract class AbstractStringPattern implements StringPattern {
|
|||
|
||||
@Override
|
||||
public String exactMatch() {
|
||||
IntsRef singleton = Operations.getSingleton(automaton());
|
||||
Automaton a = automaton();
|
||||
if (a.getNumStates() == 0) { // workaround for https://github.com/elastic/elasticsearch/pull/128887
|
||||
return null; // Empty automaton has no matches
|
||||
}
|
||||
IntsRef singleton = Operations.getSingleton(a);
|
||||
return singleton != null ? UnicodeUtil.newString(singleton.ints, singleton.offset, singleton.length) : null;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -33,8 +33,12 @@ public class StringPatternTests extends ESTestCase {
|
|||
return rlike(pattern).matchesAll();
|
||||
}
|
||||
|
||||
private String exactMatchRLike(String pattern) {
|
||||
return rlike(pattern).exactMatch();
|
||||
}
|
||||
|
||||
private boolean rlikeExactMatch(String pattern) {
|
||||
return pattern.equals(rlike(pattern).exactMatch());
|
||||
return pattern.equals(exactMatchRLike(pattern));
|
||||
}
|
||||
|
||||
public void testWildcardMatchAll() {
|
||||
|
@ -86,4 +90,20 @@ public class StringPatternTests extends ESTestCase {
|
|||
assertTrue(rlikeExactMatch("abc"));
|
||||
assertTrue(rlikeExactMatch("12345"));
|
||||
}
|
||||
|
||||
public void testRegexExactMatchWithEmptyMatch() {
|
||||
// As soon as there's one non-conditional `#` in the pattern (i.e. not made optional with `?` nor one branch of an alternation `|`), it'll match nothing
|
||||
assertNull(exactMatchRLike("#"));
|
||||
assertNull(exactMatchRLike("##"));
|
||||
assertNull(exactMatchRLike("#foo"));
|
||||
assertNull(exactMatchRLike("#foo#"));
|
||||
assertNull(exactMatchRLike("f#oo"));
|
||||
assertNull(exactMatchRLike("foo#"));
|
||||
assertNull(exactMatchRLike("#[A-Z]*"));
|
||||
assertNull(exactMatchRLike("foo(#)"));
|
||||
|
||||
assertNotNull(exactMatchRLike("foo#?"));
|
||||
assertNotNull(exactMatchRLike("#|foo"));
|
||||
assertNotNull(exactMatchRLike("foo|#"));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -66,8 +66,8 @@ public final class TestUtils {
|
|||
/** Returns the input string, but with parts of it having the letter casing changed. */
|
||||
public static String randomCasing(String input) {
|
||||
StringBuilder sb = new StringBuilder(input.length());
|
||||
for (int i = 0, inputLen = input.length(), step = (int) Math.sqrt(inputLen), chunkEnd; i < inputLen; i += step) {
|
||||
chunkEnd = Math.min(i + step, inputLen);
|
||||
for (int i = 0, inputLen = input.length(), step = (int) Math.sqrt(inputLen); i < inputLen; i += step) {
|
||||
var chunkEnd = Math.min(i + step, inputLen);
|
||||
var chunk = input.substring(i, chunkEnd);
|
||||
sb.append(randomBoolean() ? chunk.toLowerCase(Locale.ROOT) : chunk.toUpperCase(Locale.ROOT));
|
||||
}
|
||||
|
|
|
@ -1440,6 +1440,46 @@ public abstract class RestEsqlTestCase extends ESRestTestCase {
|
|||
assertThat(answer.get("values"), equalTo(List.of(List.of("_\"_$_(_)_+_._[_]_^_{_|_}___", "_#_&_<_>___"))));
|
||||
}
|
||||
|
||||
public void testRLikeHandlingOfEmptyLanguagePattern() throws IOException {
|
||||
createIndex(testIndexName(), Settings.EMPTY, """
|
||||
{
|
||||
"properties": {
|
||||
"field": {
|
||||
"type": "keyword"
|
||||
}
|
||||
}
|
||||
}
|
||||
""");
|
||||
for (var val : List.of("#", "foo#bar")) {
|
||||
Request doc = new Request("POST", testIndexName() + "/_doc?refresh=true");
|
||||
doc.setJsonEntity("""
|
||||
{
|
||||
"field": "%s"
|
||||
}
|
||||
""".formatted(val));
|
||||
client().performRequest(doc);
|
||||
}
|
||||
// pushed down, matches nothing
|
||||
var query = "FROM " + testIndexName() + " | WHERE TO_LOWER(field) RLIKE \"#\"";
|
||||
var answer = runEsql(requestObjectBuilder().query(query));
|
||||
assertThat(answer.get("values"), equalTo(List.of()));
|
||||
|
||||
// matches nothing
|
||||
query = "FROM " + testIndexName() + " | WHERE field RLIKE \"#\"";
|
||||
answer = runEsql(requestObjectBuilder().query(query));
|
||||
assertThat(answer.get("values"), equalTo(List.of()));
|
||||
|
||||
// matches one doc
|
||||
query = "FROM " + testIndexName() + " | WHERE field RLIKE \"\\\\#\"";
|
||||
answer = runEsql(requestObjectBuilder().query(query));
|
||||
assertThat(answer.get("values"), equalTo(List.of(List.of("#"))));
|
||||
|
||||
// matches both docs
|
||||
query = "FROM " + testIndexName() + " | WHERE field RLIKE \".*\\\\#.*\" | SORT field";
|
||||
answer = runEsql(requestObjectBuilder().query(query));
|
||||
assertThat(answer.get("values"), equalTo(List.of(List.of("#"), List.of("foo#bar"))));
|
||||
}
|
||||
|
||||
protected static Request prepareRequestWithOptions(RequestObjectBuilder requestObject, Mode mode) throws IOException {
|
||||
requestObject.build();
|
||||
Request request = prepareRequest(mode);
|
||||
|
|
|
@ -423,3 +423,13 @@ emp_no:integer |first_name:keyword
|
|||
10001 |Georgi
|
||||
10055 |Georgy
|
||||
;
|
||||
|
||||
# test for https://github.com/elastic/elasticsearch/issues/128813
|
||||
rlikeWithEmptyLanguagePattern
|
||||
required_capability: rlike_with_empty_language_pattern
|
||||
ROW x = "abc" | EVAL bool = x RLIKE "#"
|
||||
;
|
||||
|
||||
x:keyword | bool:boolean
|
||||
abc | false
|
||||
;
|
||||
|
|
|
@ -1177,6 +1177,11 @@ public class EsqlCapabilities {
|
|||
*/
|
||||
ENABLE_LOOKUP_JOIN_ON_ALIASES,
|
||||
|
||||
/**
|
||||
* Allows RLIKE to correctly handle the "empty language" flag, `#`.
|
||||
*/
|
||||
RLIKE_WITH_EMPTY_LANGUAGE_PATTERN,
|
||||
|
||||
/**
|
||||
* MATCH PHRASE function
|
||||
*/
|
||||
|
|
Loading…
Reference in New Issue