ESQL: Workaround for RLike handling of empty lang pattern (#128895)

Lucene's `org.apache.lucene.util.automaton.Operations#getSingleton` fails when it is given the `Automaton` built from a `REGEXP_EMPTY` `RegExp`. This adds a workaround: check whether the automaton is empty (has no states) before calling into the failing method, and report "no exact match" in that case.

Closes #128813
This commit is contained in:
Bogdan Pintea 2025-06-06 11:01:49 +02:00 committed by GitHub
parent e24fd32c35
commit 1a76bc2dc8
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 89 additions and 4 deletions

View File

@ -0,0 +1,6 @@
pr: 128895
summary: Workaround for RLike handling of empty lang pattern
area: ES|QL
type: bug
issues:
- 128813

View File

@ -32,7 +32,11 @@ public abstract class AbstractStringPattern implements StringPattern {
/**
 * Returns the one string this pattern matches exactly, as reported by
 * {@code Operations.getSingleton} for the pattern's automaton.
 *
 * @return the exact-match string, or {@code null} when the automaton is empty or
 *         {@code getSingleton} reports no singleton
 */
@Override
public String exactMatch() {
    Automaton a = automaton();
    // Workaround for https://github.com/elastic/elasticsearch/pull/128887: the guard must run
    // before Operations.getSingleton, which fails on an automaton that has no states.
    if (a.getNumStates() == 0) {
        return null; // Empty automaton has no matches
    }
    IntsRef singleton = Operations.getSingleton(a);
    return singleton != null ? UnicodeUtil.newString(singleton.ints, singleton.offset, singleton.length) : null;
}
}

View File

@ -33,8 +33,12 @@ public class StringPatternTests extends ESTestCase {
return rlike(pattern).matchesAll();
}
/** Builds the RLIKE pattern for {@code pattern} and returns whatever its {@code exactMatch()} yields. */
private String exactMatchRLike(String pattern) {
    var compiled = rlike(pattern);
    return compiled.exactMatch();
}
/**
 * Returns {@code true} when the exact match reported for the RLIKE {@code pattern} equals the
 * pattern text itself.
 */
private boolean rlikeExactMatch(String pattern) {
    // Single return: go through the shared helper rather than repeating the rlike(...) call.
    return pattern.equals(exactMatchRLike(pattern));
}
public void testWildcardMatchAll() {
@ -86,4 +90,20 @@ public class StringPatternTests extends ESTestCase {
assertTrue(rlikeExactMatch("abc"));
assertTrue(rlikeExactMatch("12345"));
}
public void testRegexExactMatchWithEmptyMatch() {
    // An unconditional `#` anywhere in the pattern makes the whole pattern match nothing,
    // so no exact match is reported.
    String[] matchNothing = { "#", "##", "#foo", "#foo#", "f#oo", "foo#", "#[A-Z]*", "foo(#)" };
    for (String regex : matchNothing) {
        assertNull(exactMatchRLike(regex));
    }

    // When `#` is optional or only one branch of an alternation, an exact match is still reported.
    String[] stillMatching = { "foo#?", "#|foo", "foo|#" };
    for (String regex : stillMatching) {
        assertNotNull(exactMatchRLike(regex));
    }
}
}

View File

@ -66,8 +66,8 @@ public final class TestUtils {
/** Returns the input string, but with parts of it having the letter casing changed. */
public static String randomCasing(String input) {
StringBuilder sb = new StringBuilder(input.length());
for (int i = 0, inputLen = input.length(), step = (int) Math.sqrt(inputLen), chunkEnd; i < inputLen; i += step) {
chunkEnd = Math.min(i + step, inputLen);
for (int i = 0, inputLen = input.length(), step = (int) Math.sqrt(inputLen); i < inputLen; i += step) {
var chunkEnd = Math.min(i + step, inputLen);
var chunk = input.substring(i, chunkEnd);
sb.append(randomBoolean() ? chunk.toLowerCase(Locale.ROOT) : chunk.toUpperCase(Locale.ROOT));
}

View File

@ -1440,6 +1440,46 @@ public abstract class RestEsqlTestCase extends ESRestTestCase {
assertThat(answer.get("values"), equalTo(List.of(List.of("_\"_$_(_)_+_._[_]_^_{_|_}___", "_#_&_<_>___"))));
}
/**
 * Indexes two keyword values containing {@code #} and checks that RLIKE with a bare {@code #}
 * returns no rows, while an escaped {@code #} returns the expected documents.
 */
public void testRLikeHandlingOfEmptyLanguagePattern() throws IOException {
    createIndex(testIndexName(), Settings.EMPTY, """
        {
        "properties": {
        "field": {
        "type": "keyword"
        }
        }
        }
        """);

    // One document per value; refresh=true makes each one searchable right away.
    for (var value : List.of("#", "foo#bar")) {
        Request indexRequest = new Request("POST", testIndexName() + "/_doc?refresh=true");
        indexRequest.setJsonEntity("""
            {
            "field": "%s"
            }
            """.formatted(value));
        client().performRequest(indexRequest);
    }

    // pushed down, matches nothing
    var pushedDownQuery = "FROM " + testIndexName() + " | WHERE TO_LOWER(field) RLIKE \"#\"";
    var pushedDownAnswer = runEsql(requestObjectBuilder().query(pushedDownQuery));
    assertThat(pushedDownAnswer.get("values"), equalTo(List.of()));

    // matches nothing
    var bareHashQuery = "FROM " + testIndexName() + " | WHERE field RLIKE \"#\"";
    var bareHashAnswer = runEsql(requestObjectBuilder().query(bareHashQuery));
    assertThat(bareHashAnswer.get("values"), equalTo(List.of()));

    // matches one doc
    var escapedHashQuery = "FROM " + testIndexName() + " | WHERE field RLIKE \"\\\\#\"";
    var escapedHashAnswer = runEsql(requestObjectBuilder().query(escapedHashQuery));
    assertThat(escapedHashAnswer.get("values"), equalTo(List.of(List.of("#"))));

    // matches both docs
    var containsHashQuery = "FROM " + testIndexName() + " | WHERE field RLIKE \".*\\\\#.*\" | SORT field";
    var containsHashAnswer = runEsql(requestObjectBuilder().query(containsHashQuery));
    assertThat(containsHashAnswer.get("values"), equalTo(List.of(List.of("#"), List.of("foo#bar"))));
}
protected static Request prepareRequestWithOptions(RequestObjectBuilder requestObject, Mode mode) throws IOException {
requestObject.build();
Request request = prepareRequest(mode);

View File

@ -423,3 +423,13 @@ emp_no:integer |first_name:keyword
10001 |Georgi
10055 |Georgy
;
# test for https://github.com/elastic/elasticsearch/issues/128813
rlikeWithEmptyLanguagePattern
required_capability: rlike_with_empty_language_pattern
ROW x = "abc" | EVAL bool = x RLIKE "#"
;
x:keyword | bool:boolean
abc | false
;

View File

@ -1177,6 +1177,11 @@ public class EsqlCapabilities {
*/
ENABLE_LOOKUP_JOIN_ON_ALIASES,
/**
* Allows RLIKE to correctly handle the "empty language" flag, `#`.
*/
RLIKE_WITH_EMPTY_LANGUAGE_PATTERN,
/**
* MATCH PHRASE function
*/