ESQL: Improve error message for ( and [ (#124177)

Due to recent grammar changes made ( token to no longer be reported by its text rather by his internal token name. Due to the use of pushMode, the symbol is not treated as a literal rather as a symbol. To address this, the parser listener looks at the error message and changes the message before returning it to the user. Replace hacky regex approach with Vocabulary substitution (not as pluggable as it could be yet much better) Fix #124145 Relates #123085 #121948 Co-authored-by: Alexander Spies <alexander.spies@elastic.co>
2025-03-15 16:18:21 -07:00 · 2025-03-15 16:18:21 -07:00 · e8533c1732
parent 4a0c935e82
commit e8533c1732
3 changed files with 69 additions and 4 deletions
--- a/docs/changelog/124177.yaml
+++ b/docs/changelog/124177.yaml
@ -0,0 +1,6 @@
+pr: 124177
+summary: "Improve error message for ( and ["
+area: ES|QL
+type: bug
+issues:
+ - 124145
--- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlParser.java
+++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/parser/EsqlParser.java
@ -14,6 +14,7 @@ import org.antlr.v4.runtime.RecognitionException;
 import org.antlr.v4.runtime.Recognizer;
 import org.antlr.v4.runtime.Token;
 import org.antlr.v4.runtime.TokenSource;
+import org.antlr.v4.runtime.VocabularyImpl;
 import org.antlr.v4.runtime.atn.PredictionMode;
 import org.elasticsearch.logging.LogManager;
 import org.elasticsearch.logging.Logger;
@ -23,6 +24,7 @@ import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan;
 import org.elasticsearch.xpack.esql.telemetry.PlanTelemetry;

 import java.util.BitSet;
+import java.util.Map;
 import java.util.function.BiFunction;
 import java.util.function.Function;
 import java.util.regex.Matcher;
@ -45,6 +47,45 @@ public class EsqlParser {
     */
    public static final int MAX_LENGTH = 1_000_000;

+    private static void replaceSymbolWithLiteral(Map<String, String> symbolReplacements, String[] literalNames, String[] symbolicNames) {
+        for (int i = 0, replacements = symbolReplacements.size(); i < symbolicNames.length && replacements > 0; i++) {
+            String symName = symbolicNames[i];
+            if (symName != null) {
+                String replacement = symbolReplacements.get(symName);
+                if (replacement != null && literalNames[i] == null) {
+                    // literals are single quoted
+                    literalNames[i] = "'" + replacement + "'";
+                    replacements--;
+                }
+            }
+        }
+    }
+
+    /**
+     * Add the literal name to a number of tokens that due to ANTLR internals/ATN
+     * have their symbolic name returns instead during error reporting.
+     * When reporting token errors, ANTLR uses the Vocabulary class to get the displayName
+     * (if set), otherwise falls back to the literal one and eventually uses the symbol name.
+     * Since the Vocabulary is static and not pluggable, this code modifies the underlying
+     * arrays by setting the literal string manually based on the token index.
+     * This is needed since some symbols, especially around setting up the mode, end up losing
+     * their literal representation.
+     * NB: this code is highly dependent on the ANTLR internals and thus will likely break
+     * during upgrades.
+     * NB: Can't use this for replacing DEV_ since the Vocabular is static while DEV_ replacement occurs per runtime configuration
+     */
+    static {
+        Map<String, String> symbolReplacements = Map.of("LP", "(", "OPENING_BRACKET", "[");
+
+        // the vocabularies have the same content however are different instances
+        // for extra reliability, perform the replacement for each map
+        VocabularyImpl parserVocab = (VocabularyImpl) EsqlBaseParser.VOCABULARY;
+        replaceSymbolWithLiteral(symbolReplacements, parserVocab.getLiteralNames(), parserVocab.getSymbolicNames());
+
+        VocabularyImpl lexerVocab = (VocabularyImpl) EsqlBaseLexer.VOCABULARY;
+        replaceSymbolWithLiteral(symbolReplacements, lexerVocab.getLiteralNames(), lexerVocab.getSymbolicNames());
+    }
+
    private EsqlConfig config = new EsqlConfig();

    public EsqlConfig config() {
@ -142,11 +183,14 @@ public class EsqlParser {
            String message,
            RecognitionException e
        ) {
-            if (recognizer instanceof EsqlBaseParser parser && parser.isDevVersion() == false) {
-                Matcher m = REPLACE_DEV.matcher(message);
-                message = m.replaceAll(StringUtils.EMPTY);
-            }
+            if (recognizer instanceof EsqlBaseParser parser) {
+                Matcher m;

+                if (parser.isDevVersion() == false) {
+                    m = REPLACE_DEV.matcher(message);
+                    message = m.replaceAll(StringUtils.EMPTY);
+                }
+            }
            throw new ParsingException(message, e, line, charPositionInLine);
        }
    };
--- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java
+++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/parser/StatementParserTests.java
@ -3141,6 +3141,21 @@ public class StatementParserTests extends AbstractStatementParserTests {
        }
    }

+    // [ and ( are used to trigger a double mode causing their symbol name (instead of text) to be used in error reporting
+    // this test checks that their are properly replaced in the error message
+    public void testPreserveParanthesis() {
+        // test for (
+        expectError("row a = 1 not in", "line 1:17: mismatched input '<EOF>' expecting '('");
+        expectError("row a = 1 | where a not in", "line 1:27: mismatched input '<EOF>' expecting '('");
+        expectError("row a = 1 | where a not in (1", "line 1:30: mismatched input '<EOF>' expecting {',', ')'}");
+        expectError("row a = 1 | where a not in [1", "line 1:28: missing '(' at '['");
+        expectError("row a = 1 | where a not in 123", "line 1:28: missing '(' at '123'");
+        // test for [
+        expectError("explain", "line 1:8: mismatched input '<EOF>' expecting '['");
+        expectError("explain ]", "line 1:9: token recognition error at: ']'");
+        expectError("explain [row x = 1", "line 1:19: missing ']' at '<EOF>'");
+    }
+
    static Alias alias(String name, Expression value) {
        return new Alias(EMPTY, name, value);
    }