qtbase-opensource-src/util/lexgen/re2nfa.cpp

/****************************************************************************
**
** Copyright (C) 2016 The Qt Company Ltd.
** Contact: https://www.qt.io/licensing/
**
** This file is part of the utils of the Qt Toolkit.
**
** $QT_BEGIN_LICENSE:GPL-EXCEPT$
** Commercial License Usage
** Licensees holding valid commercial Qt licenses may use this file in
** accordance with the commercial license agreement provided with the
** Software or, alternatively, in accordance with the terms contained in
** a written agreement between you and The Qt Company. For licensing terms
** and conditions see https://www.qt.io/terms-conditions. For further
** information use the contact form at https://www.qt.io/contact-us.
**
** GNU General Public License Usage
** Alternatively, this file may be used under the terms of the GNU
** General Public License version 3 as published by the Free Software
** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT
** included in the packaging of this file. Please review the following
** information to ensure the GNU General Public License requirements will
** be met: https://www.gnu.org/licenses/gpl-3.0.html.
**
** $QT_END_LICENSE$
**
****************************************************************************/

#include "re2nfa.h"
#include "tokenizer.cpp"

RE2NFA::RE2NFA(const QMap<QString, NFA> &macros, const QSet<InputType> &maxInputSet, Qt::CaseSensitivity cs)
    : macros(macros), index(0), errorColumn(-1), maxInputSet(maxInputSet), caseSensitivity(cs)
{
}

NFA RE2NFA::parse(const QString &expression, int *errCol)
{
    tokenize(expression);

    if (symbols.isEmpty())
        return NFA();

    index = 0;

    NFA result = parseExpr();
    if (result.isEmpty()) {
        if (errCol)
            *errCol = errorColumn;
    }
    return result;
}

NFA RE2NFA::parseExpr()
{
    NFA value = parseBranch();
    while (test(TOK_OR)) {
        NFA rhs = parseBranch();
        value = NFA::createAlternatingNFA(value, rhs);
    }
    return value;
}

NFA RE2NFA::parseBranch()
{
    NFA value = parsePiece();
    if (!hasNext())
        return value;
    NFA next;
    do {
        next = parsePiece();
        if (!next.isEmpty())
            value = NFA::createConcatenatingNFA(value, next);
    } while (!next.isEmpty() && hasNext());
    return value;
}

NFA RE2NFA::parsePiece()
{
    NFA atom = parseAtom();
    if (atom.isEmpty() || !hasNext())
        return atom;
    return parseMaybeQuantifier(atom);
}

NFA RE2NFA::parseAtom()
{
    // ####
    switch (next()) {
        case TOK_STRING:
            return createCharNFA();
        case TOK_LPAREN: {
            NFA subExpr = parseExpr();
            next(TOK_RPAREN);
            return subExpr;
        }
        case TOK_LBRACE: {
            QString macroName = lexemUntil(TOK_RBRACE);
            QMap<QString, NFA>::ConstIterator macro = macros.find(macroName);
            if (macro == macros.end()) {
                qWarning("Unknown macro '%s' - probably used before defined", qPrintable(macroName));
                return NFA();
            }
            return *macro;
        }
        case TOK_LBRACKET: {
            NFA set = parseSet();
            next(TOK_RBRACKET);
            return set;
        }
        case TOK_SEQUENCE:
            return parseSet2();
        case TOK_DOT:
            return NFA::createSetNFA(maxInputSet);
        default:
            prev();
            return NFA();
    }
}

NFA RE2NFA::parseMaybeQuantifier(const NFA &nfa)
{
    // ####
    switch (next()) {
        case TOK_STAR:
            return NFA::createOptionalNFA(nfa);
        case TOK_QUESTION:
            return NFA::createZeroOrOneNFA(nfa);
        case TOK_PLUS:
            return NFA::createConcatenatingNFA(nfa, NFA::createOptionalNFA(nfa));
        case TOK_LBRACE: {
              const int rewind = index - 1;

              QString lexemBeforeComma;
              QString lexemAfterComma;
              bool seenComma = false;
              forever {
                  if (test(TOK_COMMA)) {
                      if (seenComma) {
                          errorColumn = symbol().column;
                          return NFA();
                      }
                      seenComma = true;
                  } else if (test(TOK_RBRACE)) {
                      break;
                  } else {
                      next(TOK_STRING);
                      if (seenComma)
                          lexemAfterComma += symbol().lexem;
                      else
                          lexemBeforeComma += symbol().lexem;
                  }
              }
              bool isNumber = false;
              int min = lexemBeforeComma.toInt(&isNumber);
              if (!isNumber) {
                  index = rewind;
                  return nfa;
              }
              int max = min;
              if (seenComma) {
                  max = lexemAfterComma.toInt(&isNumber);
                  if (!isNumber) {
                      errorColumn = symbol().column;
                      return NFA();
                  }
              }
              return NFA::applyQuantity(nfa, min, max);
        }
        default:
            prev();
            return nfa;
    }
}

NFA RE2NFA::parseSet()
{
    QSet<InputType> set;
    bool negate = false;

    next(TOK_STRING);

    do {
        Q_ASSERT(symbol().lexem.length() == 1);
        // ###
        QChar ch = symbol().lexem.at(0);
        if (set.isEmpty() && ch == QLatin1Char('^')) {
            negate = true;
            continue;
        }

        // look ahead for ranges like a-z
        bool rangeFound = false;
        if (test(TOK_STRING)) {
            if (symbol().lexem.length() == 1
                && symbol().lexem.at(0) == QLatin1Char('-')) {
                next(TOK_STRING);
                Q_ASSERT(symbol().lexem.length() == 1);
                QChar last = symbol().lexem.at(0);

                if (ch.unicode() > last.unicode())
                    qSwap(ch, last);

                for (ushort i = ch.unicode(); i <= last.unicode(); ++i) {
                    if (caseSensitivity == Qt::CaseInsensitive) {
                        set.insert(QChar(i).toLower().unicode());
                    } else {
                        set.insert(i);
                    }
                }

                rangeFound = true;
            } else {
                prev();
            }
        }

        if (!rangeFound) {
            if (caseSensitivity == Qt::CaseInsensitive) {
                set.insert(ch.toLower().unicode());
            } else {
                set.insert(ch.unicode());
            }
        }
    } while (test(TOK_STRING));

    if (negate) {
        QSet<InputType> negatedSet = maxInputSet;
        negatedSet.subtract(set);
        set = negatedSet;
    }

    return NFA::createSetNFA(set);
}

NFA RE2NFA::parseSet2()
{
    QSet<InputType> set;
    bool negate = false;

    QString str = symbol().lexem;
    // strip off brackets
    str.chop(1);
    str.remove(0, 1);

    int i = 0;
    while (i < str.length()) {
        // ###
        QChar ch = str.at(i++);
        if (set.isEmpty() && ch == QLatin1Char('^')) {
            negate = true;
            continue;
        }

        // look ahead for ranges like a-z
        bool rangeFound = false;
        if (i < str.length() - 1 && str.at(i) == QLatin1Char('-')) {
            ++i;
            QChar last = str.at(i++);

            if (ch.unicode() > last.unicode())
                qSwap(ch, last);

            for (ushort i = ch.unicode(); i <= last.unicode(); ++i) {
                if (caseSensitivity == Qt::CaseInsensitive) {
                    set.insert(QChar(i).toLower().unicode());
                } else {
                    set.insert(i);
                }
            }

            rangeFound = true;
        }

        if (!rangeFound) {
            if (caseSensitivity == Qt::CaseInsensitive) {
                set.insert(ch.toLower().unicode());
            } else {
                set.insert(ch.unicode());
            }
        }
    }

    if (negate) {
        QSet<InputType> negatedSet = maxInputSet;
        negatedSet.subtract(set);
        set = negatedSet;
    }

    return NFA::createSetNFA(set);
}
NFA RE2NFA::createCharNFA()
{
    NFA nfa;
    // ####
    if (caseSensitivity == Qt::CaseInsensitive) {
        nfa = NFA::createStringNFA(symbol().lexem.toLower().toLatin1());
    } else {
        nfa = NFA::createStringNFA(symbol().lexem.toLatin1());
    }
    return nfa;
}

static inline int skipQuote(const QString &str, int pos)
{
    while (pos < str.length()
           && str.at(pos) != QLatin1Char('"')) {
        if (str.at(pos) == QLatin1Char('\\')) {
            ++pos;
            if (pos >= str.length())
                break;
        }
        ++pos;
    }
    if (pos < str.length())
        ++pos;
    return pos;
}

#if 0
static const char*tokStr(Token t)
{
    switch (t) {
        case TOK_INVALID: return "TOK_INVALID";
        case TOK_STRING: return "TOK_STRING";
        case TOK_LBRACE: return "TOK_LBRACE";
        case TOK_RBRACE: return "TOK_RBRACE";
        case TOK_LBRACKET: return "TOK_LBRACKET";
        case TOK_RBRACKET: return "TOK_RBRACKET";
        case TOK_LPAREN: return "TOK_LPAREN";
        case TOK_RPAREN: return "TOK_RPAREN";
        case TOK_COMMA: return "TOK_COMMA";
        case TOK_STAR: return "TOK_STAR";
        case TOK_OR: return "TOK_OR";
        case TOK_QUESTION: return "TOK_QUESTION";
        case TOK_DOT: return "TOK_DOT";
        case TOK_PLUS: return "TOK_PLUS";
        case TOK_SEQUENCE: return "TOK_SEQUENCE";
        case TOK_QUOTED_STRING: return "TOK_QUOTED_STRING";
    }
    return "";
}
#endif

void RE2NFA::tokenize(const QString &input)
{
    symbols.clear();
#if 1
    RegExpTokenizer tokenizer(input);
    Symbol sym;
    int tok = tokenizer.lex();
    while (tok != -1) {
        Symbol sym;
        sym.token = static_cast<Token>(tok);
        sym.lexem = input.mid(tokenizer.lexemStart, tokenizer.lexemLength);

        if (sym.token == TOK_QUOTED_STRING) {
            sym.lexem.chop(1);
            sym.lexem.remove(0, 1);
            sym.token = TOK_STRING;
        }

        if (sym.token == TOK_STRING || sym.token == TOK_SEQUENCE) {
            for (int i = 0; i < sym.lexem.length(); ++i) {
                if (sym.lexem.at(i) == '\\') {
                    if (i >= sym.lexem.length() - 1)
                        break;
                    QChar ch = sym.lexem.at(i + 1);
                    if (ch == QLatin1Char('n')) {
                        ch = '\n';
                    } else if (ch == QLatin1Char('r')) {
                        ch = '\r';
                    } else if (ch == QLatin1Char('t')) {
                        ch = '\t';
                    } else if (ch == QLatin1Char('f')) {
                        ch = '\f';
                    }
                    sym.lexem.replace(i, 2, ch);
                }
            }
        }

        /*
        if (sym.token == TOK_SEQUENCE) {
            Symbol s;
            s.token = TOK_LBRACKET;
            s.lexem = "[";
            symbols.append(s);

            for (int i = 1; i < sym.lexem.length() - 1; ++i) {
                s.token = TOK_STRING;
                s.lexem = sym.lexem.at(i);
                symbols.append(s);
            }

            s.token = TOK_RBRACKET;
            s.lexem = "]";
            symbols.append(s);

            tok = tokenizer.lex();
            continue;
        }
        */

        symbols.append(sym);
        tok = tokenizer.lex();
    }
#else
    int pos = 0;
    bool insideSet = false;
    while (pos < input.length()) {
        QChar ch = input.at(pos);

        Symbol sym;
        sym.column = pos;
        sym.token = TOK_INVALID;
        sym.lexem = QString(ch);
        switch (ch.toLatin1()) {
            case '"': {
                if (insideSet) {
                    sym.token = TOK_STRING;
                    sym.lexem = QString(ch);
                    symbols += sym;
                    ++pos;
                    continue;
                }
                if (pos + 1 >= input.length())
                    return;
                int quoteEnd = skipQuote(input, pos + 1);
                sym.token = TOK_STRING;
                sym.lexem = input.mid(pos + 1, quoteEnd - pos - 2);
                symbols += sym;
                pos = quoteEnd;
                continue;
            }
            case '{':
                sym.token = (insideSet ? TOK_STRING : TOK_LBRACE);
                break;
            case '}':
                sym.token = (insideSet ? TOK_STRING : TOK_RBRACE);
                break;
            case '[':
                insideSet = true;
                sym.token = TOK_LBRACKET;
                break;
            case ']':
                insideSet = false;
                sym.token = TOK_RBRACKET;
                break;
            case '(':
                sym.token = (insideSet ? TOK_STRING : TOK_LPAREN);
                break;
            case ')':
                sym.token = (insideSet ? TOK_STRING : TOK_RPAREN);
                break;
            case ',':
                sym.token = (insideSet ? TOK_STRING : TOK_COMMA);
                break;
            case '*':
                sym.token = (insideSet ? TOK_STRING : TOK_STAR);
                break;
            case '|':
                sym.token = (insideSet ? TOK_STRING : TOK_OR);
                break;
            case '?':
                sym.token = (insideSet ? TOK_STRING : TOK_QUESTION);
                break;
            case '.':
                sym.token = (insideSet ? TOK_STRING : TOK_DOT);
                break;
            case '+':
                sym.token = (insideSet ? TOK_STRING : TOK_PLUS);
                break;
            case '\\':
                ++pos;
                if (pos >= input.length())
                    return;
                ch = input.at(pos);
                if (ch == QLatin1Char('n')) {
                    ch = '\n';
                } else if (ch == QLatin1Char('r')) {
                    ch = '\r';
                } else if (ch == QLatin1Char('t')) {
                    ch = '\t';
                } else if (ch == QLatin1Char('f')) {
                    ch = '\f';
                }
                // fall through
            default:
                sym.token = TOK_STRING;
                sym.lexem = QString(ch);
                symbols += sym;
                ++pos;
                continue;
        }
        symbols += sym;
        ++pos;
    }
#endif
#if 0
    foreach (Symbol s, symbols) {
        qDebug() << "Tok" << tokStr(s.token) << "lexem" << s.lexem;
    }
#endif
}

bool RE2NFA::next(Token t)
{
    if (hasNext() && next() == t)
        return true;
    errorColumn = symbol().column;
    Q_ASSERT(false);
    return false;
}

bool RE2NFA::test(Token t)
{
    if (index >= symbols.count())
        return false;
    if (symbols.at(index).token == t) {
        ++index;
        return true;
    }
    return false;
}

QString RE2NFA::lexemUntil(Token t)
{
    QString lexem;
    while (hasNext() && next() != t)
        lexem += symbol().lexem;
    return lexem;
}
Import Upstream version 5.15.3+dfsg 2022-05-14 17:39:13 +08:00			`/****************************************************************************`
			`**`
			`** Copyright (C) 2016 The Qt Company Ltd.`
			`** Contact: https://www.qt.io/licensing/`
			`**`
			`** This file is part of the utils of the Qt Toolkit.`
			`**`
			`** $QT_BEGIN_LICENSE:GPL-EXCEPT$`
			`** Commercial License Usage`
			`** Licensees holding valid commercial Qt licenses may use this file in`
			`** accordance with the commercial license agreement provided with the`
			`** Software or, alternatively, in accordance with the terms contained in`
			`** a written agreement between you and The Qt Company. For licensing terms`
			`** and conditions see https://www.qt.io/terms-conditions. For further`
			`** information use the contact form at https://www.qt.io/contact-us.`
			`**`
			`** GNU General Public License Usage`
			`** Alternatively, this file may be used under the terms of the GNU`
			`** General Public License version 3 as published by the Free Software`
			`** Foundation with exceptions as appearing in the file LICENSE.GPL3-EXCEPT`
			`** included in the packaging of this file. Please review the following`
			`** information to ensure the GNU General Public License requirements will`
			`** be met: https://www.gnu.org/licenses/gpl-3.0.html.`
			`**`
			`** $QT_END_LICENSE$`
			`**`
			`****************************************************************************/`

			`#include "re2nfa.h"`
			`#include "tokenizer.cpp"`

			`RE2NFA::RE2NFA(const QMap<QString, NFA> &macros, const QSet<InputType> &maxInputSet, Qt::CaseSensitivity cs)`
			`: macros(macros), index(0), errorColumn(-1), maxInputSet(maxInputSet), caseSensitivity(cs)`
			`{`
			`}`

			`NFA RE2NFA::parse(const QString &expression, int *errCol)`
			`{`
			`tokenize(expression);`

			`if (symbols.isEmpty())`
			`return NFA();`

			`index = 0;`

			`NFA result = parseExpr();`
			`if (result.isEmpty()) {`
			`if (errCol)`
			`*errCol = errorColumn;`
			`}`
			`return result;`
			`}`

			`NFA RE2NFA::parseExpr()`
			`{`
			`NFA value = parseBranch();`
			`while (test(TOK_OR)) {`
			`NFA rhs = parseBranch();`
			`value = NFA::createAlternatingNFA(value, rhs);`
			`}`
			`return value;`
			`}`

			`NFA RE2NFA::parseBranch()`
			`{`
			`NFA value = parsePiece();`
			`if (!hasNext())`
			`return value;`
			`NFA next;`
			`do {`
			`next = parsePiece();`
			`if (!next.isEmpty())`
			`value = NFA::createConcatenatingNFA(value, next);`
			`} while (!next.isEmpty() && hasNext());`
			`return value;`
			`}`

			`NFA RE2NFA::parsePiece()`
			`{`
			`NFA atom = parseAtom();`
			`if (atom.isEmpty() \|\| !hasNext())`
			`return atom;`
			`return parseMaybeQuantifier(atom);`
			`}`

			`NFA RE2NFA::parseAtom()`
			`{`
			`// ####`
			`switch (next()) {`
			`case TOK_STRING:`
			`return createCharNFA();`
			`case TOK_LPAREN: {`
			`NFA subExpr = parseExpr();`
			`next(TOK_RPAREN);`
			`return subExpr;`
			`}`
			`case TOK_LBRACE: {`
			`QString macroName = lexemUntil(TOK_RBRACE);`
			`QMap<QString, NFA>::ConstIterator macro = macros.find(macroName);`
			`if (macro == macros.end()) {`
			`qWarning("Unknown macro '%s' - probably used before defined", qPrintable(macroName));`
			`return NFA();`
			`}`
			`return *macro;`
			`}`
			`case TOK_LBRACKET: {`
			`NFA set = parseSet();`
			`next(TOK_RBRACKET);`
			`return set;`
			`}`
			`case TOK_SEQUENCE:`
			`return parseSet2();`
			`case TOK_DOT:`
			`return NFA::createSetNFA(maxInputSet);`
			`default:`
			`prev();`
			`return NFA();`
			`}`
			`}`

			`NFA RE2NFA::parseMaybeQuantifier(const NFA &nfa)`
			`{`
			`// ####`
			`switch (next()) {`
			`case TOK_STAR:`
			`return NFA::createOptionalNFA(nfa);`
			`case TOK_QUESTION:`
			`return NFA::createZeroOrOneNFA(nfa);`
			`case TOK_PLUS:`
			`return NFA::createConcatenatingNFA(nfa, NFA::createOptionalNFA(nfa));`
			`case TOK_LBRACE: {`
			`const int rewind = index - 1;`

			`QString lexemBeforeComma;`
			`QString lexemAfterComma;`
			`bool seenComma = false;`
			`forever {`
			`if (test(TOK_COMMA)) {`
			`if (seenComma) {`
			`errorColumn = symbol().column;`
			`return NFA();`
			`}`
			`seenComma = true;`
			`} else if (test(TOK_RBRACE)) {`
			`break;`
			`} else {`
			`next(TOK_STRING);`
			`if (seenComma)`
			`lexemAfterComma += symbol().lexem;`
			`else`
			`lexemBeforeComma += symbol().lexem;`
			`}`
			`}`
			`bool isNumber = false;`
			`int min = lexemBeforeComma.toInt(&isNumber);`
			`if (!isNumber) {`
			`index = rewind;`
			`return nfa;`
			`}`
			`int max = min;`
			`if (seenComma) {`
			`max = lexemAfterComma.toInt(&isNumber);`
			`if (!isNumber) {`
			`errorColumn = symbol().column;`
			`return NFA();`
			`}`
			`}`
			`return NFA::applyQuantity(nfa, min, max);`
			`}`
			`default:`
			`prev();`
			`return nfa;`
			`}`
			`}`

			`NFA RE2NFA::parseSet()`
			`{`
			`QSet<InputType> set;`
			`bool negate = false;`

			`next(TOK_STRING);`

			`do {`
			`Q_ASSERT(symbol().lexem.length() == 1);`
			`// ###`
			`QChar ch = symbol().lexem.at(0);`
			`if (set.isEmpty() && ch == QLatin1Char('^')) {`
			`negate = true;`
			`continue;`
			`}`

			`// look ahead for ranges like a-z`
			`bool rangeFound = false;`
			`if (test(TOK_STRING)) {`
			`if (symbol().lexem.length() == 1`
			`&& symbol().lexem.at(0) == QLatin1Char('-')) {`
			`next(TOK_STRING);`
			`Q_ASSERT(symbol().lexem.length() == 1);`
			`QChar last = symbol().lexem.at(0);`

			`if (ch.unicode() > last.unicode())`
			`qSwap(ch, last);`

			`for (ushort i = ch.unicode(); i <= last.unicode(); ++i) {`
			`if (caseSensitivity == Qt::CaseInsensitive) {`
			`set.insert(QChar(i).toLower().unicode());`
			`} else {`
			`set.insert(i);`
			`}`
			`}`

			`rangeFound = true;`
			`} else {`
			`prev();`
			`}`
			`}`

			`if (!rangeFound) {`
			`if (caseSensitivity == Qt::CaseInsensitive) {`
			`set.insert(ch.toLower().unicode());`
			`} else {`
			`set.insert(ch.unicode());`
			`}`
			`}`
			`} while (test(TOK_STRING));`

			`if (negate) {`
			`QSet<InputType> negatedSet = maxInputSet;`
			`negatedSet.subtract(set);`
			`set = negatedSet;`
			`}`

			`return NFA::createSetNFA(set);`
			`}`

			`NFA RE2NFA::parseSet2()`
			`{`
			`QSet<InputType> set;`
			`bool negate = false;`

			`QString str = symbol().lexem;`
			`// strip off brackets`
			`str.chop(1);`
			`str.remove(0, 1);`

			`int i = 0;`
			`while (i < str.length()) {`
			`// ###`
			`QChar ch = str.at(i++);`
			`if (set.isEmpty() && ch == QLatin1Char('^')) {`
			`negate = true;`
			`continue;`
			`}`

			`// look ahead for ranges like a-z`
			`bool rangeFound = false;`
			`if (i < str.length() - 1 && str.at(i) == QLatin1Char('-')) {`
			`++i;`
			`QChar last = str.at(i++);`

			`if (ch.unicode() > last.unicode())`
			`qSwap(ch, last);`

			`for (ushort i = ch.unicode(); i <= last.unicode(); ++i) {`
			`if (caseSensitivity == Qt::CaseInsensitive) {`
			`set.insert(QChar(i).toLower().unicode());`
			`} else {`
			`set.insert(i);`
			`}`
			`}`

			`rangeFound = true;`
			`}`

			`if (!rangeFound) {`
			`if (caseSensitivity == Qt::CaseInsensitive) {`
			`set.insert(ch.toLower().unicode());`
			`} else {`
			`set.insert(ch.unicode());`
			`}`
			`}`
			`}`

			`if (negate) {`
			`QSet<InputType> negatedSet = maxInputSet;`
			`negatedSet.subtract(set);`
			`set = negatedSet;`
			`}`

			`return NFA::createSetNFA(set);`
			`}`
			`NFA RE2NFA::createCharNFA()`
			`{`
			`NFA nfa;`
			`// ####`
			`if (caseSensitivity == Qt::CaseInsensitive) {`
			`nfa = NFA::createStringNFA(symbol().lexem.toLower().toLatin1());`
			`} else {`
			`nfa = NFA::createStringNFA(symbol().lexem.toLatin1());`
			`}`
			`return nfa;`
			`}`

			`static inline int skipQuote(const QString &str, int pos)`
			`{`
			`while (pos < str.length()`
			`&& str.at(pos) != QLatin1Char('"')) {`
			`if (str.at(pos) == QLatin1Char('\\')) {`
			`++pos;`
			`if (pos >= str.length())`
			`break;`
			`}`
			`++pos;`
			`}`
			`if (pos < str.length())`
			`++pos;`
			`return pos;`
			`}`

			`#if 0`
			`static const char*tokStr(Token t)`
			`{`
			`switch (t) {`
			`case TOK_INVALID: return "TOK_INVALID";`
			`case TOK_STRING: return "TOK_STRING";`
			`case TOK_LBRACE: return "TOK_LBRACE";`
			`case TOK_RBRACE: return "TOK_RBRACE";`
			`case TOK_LBRACKET: return "TOK_LBRACKET";`
			`case TOK_RBRACKET: return "TOK_RBRACKET";`
			`case TOK_LPAREN: return "TOK_LPAREN";`
			`case TOK_RPAREN: return "TOK_RPAREN";`
			`case TOK_COMMA: return "TOK_COMMA";`
			`case TOK_STAR: return "TOK_STAR";`
			`case TOK_OR: return "TOK_OR";`
			`case TOK_QUESTION: return "TOK_QUESTION";`
			`case TOK_DOT: return "TOK_DOT";`
			`case TOK_PLUS: return "TOK_PLUS";`
			`case TOK_SEQUENCE: return "TOK_SEQUENCE";`
			`case TOK_QUOTED_STRING: return "TOK_QUOTED_STRING";`
			`}`
			`return "";`
			`}`
			`#endif`

			`void RE2NFA::tokenize(const QString &input)`
			`{`
			`symbols.clear();`
			`#if 1`
			`RegExpTokenizer tokenizer(input);`
			`Symbol sym;`
			`int tok = tokenizer.lex();`
			`while (tok != -1) {`
			`Symbol sym;`
			`sym.token = static_cast<Token>(tok);`
			`sym.lexem = input.mid(tokenizer.lexemStart, tokenizer.lexemLength);`

			`if (sym.token == TOK_QUOTED_STRING) {`
			`sym.lexem.chop(1);`
			`sym.lexem.remove(0, 1);`
			`sym.token = TOK_STRING;`
			`}`

			`if (sym.token == TOK_STRING \|\| sym.token == TOK_SEQUENCE) {`
			`for (int i = 0; i < sym.lexem.length(); ++i) {`
			`if (sym.lexem.at(i) == '\\') {`
			`if (i >= sym.lexem.length() - 1)`
			`break;`
			`QChar ch = sym.lexem.at(i + 1);`
			`if (ch == QLatin1Char('n')) {`
			`ch = '\n';`
			`} else if (ch == QLatin1Char('r')) {`
			`ch = '\r';`
			`} else if (ch == QLatin1Char('t')) {`
			`ch = '\t';`
			`} else if (ch == QLatin1Char('f')) {`
			`ch = '\f';`
			`}`
			`sym.lexem.replace(i, 2, ch);`
			`}`
			`}`
			`}`

			`/*`
			`if (sym.token == TOK_SEQUENCE) {`
			`Symbol s;`
			`s.token = TOK_LBRACKET;`
			`s.lexem = "[";`
			`symbols.append(s);`

			`for (int i = 1; i < sym.lexem.length() - 1; ++i) {`
			`s.token = TOK_STRING;`
			`s.lexem = sym.lexem.at(i);`
			`symbols.append(s);`
			`}`

			`s.token = TOK_RBRACKET;`
			`s.lexem = "]";`
			`symbols.append(s);`

			`tok = tokenizer.lex();`
			`continue;`
			`}`
			`*/`

			`symbols.append(sym);`
			`tok = tokenizer.lex();`
			`}`
			`#else`
			`int pos = 0;`
			`bool insideSet = false;`
			`while (pos < input.length()) {`
			`QChar ch = input.at(pos);`

			`Symbol sym;`
			`sym.column = pos;`
			`sym.token = TOK_INVALID;`
			`sym.lexem = QString(ch);`
			`switch (ch.toLatin1()) {`
			`case '"': {`
			`if (insideSet) {`
			`sym.token = TOK_STRING;`
			`sym.lexem = QString(ch);`
			`symbols += sym;`
			`++pos;`
			`continue;`
			`}`
			`if (pos + 1 >= input.length())`
			`return;`
			`int quoteEnd = skipQuote(input, pos + 1);`
			`sym.token = TOK_STRING;`
			`sym.lexem = input.mid(pos + 1, quoteEnd - pos - 2);`
			`symbols += sym;`
			`pos = quoteEnd;`
			`continue;`
			`}`
			`case '{':`
			`sym.token = (insideSet ? TOK_STRING : TOK_LBRACE);`
			`break;`
			`case '}':`
			`sym.token = (insideSet ? TOK_STRING : TOK_RBRACE);`
			`break;`
			`case '[':`
			`insideSet = true;`
			`sym.token = TOK_LBRACKET;`
			`break;`
			`case ']':`
			`insideSet = false;`
			`sym.token = TOK_RBRACKET;`
			`break;`
			`case '(':`
			`sym.token = (insideSet ? TOK_STRING : TOK_LPAREN);`
			`break;`
			`case ')':`
			`sym.token = (insideSet ? TOK_STRING : TOK_RPAREN);`
			`break;`
			`case ',':`
			`sym.token = (insideSet ? TOK_STRING : TOK_COMMA);`
			`break;`
			`case '*':`
			`sym.token = (insideSet ? TOK_STRING : TOK_STAR);`
			`break;`
			`case '\|':`
			`sym.token = (insideSet ? TOK_STRING : TOK_OR);`
			`break;`
			`case '?':`
			`sym.token = (insideSet ? TOK_STRING : TOK_QUESTION);`
			`break;`
			`case '.':`
			`sym.token = (insideSet ? TOK_STRING : TOK_DOT);`
			`break;`
			`case '+':`
			`sym.token = (insideSet ? TOK_STRING : TOK_PLUS);`
			`break;`
			`case '\\':`
			`++pos;`
			`if (pos >= input.length())`
			`return;`
			`ch = input.at(pos);`
			`if (ch == QLatin1Char('n')) {`
			`ch = '\n';`
			`} else if (ch == QLatin1Char('r')) {`
			`ch = '\r';`
			`} else if (ch == QLatin1Char('t')) {`
			`ch = '\t';`
			`} else if (ch == QLatin1Char('f')) {`
			`ch = '\f';`
			`}`
			`// fall through`
			`default:`
			`sym.token = TOK_STRING;`
			`sym.lexem = QString(ch);`
			`symbols += sym;`
			`++pos;`
			`continue;`
			`}`
			`symbols += sym;`
			`++pos;`
			`}`
			`#endif`
			`#if 0`
			`foreach (Symbol s, symbols) {`
			`qDebug() << "Tok" << tokStr(s.token) << "lexem" << s.lexem;`
			`}`
			`#endif`
			`}`

			`bool RE2NFA::next(Token t)`
			`{`
			`if (hasNext() && next() == t)`
			`return true;`
			`errorColumn = symbol().column;`
			`Q_ASSERT(false);`
			`return false;`
			`}`

			`bool RE2NFA::test(Token t)`
			`{`
			`if (index >= symbols.count())`
			`return false;`
			`if (symbols.at(index).token == t) {`
			`++index;`
			`return true;`
			`}`
			`return false;`
			`}`

			`QString RE2NFA::lexemUntil(Token t)`
			`{`
			`QString lexem;`
			`while (hasNext() && next() != t)`
			`lexem += symbol().lexem;`
			`return lexem;`
			`}`