linux/scripts/genksyms/lex.l

441 lines
8.3 KiB
Plaintext

/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* Lexical analysis for genksyms.
* Copyright 1996, 1997 Linux International.
*
* New implementation contributed by Richard Henderson <rth@tamu.edu>
* Based on original work by Bjorn Ekwall <bj0rn@blox.se>
*
* Taken from Linux modutils 2.4.22.
*/
%{
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include "genksyms.h"
#include "parse.tab.h"
/* We've got a two-level lexer here. We let flex do basic tokenization
and then we categorize those basic tokens in the second stage. */
#define YY_DECL static int yylex1(void)
%}
IDENT [A-Za-z_\$][A-Za-z0-9_\$]*
O_INT 0[0-7]*
D_INT [1-9][0-9]*
X_INT 0[Xx][0-9A-Fa-f]+
I_SUF [Uu]|[Ll]|[Uu][Ll]|[Ll][Uu]
INT ({O_INT}|{D_INT}|{X_INT}){I_SUF}?
FRAC ([0-9]*\.[0-9]+)|([0-9]+\.)
EXP [Ee][+-]?[0-9]+
F_SUF [FfLl]
REAL ({FRAC}{EXP}?{F_SUF}?)|([0-9]+{EXP}{F_SUF}?)
STRING L?\"([^\\\"]*\\.)*[^\\\"]*\"
CHAR L?\'([^\\\']*\\.)*[^\\\']*\'
MC_TOKEN ([~%^&*+=|<>/-]=)|(&&)|("||")|(->)|(<<)|(>>)
/* We don't do multiple input files. */
%option noyywrap
%option noinput
%%
/* Keep track of our location in the original source files. */
^#[ \t]+{INT}[ \t]+\"[^\"\n]+\".*\n return FILENAME;
^#.*\n cur_line++;
\n cur_line++;
/* Ignore all other whitespace. */
[ \t\f\v\r]+ ;
{STRING} return STRING;
{CHAR} return CHAR;
{IDENT} return IDENT;
/* The Pedant requires that the other C multi-character tokens be
recognized as tokens. We don't actually use them since we don't
parse expressions, but we do want whitespace to be arranged
around them properly. */
{MC_TOKEN} return OTHER;
{INT} return INT;
{REAL} return REAL;
"..." return DOTS;
/* All other tokens are single characters. */
. return yytext[0];
%%
/* Bring in the keyword recognizer. */
#include "keywords.c"
/* Macros to append to our phrase collection list. */
/*
* We mark any token, that that equals to a known enumerator, as
* SYM_ENUM_CONST. The parser will change this for struct and union tags later,
* the only problem is struct and union members:
* enum e { a, b }; struct s { int a, b; }
* but in this case, the only effect will be, that the ABI checksums become
* more volatile, which is acceptable. Also, such collisions are quite rare,
* so far it was only observed in include/linux/telephony.h.
*/
#define _APP(T,L) do { \
cur_node = next_node; \
next_node = xmalloc(sizeof(*next_node)); \
next_node->next = cur_node; \
cur_node->string = memcpy(xmalloc(L+1), T, L+1); \
cur_node->tag = \
find_symbol(cur_node->string, SYM_ENUM_CONST, 1)?\
SYM_ENUM_CONST : SYM_NORMAL ; \
cur_node->in_source_file = in_source_file; \
} while (0)
#define APP _APP(yytext, yyleng)
/* The second stage lexer. Here we incorporate knowledge of the state
of the parser to tailor the tokens that are returned. */
int
yylex(void)
{
static enum {
ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_TYPEOF, ST_TYPEOF_1,
ST_BRACKET, ST_BRACE, ST_EXPRESSION, ST_STATIC_ASSERT,
} lexstate = ST_NOTSTARTED;
static int suppress_type_lookup, dont_want_brace_phrase;
static struct string_list *next_node;
static char *source_file;
int token, count = 0;
struct string_list *cur_node;
if (lexstate == ST_NOTSTARTED)
{
next_node = xmalloc(sizeof(*next_node));
next_node->next = NULL;
lexstate = ST_NORMAL;
}
repeat:
token = yylex1();
if (token == 0)
return 0;
else if (token == FILENAME)
{
char *file, *e;
/* Save the filename and line number for later error messages. */
if (cur_filename)
free(cur_filename);
file = strchr(yytext, '\"')+1;
e = strchr(file, '\"');
*e = '\0';
cur_filename = memcpy(xmalloc(e-file+1), file, e-file+1);
cur_line = atoi(yytext+2);
if (!source_file) {
source_file = xstrdup(cur_filename);
in_source_file = 1;
} else {
in_source_file = (strcmp(cur_filename, source_file) == 0);
}
goto repeat;
}
switch (lexstate)
{
case ST_NORMAL:
switch (token)
{
case IDENT:
APP;
{
int r = is_reserved_word(yytext, yyleng);
if (r >= 0)
{
switch (token = r)
{
case ATTRIBUTE_KEYW:
lexstate = ST_ATTRIBUTE;
count = 0;
goto repeat;
case ASM_KEYW:
lexstate = ST_ASM;
count = 0;
goto repeat;
case TYPEOF_KEYW:
lexstate = ST_TYPEOF;
count = 0;
goto repeat;
case STRUCT_KEYW:
case UNION_KEYW:
case ENUM_KEYW:
dont_want_brace_phrase = 3;
suppress_type_lookup = 2;
goto fini;
case EXPORT_SYMBOL_KEYW:
goto fini;
case STATIC_ASSERT_KEYW:
lexstate = ST_STATIC_ASSERT;
count = 0;
goto repeat;
}
}
if (!suppress_type_lookup)
{
if (find_symbol(yytext, SYM_TYPEDEF, 1))
token = TYPE;
}
}
break;
case '[':
APP;
lexstate = ST_BRACKET;
count = 1;
goto repeat;
case '{':
APP;
if (dont_want_brace_phrase)
break;
lexstate = ST_BRACE;
count = 1;
goto repeat;
case '=': case ':':
APP;
lexstate = ST_EXPRESSION;
break;
default:
APP;
break;
}
break;
case ST_ATTRIBUTE:
APP;
switch (token)
{
case '(':
++count;
goto repeat;
case ')':
if (--count == 0)
{
lexstate = ST_NORMAL;
token = ATTRIBUTE_PHRASE;
break;
}
goto repeat;
default:
goto repeat;
}
break;
case ST_ASM:
APP;
switch (token)
{
case '(':
++count;
goto repeat;
case ')':
if (--count == 0)
{
lexstate = ST_NORMAL;
token = ASM_PHRASE;
break;
}
goto repeat;
default:
goto repeat;
}
break;
case ST_TYPEOF_1:
if (token == IDENT)
{
if (is_reserved_word(yytext, yyleng) >= 0
|| find_symbol(yytext, SYM_TYPEDEF, 1))
{
yyless(0);
unput('(');
lexstate = ST_NORMAL;
token = TYPEOF_KEYW;
break;
}
_APP("(", 1);
}
lexstate = ST_TYPEOF;
/* FALLTHRU */
case ST_TYPEOF:
switch (token)
{
case '(':
if ( ++count == 1 )
lexstate = ST_TYPEOF_1;
else
APP;
goto repeat;
case ')':
APP;
if (--count == 0)
{
lexstate = ST_NORMAL;
token = TYPEOF_PHRASE;
break;
}
goto repeat;
default:
APP;
goto repeat;
}
break;
case ST_BRACKET:
APP;
switch (token)
{
case '[':
++count;
goto repeat;
case ']':
if (--count == 0)
{
lexstate = ST_NORMAL;
token = BRACKET_PHRASE;
break;
}
goto repeat;
default:
goto repeat;
}
break;
case ST_BRACE:
APP;
switch (token)
{
case '{':
++count;
goto repeat;
case '}':
if (--count == 0)
{
lexstate = ST_NORMAL;
token = BRACE_PHRASE;
break;
}
goto repeat;
default:
goto repeat;
}
break;
case ST_EXPRESSION:
switch (token)
{
case '(': case '[': case '{':
++count;
APP;
goto repeat;
case '}':
/* is this the last line of an enum declaration? */
if (count == 0)
{
/* Put back the token we just read so's we can find it again
after registering the expression. */
unput(token);
lexstate = ST_NORMAL;
token = EXPRESSION_PHRASE;
break;
}
/* FALLTHRU */
case ')': case ']':
--count;
APP;
goto repeat;
case ',': case ';':
if (count == 0)
{
/* Put back the token we just read so's we can find it again
after registering the expression. */
unput(token);
lexstate = ST_NORMAL;
token = EXPRESSION_PHRASE;
break;
}
APP;
goto repeat;
default:
APP;
goto repeat;
}
break;
case ST_STATIC_ASSERT:
APP;
switch (token)
{
case '(':
++count;
goto repeat;
case ')':
if (--count == 0)
{
lexstate = ST_NORMAL;
token = STATIC_ASSERT_PHRASE;
break;
}
goto repeat;
default:
goto repeat;
}
break;
default:
exit(1);
}
fini:
if (suppress_type_lookup > 0)
--suppress_type_lookup;
if (dont_want_brace_phrase > 0)
--dont_want_brace_phrase;
yylval = &next_node->next;
return token;
}