Nasal-Interpreter/nasal_lexer.h

#ifndef __NASAL_LEXER_H__
#define __NASAL_LEXER_H__

#define IS_IDENTIFIER(c)      ((c=='_')||('a'<=c && c<='z')||('A'<=c&&c<='Z'))
#define IS_HEX_NUMBER(c)      (('0'<=c&&c<='9')||('a'<=c&&c<='f')||('A'<=c && c<='F'))
#define IS_OCT_NUMEBR(c)      ('0'<=c&&c<='7')
#define IS_DIGIT(c)           ('0'<=c&&c<='9')
#define IS_STRING(c)          (c=='\''||c=='\"'||c=='`')
// single operators have only one character
#define IS_SINGLE_OPRATOR(c)  (c=='('||c==')'||c=='['||c==']'||c=='{'||c=='}'||c==','||c==';'||c=='|'||c==':'||\
							   c=='?'||c=='`'||c=='&'||c=='@'||c=='%'||c=='$'||c=='^'||c=='\\')
// calculation operators may have two chars, for example: += -= *= /= ~= != == >= <=
#define IS_CALC_OPERATOR(c)   (c=='='||c=='+'||c=='-'||c=='*'||c=='!'||c=='/'||c=='<'||c=='>'||c=='~')
#define IS_NOTE(c)            (c=='#')

enum token_type
{
    tok_null=0,
    tok_number,tok_string,tok_identifier,
    tok_for,tok_forindex,tok_foreach,tok_while,
    tok_var,tok_func,tok_break,tok_continue,
    tok_return,tok_if,tok_elsif,tok_else,tok_nil,
    tok_left_curve,tok_right_curve,
    tok_left_bracket,tok_right_bracket,
    tok_left_brace,tok_right_brace,
    tok_semi,tok_and,tok_or,tok_comma,tok_dot,tok_ellipsis,tok_quesmark,
    tok_colon,tok_add,tok_sub,tok_mult,tok_div,tok_link,tok_not,
    tok_equal,
    tok_add_equal,tok_sub_equal,tok_mult_equal,tok_div_equal,tok_link_equal,
    tok_cmp_equal,tok_cmp_not_equal,tok_less_than,tok_less_equal,tok_greater_than,tok_greater_equal
};

struct
{
    const char* str;
    int tok_type;
}token_table[]=
{
    {"for"     ,tok_for          },
    {"forindex",tok_forindex     },
    {"foreach" ,tok_foreach      },
    {"while"   ,tok_while        },
    {"var"     ,tok_var          },
    {"func"    ,tok_func         },
    {"break"   ,tok_break        },
    {"continue",tok_continue     },
    {"return"  ,tok_return       },
    {"if"      ,tok_if           },
    {"elsif"   ,tok_elsif        },
    {"else"    ,tok_else         },
    {"nil"     ,tok_nil          },
    {"("       ,tok_left_curve   },
    {")"       ,tok_right_curve  },
    {"["       ,tok_left_bracket },
    {"]"       ,tok_right_bracket},
    {"{"       ,tok_left_brace   },
    {"}"       ,tok_right_brace  },
    {";"       ,tok_semi         },
    {"and"     ,tok_and          },
    {"or"      ,tok_or           },
    {","       ,tok_comma        },
    {"."       ,tok_dot          },
    {"..."     ,tok_ellipsis     },
    {"?"       ,tok_quesmark     },
    {":"       ,tok_colon        },
    {"+"       ,tok_add          },
    {"-"       ,tok_sub          },
    {"*"       ,tok_mult         },
    {"/"       ,tok_div          },
    {"~"       ,tok_link         },
    {"!"       ,tok_not          },
	{"="       ,tok_equal        },
    {"+="      ,tok_add_equal    },
    {"-="      ,tok_sub_equal    },
    {"*="      ,tok_mult_equal   },
    {"/="      ,tok_div_equal    },
    {"~="      ,tok_link_equal   },
    {"=="      ,tok_cmp_equal    },
    {"!="      ,tok_cmp_not_equal},
    {"<"       ,tok_less_than    },
	{"<="      ,tok_less_equal   },
    {">"       ,tok_greater_than },
    {">="      ,tok_greater_equal},
	{NULL      ,-1               }
};

struct token
{
    int line;
    int type;
    std::string str;
};

class nasal_lexer
{
private:
    int error;
	int res_size;
	int line;
	int ptr;
	std::string line_code;
	std::vector<char> res;
    std::vector<token> token_list;
	std::string identifier_gen();
    std::string number_gen();
    std::string string_gen();
public:
    void clear();
	void openfile(std::string);
	void die(std::string,int,int);
    void scanner();
    void print_token();
    int  get_error();
	std::vector<token>& get_token_list();
};

void nasal_lexer::clear()
{
	error=0;
	res_size=0;
	line=0;
	ptr=0;
	line_code="";
	res.clear();
    token_list.clear();
    return;
}

void nasal_lexer::openfile(std::string filename)
{
	error=0;
	res.clear();
    std::ifstream fin(filename,std::ios::binary);
    if(fin.fail())
    {
		++error;
        std::cout<<">> [lexer] cannot open file \""<<filename<<"\".\n";
        fin.close();
        return;
    }
    while(!fin.eof())
    {
        char c=fin.get();
        if(fin.eof())
            break;
        res.push_back(c);
    }
    fin.close();
	res_size=res.size();
    return;
}

void nasal_lexer::die(std::string error_info,int line=-1,int column=-1)
{
	++error;
	std::cout<<">> [lexer] line "<<line<<" column "<<column<<": "<<error_info<<"\n";
	return;
}

std::string nasal_lexer::identifier_gen()
{
	std::string token_str="";
	while(ptr<res_size && (IS_IDENTIFIER(res[ptr])||IS_DIGIT(res[ptr])))
		token_str+=res[ptr++];
	line_code+=token_str;
	return token_str;
	// after running this process, ptr will point to the next token's beginning character
}

std::string nasal_lexer::number_gen()
{
	bool scientific_notation=false;// numbers like 1e8 are scientific_notation
	std::string token_str="";
	// generate hex number
	if(res[ptr]=='0' && ptr+1<res_size && res[ptr+1]=='x')
	{
		token_str="0x";
		ptr+=2;
		while(ptr<res_size && IS_HEX_NUMBER(res[ptr]))
			token_str+=res[ptr++];
		line_code+=token_str;
		if(token_str=="0x")
		{
			die("["+line_code+"_] incorrect number.",line,line_code.length());
			return "0";
		}
		return token_str;
	}
	// generate oct number
	else if(res[ptr]=='0' && ptr+1<res_size && res[ptr+1]=='o')
	{
		token_str="0o";
		ptr+=2;
		while(ptr<res_size && IS_OCT_NUMEBR(res[ptr]))
			token_str+=res[ptr++];
		line_code+=token_str;
		if(token_str=="0o")
		{
			die("["+line_code+"_] incorrect number.",line,line_code.length());
			return "0";
		}
		return token_str;
	}
	// generate dec number
	// dec number -> [0~9][0~9]*(.[0~9]*)(e|E(+|-)0|[1~9][0~9]*)
	while(ptr<res_size && IS_DIGIT(res[ptr]))
		token_str+=res[ptr++];
	if(ptr<res_size && res[ptr]=='.')
	{
		token_str+=res[ptr++];
		// "xxxx." is not a correct number
		if(ptr>=res_size)
		{
			line_code+=token_str;
			die("["+line_code+"_] incorrect number.",line,line_code.length());
			return "0";
		}
		while(ptr<res_size && IS_DIGIT(res[ptr]))
			token_str+=res[ptr++];
		// "xxxx." is not a correct number
		if(token_str.back()=='.')
		{
			line_code+=token_str;
			die("["+line_code+"_] incorrect number.",line,line_code.length());
			return "0";
		}
	}
	if(ptr<res_size && (res[ptr]=='e' || res[ptr]=='E'))
	{
		token_str+=res[ptr++];
		// "xxxe" is not a correct number
		if(ptr>=res_size)
		{
			line_code+=token_str;
			die("["+line_code+"_] incorrect number.",line,line_code.length());
			return "0";
		}
		if(ptr<res_size && (res[ptr]=='-' || res[ptr]=='+'))
			token_str+=res[ptr++];
		if(ptr>=res_size)
		{
			line_code+=token_str;
			die("["+line_code+"_] incorrect number.",line,line_code.length());
			return "0";
		}
		if(ptr<res_size && res[ptr]=='0')
			token_str+=res[ptr++];
		while(ptr<res_size && IS_DIGIT(res[ptr]))
			token_str+=res[ptr++];
		// "xxxe(-|+)" is not a correct number
		if(token_str.back()=='e' || token_str.back()=='E' || token_str.back()=='-' || token_str.back()=='+')
		{
			line_code+=token_str;
			die("["+line_code+"_] incorrect number.",line,line_code.length());
			return "0";
		}
	}
	line_code+=token_str;
	return token_str;
}

std::string nasal_lexer::string_gen()
{
	std::string token_str="";
	line_code+=res[ptr];
	char str_begin=res[ptr++];
	while(ptr<res_size && res[ptr]!=str_begin)
	{
		line_code+=res[ptr];
		if(res[ptr]=='\n')
		{
			line_code="";
			++line;
		}
		if(res[ptr]=='\\' && ptr+1<res_size)
		{
			++ptr;
			line_code+=res[ptr];
			switch(res[ptr])
			{
				case 'a':token_str.push_back('\a');break;
				case 'b':token_str.push_back('\b');break;
				case 'f':token_str.push_back('\f');break;
				case 'n':token_str.push_back('\n');break;
				case 'r':token_str.push_back('\r');break;
				case 't':token_str.push_back('\t');break;
				case 'v':token_str.push_back('\v');break;
				case '?':token_str.push_back('\?');break;
				case '0':token_str.push_back('\0');break;
				case '\\':token_str.push_back('\\');break;
				case '\'':token_str.push_back('\'');break;
				case '\"':token_str.push_back('\"');break;
				default:  token_str.push_back(res[ptr]);break;
			}
		}
		else
			token_str+=res[ptr];
		++ptr;
	}
	// check if this string ends with a " or '
	if(ptr>=res_size)
		die("["+line_code+"_] get EOF when generating string.",line,line_code.length());
	++ptr;
	return token_str;
}

void nasal_lexer::scanner()
{
    token_list.clear();
    line=1;
	ptr=0;
	line_code="";

	std::string token_str;
	while(ptr<res_size)
	{
		while(ptr<res_size && (res[ptr]==' ' || res[ptr]=='\n' || res[ptr]=='\t' || res[ptr]=='\r' || res[ptr]<0))
		{
			// these characters will be ignored, and '\n' will cause ++line
			line_code+=res[ptr];
			if(res[ptr]=='\n')
			{
				++line;
				line_code="";
			}
			++ptr;
		}
		if(ptr>=res_size) break;
		if(IS_IDENTIFIER(res[ptr]))
		{
			token_str=identifier_gen();
			token new_token;
			new_token.line=line;
            new_token.str=token_str;
            new_token.type=0;
            for(int i=0;token_table[i].str;++i)
                if(token_str==token_table[i].str)
                {
                    new_token.type=token_table[i].tok_type;
                    break;
                }
            if(!new_token.type)
                new_token.type=tok_identifier;
			token_list.push_back(new_token);
		}
		else if(IS_DIGIT(res[ptr]))
		{
			token_str=number_gen();
			token new_token;
			new_token.line=line;
            new_token.str=token_str;
			new_token.type=tok_number;
			token_list.push_back(new_token);
		}
		else if(IS_STRING(res[ptr]))
		{
			token_str=string_gen();
			token new_token;
			new_token.line=line;
			new_token.type=tok_string;
			new_token.str=token_str;
			token_list.push_back(new_token);
		}
		else if(IS_SINGLE_OPRATOR(res[ptr]))
		{
			token_str="";
			token_str+=res[ptr];
			line_code+=res[ptr];
			token new_token;
			new_token.line=line;
            new_token.str=token_str;
			new_token.type=-1;
            for(int i=0;token_table[i].str;++i)
                if(token_str==token_table[i].str)
                {
                    new_token.type=token_table[i].tok_type;
                    break;
                }
			if(new_token.type<0)
				die("["+line_code+"_] incorrect operator.",line,line_code.length());
			token_list.push_back(new_token);
			++ptr;
		}
        else if(res[ptr]=='.')
        {
            if(ptr+2<res_size && res[ptr+1]=='.' && res[ptr+2]=='.')
            {
                token_str="...";
                ptr+=3;
            }
            else
            {
                token_str=".";
                ++ptr;
            }
			line_code+=token_str;
            token new_token;
            new_token.line=line;
            new_token.str=token_str;
            for(int i=0;token_table[i].str;++i)
                if(token_str==token_table[i].str)
                {
                    new_token.type=token_table[i].tok_type;
                    break;
                }
			token_list.push_back(new_token);
        }
		else if(IS_CALC_OPERATOR(res[ptr]))
		{
			// get calculation operator
			token_str=res[ptr];
			++ptr;
			if(ptr<res.size() && res[ptr]=='=')
			{
				token_str+=res[ptr];
				++ptr;
			}
			line_code+=token_str;
			token new_token;
			new_token.line=line;
			new_token.str=token_str;
            for(int i=0;token_table[i].str;++i)
                if(token_str==token_table[i].str)
                {
                    new_token.type=token_table[i].tok_type;
                    break;
                }
			token_list.push_back(new_token);
		}
		else if(IS_NOTE(res[ptr]))
		{
			// avoid note
			while(ptr<res_size && res[ptr]!='\n') ++ptr;
			// after this process ptr will point to a '\n'
			// don't ++ptr then the counter for line can work correctly
		}
		else
		{
			line_code+=res[ptr];
			die("["+line_code+"_] unknown character.",line,line_code.length());
			++ptr;
		}
	}
	return;
}

void nasal_lexer::print_token()
{
    int size=token_list.size();
    for(int i=0;i<size;++i)
        std::cout<<"("<<token_list[i].line<<" | "<<token_list[i].str<<")\n";
    return;
}

int nasal_lexer::get_error()
{
    return error;
}

std::vector<token>& nasal_lexer::get_token_list()
{
	return token_list;
}
#endif
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`#ifndef __NASAL_LEXER_H__`
			`#define __NASAL_LEXER_H__`

update 2020-11-02 12:24:12 +08:00			`#define IS_IDENTIFIER(c) ((c=='_')\|\|('a'<=c && c<='z')\|\|('A'<=c&&c<='Z'))`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`#define IS_HEX_NUMBER(c) (('0'<=c&&c<='9')\|\|('a'<=c&&c<='f')\|\|('A'<=c && c<='F'))`
			`#define IS_OCT_NUMEBR(c) ('0'<=c&&c<='7')`
			`#define IS_DIGIT(c) ('0'<=c&&c<='9')`
update 2020-11-02 12:24:12 +08:00			#define IS_STRING(c) (c=='\''\|\|c=='\"'\|\|c=='`')
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`// single operators have only one character`
			`#define IS_SINGLE_OPRATOR(c) (c=='('\|\|c==')'\|\|c=='['\|\|c==']'\|\|c=='{'\|\|c=='}'\|\|c==','\|\|c==';'\|\|c=='\|'\|\|c==':'\|\|\`
			c=='?'\|\|c=='`'\|\|c=='&'\|\|c=='@'\|\|c=='%'\|\|c=='$'\|\|c=='^'\|\|c=='\\')
			`// calculation operators may have two chars, for example: += -= *= /= ~= != == >= <=`
			`#define IS_CALC_OPERATOR(c) (c=='='\|\|c=='+'\|\|c=='-'\|\|c=='*'\|\|c=='!'\|\|c=='/'\|\|c=='<'\|\|c=='>'\|\|c=='~')`
update 2020-11-02 12:24:12 +08:00			`#define IS_NOTE(c) (c=='#')`
change project structure version 3.0 2020-10-23 14:53:04 +08:00
update 2020-11-30 23:54:32 +08:00			`enum token_type`
			`{`
			`tok_null=0,`
			`tok_number,tok_string,tok_identifier,`
			`tok_for,tok_forindex,tok_foreach,tok_while,`
			`tok_var,tok_func,tok_break,tok_continue,`
			`tok_return,tok_if,tok_elsif,tok_else,tok_nil,`
			`tok_left_curve,tok_right_curve,`
			`tok_left_bracket,tok_right_bracket,`
			`tok_left_brace,tok_right_brace,`
			`tok_semi,tok_and,tok_or,tok_comma,tok_dot,tok_ellipsis,tok_quesmark,`
			`tok_colon,tok_add,tok_sub,tok_mult,tok_div,tok_link,tok_not,`
			`tok_equal,`
			`tok_add_equal,tok_sub_equal,tok_mult_equal,tok_div_equal,tok_link_equal,`
update 2020-12-14 00:10:31 +08:00			`tok_cmp_equal,tok_cmp_not_equal,tok_less_than,tok_less_equal,tok_greater_than,tok_greater_equal`
update 2020-11-30 23:54:32 +08:00			`};`

update 2020-11-02 12:24:12 +08:00			`struct`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`{`
update 2020-11-02 12:24:12 +08:00			`const char* str;`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`int tok_type;`
update 2020-11-02 12:24:12 +08:00			`}token_table[]=`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`{`
			`{"for" ,tok_for },`
			`{"forindex",tok_forindex },`
			`{"foreach" ,tok_foreach },`
			`{"while" ,tok_while },`
			`{"var" ,tok_var },`
			`{"func" ,tok_func },`
			`{"break" ,tok_break },`
			`{"continue",tok_continue },`
			`{"return" ,tok_return },`
			`{"if" ,tok_if },`
			`{"elsif" ,tok_elsif },`
			`{"else" ,tok_else },`
			`{"nil" ,tok_nil },`
			`{"(" ,tok_left_curve },`
			`{")" ,tok_right_curve },`
			`{"[" ,tok_left_bracket },`
			`{"]" ,tok_right_bracket},`
			`{"{" ,tok_left_brace },`
			`{"}" ,tok_right_brace },`
			`{";" ,tok_semi },`
			`{"and" ,tok_and },`
			`{"or" ,tok_or },`
			`{"," ,tok_comma },`
			`{"." ,tok_dot },`
			`{"..." ,tok_ellipsis },`
			`{"?" ,tok_quesmark },`
			`{":" ,tok_colon },`
			`{"+" ,tok_add },`
			`{"-" ,tok_sub },`
			`{"*" ,tok_mult },`
			`{"/" ,tok_div },`
			`{"~" ,tok_link },`
			`{"!" ,tok_not },`
			`{"=" ,tok_equal },`
			`{"+=" ,tok_add_equal },`
			`{"-=" ,tok_sub_equal },`
			`{"*=" ,tok_mult_equal },`
			`{"/=" ,tok_div_equal },`
			`{"~=" ,tok_link_equal },`
			`{"==" ,tok_cmp_equal },`
			`{"!=" ,tok_cmp_not_equal},`
			`{"<" ,tok_less_than },`
update 2020-12-14 00:10:31 +08:00			`{"<=" ,tok_less_equal },`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`{">" ,tok_greater_than },`
			`{">=" ,tok_greater_equal},`
update 2020-11-02 12:24:12 +08:00			`{NULL ,-1 }`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`};`

			`struct token`
			`{`
			`int line;`
			`int type;`
			`std::string str;`
			`};`

			`class nasal_lexer`
			`{`
			`private:`
			`int error;`
update 2020-10-23 17:10:02 +08:00			`int res_size;`
			`int line;`
			`int ptr;`
update 2020-11-02 12:24:12 +08:00			`std::string line_code;`
update 2020-10-23 17:10:02 +08:00			`std::vector<char> res;`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`std::vector<token> token_list;`
update 2020-10-23 17:10:02 +08:00			`std::string identifier_gen();`
			`std::string number_gen();`
			`std::string string_gen();`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`public:`
			`void clear();`
update 2020-10-23 17:10:02 +08:00			`void openfile(std::string);`
update 2020-11-02 12:24:12 +08:00			`void die(std::string,int,int);`
update 2020-10-23 17:10:02 +08:00			`void scanner();`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`void print_token();`
			`int get_error();`
			`std::vector<token>& get_token_list();`
			`};`

			`void nasal_lexer::clear()`
			`{`
update 2020-10-23 17:10:02 +08:00			`error=0;`
			`res_size=0;`
			`line=0;`
			`ptr=0;`
update 2020-11-02 12:24:12 +08:00			`line_code="";`
update 2020-10-23 17:10:02 +08:00			`res.clear();`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`token_list.clear();`
			`return;`
			`}`

update 2020-10-23 17:10:02 +08:00			`void nasal_lexer::openfile(std::string filename)`
			`{`
			`error=0;`
			`res.clear();`
			`std::ifstream fin(filename,std::ios::binary);`
			`if(fin.fail())`
			`{`
			`++error;`
			`std::cout<<">> [lexer] cannot open file \""<<filename<<"\".\n";`
			`fin.close();`
			`return;`
			`}`
			`while(!fin.eof())`
			`{`
			`char c=fin.get();`
			`if(fin.eof())`
			`break;`
			`res.push_back(c);`
			`}`
			`fin.close();`
			`res_size=res.size();`
			`return;`
			`}`

update 2020-11-02 12:24:12 +08:00			`void nasal_lexer::die(std::string error_info,int line=-1,int column=-1)`
			`{`
			`++error;`
			`std::cout<<">> [lexer] line "<<line<<" column "<<column<<": "<<error_info<<"\n";`
			`return;`
			`}`

update 2020-10-23 17:10:02 +08:00			`std::string nasal_lexer::identifier_gen()`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`{`
			`std::string token_str="";`
update 2020-11-02 12:24:12 +08:00			`while(ptr<res_size && (IS_IDENTIFIER(res[ptr])\|\|IS_DIGIT(res[ptr])))`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`token_str+=res[ptr++];`
update 2020-11-02 12:24:12 +08:00			`line_code+=token_str;`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`return token_str;`
			`// after running this process, ptr will point to the next token's beginning character`
			`}`

update 2020-10-23 17:10:02 +08:00			`std::string nasal_lexer::number_gen()`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`{`
			`bool scientific_notation=false;// numbers like 1e8 are scientific_notation`
			`std::string token_str="";`
			`// generate hex number`
			`if(res[ptr]=='0' && ptr+1<res_size && res[ptr+1]=='x')`
			`{`
			`token_str="0x";`
			`ptr+=2;`
			`while(ptr<res_size && IS_HEX_NUMBER(res[ptr]))`
			`token_str+=res[ptr++];`
update 2020-11-02 12:24:12 +08:00			`line_code+=token_str;`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`if(token_str=="0x")`
			`{`
update 2020-11-02 12:24:12 +08:00			`die("["+line_code+"_] incorrect number.",line,line_code.length());`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`return "0";`
			`}`
			`return token_str;`
			`}`
			`// generate oct number`
			`else if(res[ptr]=='0' && ptr+1<res_size && res[ptr+1]=='o')`
			`{`
			`token_str="0o";`
			`ptr+=2;`
			`while(ptr<res_size && IS_OCT_NUMEBR(res[ptr]))`
			`token_str+=res[ptr++];`
update 2020-11-02 12:24:12 +08:00			`line_code+=token_str;`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`if(token_str=="0o")`
			`{`
update 2020-11-02 12:24:12 +08:00			`die("["+line_code+"_] incorrect number.",line,line_code.length());`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`return "0";`
			`}`
			`return token_str;`
			`}`
			`// generate dec number`
update 2020-11-02 12:24:12 +08:00			`// dec number -> [0~9][0~9](.[0~9])(e\|E(+\|-)0\|[1~9][0~9]*)`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`while(ptr<res_size && IS_DIGIT(res[ptr]))`
			`token_str+=res[ptr++];`
			`if(ptr<res_size && res[ptr]=='.')`
			`{`
			`token_str+=res[ptr++];`
			`// "xxxx." is not a correct number`
			`if(ptr>=res_size)`
			`{`
update 2020-11-02 12:24:12 +08:00			`line_code+=token_str;`
			`die("["+line_code+"_] incorrect number.",line,line_code.length());`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`return "0";`
			`}`
			`while(ptr<res_size && IS_DIGIT(res[ptr]))`
			`token_str+=res[ptr++];`
			`// "xxxx." is not a correct number`
			`if(token_str.back()=='.')`
			`{`
update 2020-11-02 12:24:12 +08:00			`line_code+=token_str;`
			`die("["+line_code+"_] incorrect number.",line,line_code.length());`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`return "0";`
			`}`
			`}`
			`if(ptr<res_size && (res[ptr]=='e' \|\| res[ptr]=='E'))`
			`{`
			`token_str+=res[ptr++];`
			`// "xxxe" is not a correct number`
			`if(ptr>=res_size)`
			`{`
update 2020-11-02 12:24:12 +08:00			`line_code+=token_str;`
			`die("["+line_code+"_] incorrect number.",line,line_code.length());`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`return "0";`
			`}`
			`if(ptr<res_size && (res[ptr]=='-' \|\| res[ptr]=='+'))`
			`token_str+=res[ptr++];`
			`if(ptr>=res_size)`
			`{`
update 2020-11-02 12:24:12 +08:00			`line_code+=token_str;`
			`die("["+line_code+"_] incorrect number.",line,line_code.length());`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`return "0";`
			`}`
			`if(ptr<res_size && res[ptr]=='0')`
			`token_str+=res[ptr++];`
			`while(ptr<res_size && IS_DIGIT(res[ptr]))`
			`token_str+=res[ptr++];`
			`// "xxxe(-\|+)" is not a correct number`
			`if(token_str.back()=='e' \|\| token_str.back()=='E' \|\| token_str.back()=='-' \|\| token_str.back()=='+')`
			`{`
update 2020-11-02 12:24:12 +08:00			`line_code+=token_str;`
			`die("["+line_code+"_] incorrect number.",line,line_code.length());`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`return "0";`
			`}`
			`}`
update 2020-11-02 12:24:12 +08:00			`line_code+=token_str;`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`return token_str;`
			`}`

update 2020-10-23 17:10:02 +08:00			`std::string nasal_lexer::string_gen()`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`{`
			`std::string token_str="";`
update 2020-11-02 12:24:12 +08:00			`line_code+=res[ptr];`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`char str_begin=res[ptr++];`
			`while(ptr<res_size && res[ptr]!=str_begin)`
			`{`
update 2020-11-02 12:24:12 +08:00			`line_code+=res[ptr];`
			`if(res[ptr]=='\n')`
			`{`
			`line_code="";`
			`++line;`
			`}`
			`if(res[ptr]=='\\' && ptr+1<res_size)`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`{`
			`++ptr;`
update 2020-11-02 12:24:12 +08:00			`line_code+=res[ptr];`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`switch(res[ptr])`
			`{`
			`case 'a':token_str.push_back('\a');break;`
			`case 'b':token_str.push_back('\b');break;`
			`case 'f':token_str.push_back('\f');break;`
			`case 'n':token_str.push_back('\n');break;`
			`case 'r':token_str.push_back('\r');break;`
			`case 't':token_str.push_back('\t');break;`
			`case 'v':token_str.push_back('\v');break;`
			`case '?':token_str.push_back('\?');break;`
			`case '0':token_str.push_back('\0');break;`
			`case '\\':token_str.push_back('\\');break;`
			`case '\'':token_str.push_back('\'');break;`
			`case '\"':token_str.push_back('\"');break;`
			`default: token_str.push_back(res[ptr]);break;`
			`}`
			`}`
			`else`
			`token_str+=res[ptr];`
			`++ptr;`
			`}`
			`// check if this string ends with a " or '`
			`if(ptr>=res_size)`
update 2020-11-02 12:24:12 +08:00			`die("["+line_code+"_] get EOF when generating string.",line,line_code.length());`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`++ptr;`
			`return token_str;`
			`}`

update 2020-10-23 17:10:02 +08:00			`void nasal_lexer::scanner()`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`{`
			`token_list.clear();`
update 2020-10-23 17:10:02 +08:00			`line=1;`
			`ptr=0;`
update 2020-11-02 12:24:12 +08:00			`line_code="";`
update 2020-10-23 17:10:02 +08:00
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`std::string token_str;`
			`while(ptr<res_size)`
			`{`
			`while(ptr<res_size && (res[ptr]==' ' \|\| res[ptr]=='\n' \|\| res[ptr]=='\t' \|\| res[ptr]=='\r' \|\| res[ptr]<0))`
			`{`
			`// these characters will be ignored, and '\n' will cause ++line`
update 2020-11-02 12:24:12 +08:00			`line_code+=res[ptr];`
			`if(res[ptr]=='\n')`
			`{`
			`++line;`
			`line_code="";`
			`}`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`++ptr;`
			`}`
			`if(ptr>=res_size) break;`
update 2020-11-02 12:24:12 +08:00			`if(IS_IDENTIFIER(res[ptr]))`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`{`
update 2020-10-23 17:10:02 +08:00			`token_str=identifier_gen();`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`token new_token;`
			`new_token.line=line;`
			`new_token.str=token_str;`
			`new_token.type=0;`
update 2020-11-02 12:24:12 +08:00			`for(int i=0;token_table[i].str;++i)`
			`if(token_str==token_table[i].str)`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`{`
update 2020-11-02 12:24:12 +08:00			`new_token.type=token_table[i].tok_type;`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`break;`
			`}`
			`if(!new_token.type)`
			`new_token.type=tok_identifier;`
			`token_list.push_back(new_token);`
			`}`
			`else if(IS_DIGIT(res[ptr]))`
			`{`
update 2020-10-23 17:10:02 +08:00			`token_str=number_gen();`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`token new_token;`
			`new_token.line=line;`
			`new_token.str=token_str;`
			`new_token.type=tok_number;`
			`token_list.push_back(new_token);`
			`}`
update 2020-11-02 12:24:12 +08:00			`else if(IS_STRING(res[ptr]))`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`{`
update 2020-10-23 17:10:02 +08:00			`token_str=string_gen();`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`token new_token;`
			`new_token.line=line;`
			`new_token.type=tok_string;`
			`new_token.str=token_str;`
			`token_list.push_back(new_token);`
			`}`
			`else if(IS_SINGLE_OPRATOR(res[ptr]))`
			`{`
			`token_str="";`
			`token_str+=res[ptr];`
update 2020-11-02 12:24:12 +08:00			`line_code+=res[ptr];`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`token new_token;`
			`new_token.line=line;`
			`new_token.str=token_str;`
update 2020-11-02 12:24:12 +08:00			`new_token.type=-1;`
			`for(int i=0;token_table[i].str;++i)`
			`if(token_str==token_table[i].str)`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`{`
update 2020-11-02 12:24:12 +08:00			`new_token.type=token_table[i].tok_type;`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`break;`
			`}`
update 2020-11-02 12:24:12 +08:00			`if(new_token.type<0)`
			`die("["+line_code+"_] incorrect operator.",line,line_code.length());`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`token_list.push_back(new_token);`
			`++ptr;`
			`}`
			`else if(res[ptr]=='.')`
			`{`
			`if(ptr+2<res_size && res[ptr+1]=='.' && res[ptr+2]=='.')`
			`{`
			`token_str="...";`
			`ptr+=3;`
			`}`
			`else`
			`{`
			`token_str=".";`
			`++ptr;`
			`}`
update 2020-11-02 12:24:12 +08:00			`line_code+=token_str;`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`token new_token;`
			`new_token.line=line;`
			`new_token.str=token_str;`
update 2020-11-02 12:24:12 +08:00			`for(int i=0;token_table[i].str;++i)`
			`if(token_str==token_table[i].str)`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`{`
update 2020-11-02 12:24:12 +08:00			`new_token.type=token_table[i].tok_type;`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`break;`
			`}`
			`token_list.push_back(new_token);`
			`}`
			`else if(IS_CALC_OPERATOR(res[ptr]))`
			`{`
			`// get calculation operator`
update 2020-11-02 12:24:12 +08:00			`token_str=res[ptr];`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`++ptr;`
			`if(ptr<res.size() && res[ptr]=='=')`
			`{`
			`token_str+=res[ptr];`
			`++ptr;`
			`}`
update 2020-11-02 12:24:12 +08:00			`line_code+=token_str;`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`token new_token;`
			`new_token.line=line;`
			`new_token.str=token_str;`
update 2020-11-02 12:24:12 +08:00			`for(int i=0;token_table[i].str;++i)`
			`if(token_str==token_table[i].str)`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`{`
update 2020-11-02 12:24:12 +08:00			`new_token.type=token_table[i].tok_type;`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`break;`
			`}`
			`token_list.push_back(new_token);`
			`}`
update 2020-11-02 12:24:12 +08:00			`else if(IS_NOTE(res[ptr]))`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`{`
			`// avoid note`
			`while(ptr<res_size && res[ptr]!='\n') ++ptr;`
			`// after this process ptr will point to a '\n'`
			`// don't ++ptr then the counter for line can work correctly`
			`}`
			`else`
			`{`
update 2020-11-02 12:24:12 +08:00			`line_code+=res[ptr];`
			`die("["+line_code+"_] unknown character.",line,line_code.length());`
change project structure version 3.0 2020-10-23 14:53:04 +08:00			`++ptr;`
			`}`
			`}`
			`return;`
			`}`

			`void nasal_lexer::print_token()`
			`{`
			`int size=token_list.size();`
			`for(int i=0;i<size;++i)`
			`std::cout<<"("<<token_list[i].line<<" \| "<<token_list[i].str<<")\n";`
			`return;`
			`}`

			`int nasal_lexer::get_error()`
			`{`
			`return error;`
			`}`

			`std::vector<token>& nasal_lexer::get_token_list()`
			`{`
			`return token_list;`
			`}`
the beginning of a new interpreter 2020-06-01 01:19:26 +08:00			`#endif`