// Nasal-Interpreter/version0.7/nasal_parser.h
//
// 661 lines
// 19 KiB
// C++

#ifndef __NASAL_PARSER_H__
#define __NASAL_PARSER_H__
#include <iostream>
#include <list>
#include <stack>
#include <string>
// Every terminal and non-terminal symbol used by the parser.
// The numeric values matter: rows of the grammar table `par` are padded with
// zeros and the PDA skips zero entries, so __stack_end must keep the value 0.
// NOTE(review): identifiers that begin with a double underscore are reserved
// for the implementation in C++; they are kept here because renaming them
// would break every user of this header.
enum token_type
{
    __stack_end=0,
    __null_end,                 // $
    __equal,                    // =
    __cmp_equal,                // ==
    __cmp_not_equal,            // !=
    __cmp_less,                 // <
    __cmp_less_or_equal,        // <=
    __cmp_more,                 // >
    __cmp_more_or_equal,        // >=
    __and_operator,             // and
    __or_operator,              // or
    __nor_operator,             // !
    __add_operator,             // +
    __sub_operator,             // -
    __mul_operator,             // *
    __div_operator,             // /
    __link_operator,            // ~
    __add_equal,                // +=
    __sub_equal,                // -=
    __mul_equal,                // *=
    __div_equal,                // /=
    __link_equal,               // ~=
    __left_brace,               // {
    __right_brace,              // }
    __left_bracket,             // [
    __right_bracket,            // ]
    __left_curve,               // (
    __right_curve,              // )
    __semi,                     // ;
    __comma,                    // ,
    __colon,                    // :
    __dot,                      // .
    __var,                      // var reserve word
    __func,                     // func reserve word
    __id,
    __identifier,
    __identifiers,
    __identifier_end,
    __lacked_identifier,
    __parameter,
    __parameters,
    __parameter_end,
    __number,
    __string,
    __scalar,
    __scalars,
    __scalar_end,
    __list,
    __list_end,
    __hash,
    __hash_end,
    __hash_member,
    __hash_members,
    __hash_member_end,
    __hash_search,
    __statement,
    __statements,
    __function,                 // function(){}
    __call_function,
    __call_function_end,
    __definition,
    __assignment,
    __pre_assignment,
    __calculation,
    __loop,                     // for() while() continue; break;
    __continue,
    __break,
    __for,
    __forindex,
    __foreach,
    __while,
    __choose,                   // if elsif else
    __if,
    __elsif,
    __else,
    __return
};

// Writes a printable name for a token_type value to std::cout.
// No newline or separator is appended.
//   type : a token_type value (passed as int because parse_unit stores int).
// Every enumerator has a name; any other value prints "unknown_token" so that
// diagnostics never contain silent blanks.
void print_token_type(int type)
{
    const char* context="unknown_token";
    switch(type)
    {
        case __stack_end:         context="__stack_end";       break;
        case __null_end:          context="$";                 break;
        case __equal:             context="=";                 break;
        case __cmp_equal:         context="==";                break;
        case __cmp_not_equal:     context="!=";                break;
        case __cmp_less:          context="<";                 break;
        case __cmp_less_or_equal: context="<=";                break;
        case __cmp_more:          context=">";                 break;
        case __cmp_more_or_equal: context=">=";                break;
        case __and_operator:      context="and";               break;
        case __or_operator:       context="or";                break;
        case __nor_operator:      context="!";                 break;
        case __add_operator:      context="+";                 break;
        case __sub_operator:      context="-";                 break;
        case __mul_operator:      context="*";                 break;
        case __div_operator:      context="/";                 break;
        case __link_operator:     context="~";                 break;
        case __add_equal:         context="+=";                break;
        case __sub_equal:         context="-=";                break;
        case __mul_equal:         context="*=";                break;
        case __div_equal:         context="/=";                break;
        case __link_equal:        context="~=";                break;
        case __left_brace:        context="{";                 break;
        case __right_brace:       context="}";                 break;
        case __left_bracket:      context="[";                 break;
        case __right_bracket:     context="]";                 break;
        case __left_curve:        context="(";                 break;
        case __right_curve:       context=")";                 break;
        case __semi:              context=";";                 break;
        case __comma:             context=",";                 break;
        case __colon:             context=":";                 break;
        case __dot:               context=".";                 break;
        case __var:               context="var";               break;
        case __func:              context="func";              break;
        case __id:                context="id";                break;
        case __identifier:        context="identifier";        break;
        case __identifiers:       context="identifiers";       break;
        case __identifier_end:    context="identifier_end";    break;
        case __lacked_identifier: context="lacked_identifier"; break;
        case __parameter:         context="parameter";         break;
        case __parameters:        context="parameters";        break;
        case __parameter_end:     context="parameter_end";     break;
        case __number:            context="number";            break;
        case __string:            context="string";            break;
        case __scalar:            context="scalar";            break;
        case __scalars:           context="scalars";           break;
        case __scalar_end:        context="scalar_end";        break;
        case __list:              context="list";              break;
        case __list_end:          context="list_end";          break;
        case __hash:              context="hash";              break;
        case __hash_end:          context="hash_end";          break;
        case __hash_member:       context="hash_member";       break;
        case __hash_members:      context="hash_members";      break;
        case __hash_member_end:   context="hash_member_end";   break;
        case __hash_search:       context="hash_search";       break;
        case __statement:         context="statement";         break;
        case __statements:        context="statements";        break;
        case __function:          context="function_module";   break;
        case __call_function:     context="call_function";     break;
        case __call_function_end: context="call_function_end"; break;
        case __definition:        context="definition";        break;
        case __assignment:        context="assignment";        break;
        case __pre_assignment:    context="pre_assignment";    break;
        case __calculation:       context="calculation";       break;
        case __loop:              context="loop";              break;
        case __continue:          context="continue";          break;
        case __break:             context="break";             break;
        case __for:               context="for";               break;
        case __forindex:          context="forindex";          break;
        case __foreach:           context="foreach";           break;
        case __while:             context="while";             break;
        case __choose:            context="choose";            break;
        case __if:                context="if";                break;
        case __elsif:             context="elsif";             break;
        case __else:              context="else";              break;
        case __return:            context="return";            break;
        default:                                               break;
    }
    std::cout<<context;
    return;
}
// Capacity of the right-hand side of one grammar rule (see cmp_seq::tokens).
const int max_token_len=20;
// One grammar rule: a sequence of symbols and the symbol that sequence is
// recognised as. The table below is scanned from top to bottom by the PDA and
// the first rule that matches is taken, so the order of the rows is significant.
struct cmp_seq
{
int tokens[max_token_len];// right-hand side, first symbol at index 0; slots not listed in the initialiser are 0 and are skipped by the PDA
int res;// token_type value this sequence reduces to
}par[]=
{
// list: '[' followed by a __list_end tail; the tail is a run of elements closed by ']'
{{__left_bracket,__list_end}, __list},
{{__number,__list_end}, __list_end},
{{__id,__list_end}, __list_end},
{{__identifier,__list_end}, __list_end},
{{__string,__list_end}, __list_end},
{{__list,__list_end}, __list_end},
{{__hash,__list_end}, __list_end},
{{__comma,__number,__list_end}, __list_end},
{{__comma,__id,__list_end}, __list_end},
{{__comma,__identifier,__list_end}, __list_end},
{{__comma,__string,__list_end}, __list_end},
{{__comma,__list,__list_end}, __list_end},
{{__comma,__hash,__list_end}, __list_end},
{{__right_bracket}, __list_end},
// hash member: id ':' value
{{__id,__colon,__number}, __hash_member},
{{__id,__colon,__id}, __hash_member},
{{__id,__colon,__identifier}, __hash_member},
{{__id,__colon,__string}, __hash_member},
{{__id,__colon,__list}, __hash_member},
{{__id,__colon,__hash}, __hash_member},
// hash: '{' followed by a __hash_end tail; the tail is a run of members closed by '}'
{{__left_brace,__hash_end}, __hash},
{{__hash_member,__hash_end}, __hash_end},
{{__comma,__hash_member,__hash_end}, __hash_end},
{{__right_brace}, __hash_end},
// hash search: names chained with '.'
{{__id,__dot,__hash_search}, __hash_search},
{{__identifier,__dot,__hash_search}, __hash_search},
{{__identifier}, __hash_search},
{{__id}, __hash_search},
// identifier: a function call or an indexed id
{{__call_function}, __identifier},
{{__id,__left_bracket,__number,__right_bracket}, __identifier},
{{__id,__left_bracket,__id,__right_bracket}, __identifier},
{{__id,__left_bracket,__identifier,__right_bracket}, __identifier},
// function call: id '(' followed by a __call_function_end tail closed by ')'
{{__id,__left_curve,__call_function_end}, __call_function},
{{__number,__call_function_end}, __call_function_end},
{{__id,__call_function_end}, __call_function_end},
{{__identifier,__call_function_end}, __call_function_end},
{{__string,__call_function_end}, __call_function_end},
{{__list,__call_function_end}, __call_function_end},
{{__hash,__call_function_end}, __call_function_end},
{{__function,__call_function_end}, __call_function_end},
{{__comma,__number,__call_function_end}, __call_function_end},
{{__comma,__id,__call_function_end}, __call_function_end},
{{__comma,__identifier,__call_function_end}, __call_function_end},
{{__comma,__string,__call_function_end}, __call_function_end},
{{__comma,__list,__call_function_end}, __call_function_end},
{{__comma,__hash,__call_function_end}, __call_function_end},
{{__comma,__function,__call_function_end}, __call_function_end},
{{__right_curve}, __call_function_end},
// definition: var id '=' value ';'
{{__var,__id,__equal,__number,__semi}, __definition},
{{__var,__id,__equal,__id,__semi}, __definition},
{{__var,__id,__equal,__identifier,__semi}, __definition},
{{__var,__id,__equal,__string,__semi}, __definition},
{{__var,__id,__equal,__list,__semi}, __definition},
{{__var,__id,__equal,__hash,__semi}, __definition},
{{__var,__id,__equal,__hash_search,__semi}, __definition},
// NOTE(review): the seven rows below have exactly the same right-hand sides as
// the __definition rows above, so the __definition rows always win first.
// Presumably an assignment was meant to start at __id without __var; confirm
// the intended grammar before changing them (row order also matters here).
{{__var,__id,__equal,__number,__semi}, __assignment},
{{__var,__id,__equal,__id,__semi}, __assignment},
{{__var,__id,__equal,__identifier,__semi}, __assignment},
{{__var,__id,__equal,__string,__semi}, __assignment},
{{__var,__id,__equal,__list,__semi}, __assignment},
{{__var,__id,__equal,__hash,__semi}, __assignment},
{{__var,__id,__equal,__hash_search,__semi}, __assignment},
// statement: a definition or an assignment
{{__definition}, __statement},
{{__assignment}, __statement}
};
// Number of rows in par, computed from the array size.
int num_of_par=sizeof(par)/sizeof(cmp_seq);
// One entry of a parser stack: a token_type value together with a line number.
struct parse_unit
{
int line;// line number copied from the lexer token; the PDA uses 0 for symbols taken from the grammar table
int type;// token_type value
};
class PDA
{
private:
std::stack<parse_unit> main_stack;
std::stack<parse_unit> error_stack;
bool stack_running_problem_check;
public:
PDA()
{
stack_running_problem_check=false;
//when the DFS searched too deep (over 1024 levels) the stack_running_problem_check
//will be set to true then return ro the main_progress
//when main_progress finds that the stack_running_problem_check is true
//then main_progress ends with an error : stack out of range
//and return the line where the error occurred
}
void stack_input(std::stack<parse_unit>& temp)
{
while(!temp.empty())
{
main_stack.push(temp.top());
temp.pop();
}
return;
}
void print_error()
{
if(!error_stack.empty())
{
std::stack<parse_unit> temp;
while(!error_stack.empty())
{
temp.push(error_stack.top());
error_stack.pop();
}
std::cout<<">>[Error] Parse error:";
int now_line=0;
while(!temp.empty())
{
if(temp.top().line!=now_line)
{
now_line=temp.top().line;
std::cout<<std::endl<<"line "<<now_line<<" ";
}
print_token_type(temp.top().type);
std::cout<<" ";
temp.pop();
}
std::cout<<std::endl;
}
else
std::cout<<">>[Parse] 0 error occurred."<<std::endl;
return;
}
bool extend_comp_progress(const int type)
{
static int depth=0;
++depth;
if(stack_running_problem_check)
return false;
if(depth==1024)
{
stack_running_problem_check=true;
return false;
}
std::stack<parse_unit> recognized_stack;
std::stack<parse_unit> comp_stack;
parse_unit temp;
temp.line=0;
temp.type=0;
for(int i=0;i<num_of_par;++i)
{
if(par[i].res==type)
{
while(!comp_stack.empty())
comp_stack.pop();
for(int j=max_token_len-1;j>=0;--j)
{
if(par[i].tokens[j])
{
temp.type=par[i].tokens[j];
comp_stack.push(temp);
}
}
while((!comp_stack.empty()) && (!main_stack.empty()))
{
if(comp_stack.top().type==main_stack.top().type)
{
comp_stack.pop();
recognized_stack.push(main_stack.top());
main_stack.pop();
}
else if(comp_stack.top().type!=main_stack.top().type)
{
if(!extend_comp_progress(comp_stack.top().type))
{
//if after all search the real type
//can not be found,then recognized_stack
//returns all the tokens in it
//to make sure the next comparation works
while(!recognized_stack.empty())
{
main_stack.push(recognized_stack.top());
recognized_stack.pop();
}
break;
}
else
comp_stack.pop();
}
}
if(comp_stack.empty())
{
--depth;
return true;
}
}
}
--depth;
return false;
}
void main_comp_progress()
{
std::stack<parse_unit> recognized_stack;
std::stack<parse_unit> comp_stack;
parse_unit temp;
temp.line=0;
temp.type=0;
while(!main_stack.empty())
{
bool ispar=false;
for(int i=0;i<num_of_par;++i)
{
while(!comp_stack.empty())
comp_stack.pop();
for(int j=max_token_len-1;j>=0;--j)
{
if(par[i].tokens[j])
{
temp.type=par[i].tokens[j];
comp_stack.push(temp);
}
}
while((!comp_stack.empty()) && (!main_stack.empty()))
{
//because of the check !omp_stack.empty()
//every token sequence should not be null
if(comp_stack.top().type==main_stack.top().type)
{
comp_stack.pop();
recognized_stack.push(main_stack.top());
main_stack.pop();
}
else if(comp_stack.top().type!=main_stack.top().type)
{
if(!extend_comp_progress(comp_stack.top().type))
{
if(stack_running_problem_check)
{
std::cout<<">>[Parse] Stack out of range in line "<<main_stack.top().line<<std::endl;
return;
}
while(!recognized_stack.empty())
{
main_stack.push(recognized_stack.top());
recognized_stack.pop();
}
break;
}
else
comp_stack.pop();
}
}
if(comp_stack.empty())
{
while(!recognized_stack.empty())
recognized_stack.pop();
ispar=true;
break;
}
}
if(!ispar && !main_stack.empty())
{
error_stack.push(main_stack.top());
main_stack.pop();
}
}
std::cout<<"[Parse] Complete checking."<<std::endl;
print_error();
return;
}
};
class nasal_parser
{
private:
std::stack<parse_unit> parser;
public:
void parse_process(std::list<token>& lexer)
{
while(!parser.empty())
parser.pop();
for(std::list<token>::iterator i=lexer.begin();i!=lexer.end();++i)
{
parse_unit temp_parse;
temp_parse.line=(*i).line;
if(((*i).content=="var") || ((*i).content=="func") || ((*i).content=="return") || ((*i).content=="nil") || ((*i).content=="continue") || ((*i).content=="break") || ((*i).content=="and") || ((*i).content=="or"))
{
if((*i).content=="var")
temp_parse.type=__var;
else if((*i).content=="func")
temp_parse.type=__func;
else if((*i).content=="return")
temp_parse.type=__return;
else if((*i).content=="nil")
temp_parse.type=__scalar;
else if((*i).content=="continue")
temp_parse.type=__continue;
else if((*i).content=="break")
temp_parse.type=__break;
else if((*i).content=="and")
temp_parse.type=__and_operator;
else if((*i).content=="or")
temp_parse.type=__or_operator;
}
else if((*i).type==IDENTIFIER)
{
temp_parse.type=__id;
}
else if(((*i).content=="for") || ((*i).content=="foreach") || ((*i).content=="while") || ((*i).content=="forindex"))
{
if((*i).content=="for")
temp_parse.type=__for;
else if((*i).content=="forindex")
temp_parse.type=__forindex;
else if((*i).content=="foreach")
temp_parse.type=__foreach;
else if((*i).content=="while")
temp_parse.type=__while;
}
else if(((*i).content=="if") || ((*i).content=="else") || ((*i).content=="elsif"))
{
if((*i).content=="if")
temp_parse.type=__if;
else if((*i).content=="else")
temp_parse.type=__else;
else if((*i).content=="elsif")
temp_parse.type=__elsif;
}
else if(((*i).content=="==") || ((*i).content=="!=") || ((*i).content==">") || ((*i).content==">=") || ((*i).content=="<") || ((*i).content=="<="))
{
if((*i).content=="==")
temp_parse.type=__cmp_equal;
else if((*i).content=="!=")
temp_parse.type=__cmp_not_equal;
else if((*i).content==">")
temp_parse.type=__cmp_more;
else if((*i).content==">=")
temp_parse.type=__cmp_more_or_equal;
else if((*i).content=="<")
temp_parse.type=__cmp_less;
else if((*i).content=="<=")
temp_parse.type=__cmp_less_or_equal;
}
else if(((*i).content==";") || ((*i).content==",") || ((*i).content=="=") || ((*i).content==":") || ((*i).content=="."))
{
if((*i).content==";")
temp_parse.type=__semi;
else if((*i).content==",")
temp_parse.type=__comma;
else if((*i).content=="=")
temp_parse.type=__equal;
else if((*i).content==":")
temp_parse.type=__colon;
else if((*i).content==".")
temp_parse.type=__dot;
}
else if(((*i).type==NUMBER) || ((*i).type==STRING))
{
if((*i).type==NUMBER)
temp_parse.type=__number;
else
temp_parse.type=__string;
}
else if(((*i).content=="+") || ((*i).content=="-") || ((*i).content=="*") || ((*i).content=="/") || ((*i).content=="~") || ((*i).content=="!"))
{
if((*i).content=="+")
temp_parse.type=__add_operator;
else if((*i).content=="-")
temp_parse.type=__sub_operator;
else if((*i).content=="*")
temp_parse.type=__mul_operator;
else if((*i).content=="/")
temp_parse.type=__div_operator;
else if((*i).content=="~")
temp_parse.type=__link_operator;
else if((*i).content=="!")
temp_parse.type=__nor_operator;
}
else if(((*i).content=="+=") || ((*i).content=="-=") || ((*i).content=="*=") || ((*i).content=="/=") || ((*i).content=="~="))
{
if((*i).content=="+=")
temp_parse.type=__add_equal;
else if((*i).content=="-=")
temp_parse.type=__sub_equal;
else if((*i).content=="*=")
temp_parse.type=__mul_equal;
else if((*i).content=="/=")
temp_parse.type=__div_equal;
else if((*i).content=="~=")
temp_parse.type=__link_equal;
}
else if(((*i).content=="(") || ((*i).content==")") || ((*i).content=="[") || ((*i).content=="]") || ((*i).content=="{") || ((*i).content=="}"))
{
char c=(*i).content[0];
switch(c)
{
case '(':
temp_parse.type=__left_curve;
break;
case ')':
temp_parse.type=__right_curve;
break;
case '[':
temp_parse.type=__left_bracket;
break;
case ']':
temp_parse.type=__right_bracket;
break;
case '{':
temp_parse.type=__left_brace;
break;
case '}':
temp_parse.type=__right_brace;
break;
}
}
parser.push(temp_parse);//push this into stack
}
PDA automata;
automata.stack_input(parser);
automata.main_comp_progress();
return;
}
};
#endif