✨ Add UTF-8 identifier check in nasal_lexer, fix printf formats on the Windows platform, and add test file utf8chk.nas
This commit is contained in:
parent
de262980cc
commit
51a1279110
29
nasal.h
29
nasal.h
|
@ -32,6 +32,16 @@
|
|||
#include <sys/wait.h>
|
||||
#endif
|
||||
|
||||
// printf conversion specifications for 64-bit integers, selected per platform:
// non-Windows builds get the `long` forms (%lx / %ld), Windows builds get the
// `long long` forms (%llx / %lld).
// NOTE(review): presumably needed because `long` is only 32 bits wide on
// Windows — confirm.
// NOTE(review): the `long` forms are only right where `long` is 64 bits wide;
// the <cinttypes> macros (PRIx64 / PRId64) may be a more portable basis —
// confirm against the argument types used at the call sites.
#ifndef _WIN32
|
||||
// 64-bit value in hexadecimal with a "0x" prefix
#define PRTHEX64 "0x%lx"
|
||||
// same, with a precision of 8 (at least 8 hex digits, zero-padded)
#define PRTHEX64_8 "0x%.8lx"
|
||||
// signed 64-bit value in decimal
#define PRTINT64 "%ld"
|
||||
#else
|
||||
// 64-bit value in hexadecimal with a "0x" prefix
#define PRTHEX64 "0x%llx"
|
||||
// same, with a precision of 8 (at least 8 hex digits, zero-padded)
#define PRTHEX64_8 "0x%.8llx"
|
||||
// signed 64-bit value in decimal
#define PRTINT64 "%lld"
|
||||
#endif
|
||||
|
||||
inline double hex_to_double(const char* str)
|
||||
{
|
||||
double ret=0;
|
||||
|
@ -112,6 +122,25 @@ double str2num(const char* str)
|
|||
return is_negative?-ret_num:ret_num;
|
||||
}
|
||||
|
||||
/**
 * Classify a byte as the lead (first) byte of a multi-byte UTF-8 sequence.
 *
 * Only the lead bytes allowed by RFC 3629 are accepted:
 *   0xC2..0xDF  110x xxxx  -> 1 continuation byte
 *   0xE0..0xEF  1110 xxxx  -> 2 continuation bytes
 *   0xF0..0xF4  1111 0xxx  -> 3 continuation bytes
 * The obsolete 5- and 6-byte forms (0xF8..0xFD), 0xFE/0xFF, the overlong
 * lead bytes 0xC0/0xC1 and 0xF5..0xF7 (beyond U+10FFFF) are rejected.
 *
 * @param head byte to classify
 * @return number of continuation bytes (10xx xxxx) that must follow `head`,
 *         or 0 when `head` cannot start a multi-byte sequence (ASCII, a
 *         continuation byte, or an invalid lead byte)
 */
inline int utf8_hdchk(char head)
{
    // compare as unsigned char so the result does not depend on whether
    // plain char is signed on the target platform
    const unsigned char c=static_cast<unsigned char>(head);
    if(0xc2<=c && c<=0xdf) // 110x xxxx (10xx xxxx)^1
        return 1;
    if(0xe0<=c && c<=0xef) // 1110 xxxx (10xx xxxx)^2
        return 2;
    if(0xf0<=c && c<=0xf4) // 1111 0xxx (10xx xxxx)^3
        return 3;
    return 0;
}
|
||||
|
||||
std::string rawstr(const std::string& str)
|
||||
{
|
||||
std::string ret("");
|
||||
|
|
|
@ -1287,7 +1287,7 @@ void nasal_codegen::print_op(uint32_t index)
|
|||
case op_calll: case op_mcalll: case op_loadl:
|
||||
printf("0x%x\n",c.num);break;
|
||||
case op_callb:
|
||||
printf("0x%x <%s@0x%lx>\n",c.num,builtin[c.num].name,(uint64_t)builtin[c.num].func);break;
|
||||
printf("0x%x <%s@" PRTHEX64 ">\n",c.num,builtin[c.num].name,(uint64_t)builtin[c.num].func);break;
|
||||
case op_upval:case op_mupval: case op_loadu:
|
||||
printf("0x%x[0x%x]\n",(c.num>>16)&0xffff,c.num&0xffff);break;
|
||||
case op_happ: case op_pstr:
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
#ifndef __NASAL_LEXER_H__
|
||||
#define __NASAL_LEXER_H__
|
||||
|
||||
// Character-class predicates used by the lexer.
// Each macro parenthesizes its argument, so any expression may be passed.
// The argument is evaluated more than once and must be free of side effects.

// ID: '_', an ASCII letter, or a byte with the high bit set (part of a
// multi-byte UTF-8 sequence). The high bit is tested with a mask instead of
// `c<0`, which is never true on platforms where plain char is unsigned.
#define ID(c) (((c)=='_')||('a'<=(c)&&(c)<='z')||('A'<=(c)&&(c)<='Z')||(((c)&0x80)!=0))
// HEX: hexadecimal digit, either case
#define HEX(c) (('0'<=(c)&&(c)<='9')||('a'<=(c)&&(c)<='f')||('A'<=(c)&&(c)<='F'))
// OCT: octal digit
#define OCT(c) ('0'<=(c)&&(c)<='7')
// DIGIT: decimal digit
#define DIGIT(c) ('0'<=(c)&&(c)<='9')
|
||||
|
@ -114,6 +114,7 @@ private:
|
|||
uint32_t get_type(const std::string&);
|
||||
void die(std::string info){nerr.err("lexer",line,column,info);};
|
||||
void open(const std::string&);
|
||||
std::string utf8_gen();
|
||||
std::string id_gen();
|
||||
std::string num_gen();
|
||||
std::string str_gen();
|
||||
|
@ -151,12 +152,43 @@ uint32_t nasal_lexer::get_type(const std::string& tk_str)
|
|||
return tok_null;
|
||||
}
|
||||
|
||||
std::string nasal_lexer::utf8_gen()
|
||||
{
|
||||
std::string str="";
|
||||
while(ptr<res.size() && res[ptr]<0)
|
||||
{
|
||||
std::string tmp="";
|
||||
uint32_t nbytes=utf8_hdchk(res[ptr]);
|
||||
if(nbytes)
|
||||
{
|
||||
tmp+=res[ptr++];
|
||||
for(uint32_t i=0;i<nbytes;++i,++ptr)
|
||||
if(ptr<res.size() && (res[ptr]&0xc0)==0x80)
|
||||
tmp+=res[ptr];
|
||||
if(tmp.length()!=1+nbytes)
|
||||
die("invalid utf-8 character here");
|
||||
str+=tmp;
|
||||
++column;
|
||||
}
|
||||
else
|
||||
++ptr;
|
||||
}
|
||||
return str;
|
||||
}
|
||||
|
||||
std::string nasal_lexer::id_gen()
|
||||
{
|
||||
std::string str="";
|
||||
while(ptr<res.size() && (ID(res[ptr])||DIGIT(res[ptr])))
|
||||
str+=res[ptr++];
|
||||
column+=str.length();
|
||||
{
|
||||
if(res[ptr]<0) // utf-8
|
||||
str+=utf8_gen();
|
||||
else // ascii
|
||||
{
|
||||
str+=res[ptr++];
|
||||
++column;
|
||||
}
|
||||
}
|
||||
return str;
|
||||
}
|
||||
|
||||
|
@ -283,7 +315,7 @@ void nasal_lexer::scan(const std::string& file)
|
|||
std::string str;
|
||||
while(ptr<res.size())
|
||||
{
|
||||
while(ptr<res.size() && (res[ptr]==' ' || res[ptr]=='\n' || res[ptr]=='\t' || res[ptr]=='\r' || res[ptr]<0))
|
||||
while(ptr<res.size() && (res[ptr]==' ' || res[ptr]=='\n' || res[ptr]=='\t' || res[ptr]=='\r'))// || res[ptr]<0))
|
||||
{
|
||||
// these characters will be ignored, and '\n' will cause ++line
|
||||
++column;
|
||||
|
|
34
nasal_vm.h
34
nasal_vm.h
|
@ -148,20 +148,20 @@ void nasal_vm::valinfo(nasal_ref& val)
|
|||
{
|
||||
case vm_none: printf("| null |\n");break;
|
||||
case vm_ret: printf("| pc | 0x%x\n",val.ret());break;
|
||||
case vm_addr: printf("| addr | 0x%lx\n",(uint64_t)val.addr());break;
|
||||
case vm_cnt: printf("| cnt | %ld\n",val.cnt());break;
|
||||
case vm_addr: printf("| addr | " PRTHEX64 "\n",(uint64_t)val.addr());break;
|
||||
case vm_cnt: printf("| cnt | " PRTINT64 "\n",val.cnt());break;
|
||||
case vm_nil: printf("| nil |\n");break;
|
||||
case vm_num: printf("| num | ");std::cout<<val.num()<<'\n';break;
|
||||
case vm_str:
|
||||
{
|
||||
std::string tmp=rawstr(val.str());
|
||||
printf("| str | <0x%lx> %.16s%s\n",(uint64_t)p,tmp.c_str(),tmp.length()>16?"...":"");
|
||||
printf("| str | <" PRTHEX64 "> %.16s%s\n",(uint64_t)p,tmp.c_str(),tmp.length()>16?"...":"");
|
||||
}break;
|
||||
case vm_func: printf("| func | <0x%lx> entry:0x%x\n",(uint64_t)p,val.func().entry);break;
|
||||
case vm_vec: printf("| vec | <0x%lx> [%zu val]\n",(uint64_t)p,val.vec().size());break;
|
||||
case vm_hash: printf("| hash | <0x%lx> {%zu val}\n",(uint64_t)p,val.hash().size());break;
|
||||
case vm_obj: printf("| obj | <0x%lx> obj:0x%lx\n",(uint64_t)p,(uint64_t)val.obj().ptr);break;
|
||||
default: printf("| err | <0x%lx> unknown object\n",(uint64_t)p);break;
|
||||
case vm_func: printf("| func | <" PRTHEX64 "> entry:0x%x\n",(uint64_t)p,val.func().entry);break;
|
||||
case vm_vec: printf("| vec | <" PRTHEX64 "> [%zu val]\n",(uint64_t)p,val.vec().size());break;
|
||||
case vm_hash: printf("| hash | <" PRTHEX64 "> {%zu val}\n",(uint64_t)p,val.hash().size());break;
|
||||
case vm_obj: printf("| obj | <" PRTHEX64 "> obj:" PRTHEX64 "\n",(uint64_t)p,(uint64_t)val.obj().ptr);break;
|
||||
default: printf("| err | <" PRTHEX64 "> unknown object\n",(uint64_t)p);break;
|
||||
}
|
||||
}
|
||||
void nasal_vm::bytecodeinfo(const char* header,const uint32_t p)
|
||||
|
@ -202,7 +202,7 @@ void nasal_vm::bytecodeinfo(const char* header,const uint32_t p)
|
|||
case op_calll: case op_mcalll: case op_loadl:
|
||||
printf("0x%x",c.num);break;
|
||||
case op_callb:
|
||||
printf("0x%x <%s@0x%lx>",c.num,builtin[c.num].name,(uint64_t)builtin[c.num].func);break;
|
||||
printf("0x%x <%s@" PRTHEX64 ">",c.num,builtin[c.num].name,(uint64_t)builtin[c.num].func);break;
|
||||
case op_upval: case op_mupval: case op_loadu:
|
||||
printf(" (0x%x[0x%x])",(c.num>>16)&0xffff,c.num&0xffff);break;
|
||||
case op_happ: case op_pstr:
|
||||
|
@ -251,16 +251,16 @@ void nasal_vm::stackinfo(const uint32_t limit=10)
|
|||
uint32_t gsize=bytecode[0].num;
|
||||
nasal_ref* top=gc.top;
|
||||
nasal_ref* bottom=gc.stack+gsize;
|
||||
printf("vm stack(0x%lx<sp+%u>, limit %d, total ",(uint64_t)bottom,gsize,limit);
|
||||
printf("vm stack(" PRTHEX64 "<sp+%u>, limit %d, total ",(uint64_t)bottom,gsize,limit);
|
||||
if(top<bottom)
|
||||
{
|
||||
printf("0)\n");
|
||||
return;
|
||||
}
|
||||
printf("%ld):\n",top-bottom+1);
|
||||
printf("" PRTINT64 "):\n",top-bottom+1);
|
||||
for(uint32_t i=0;i<limit && top>=bottom;++i,--top)
|
||||
{
|
||||
printf(" 0x%.8lx",top-gc.stack);
|
||||
printf(" " PRTHEX64_8 "",top-gc.stack);
|
||||
valinfo(top[0]);
|
||||
}
|
||||
}
|
||||
|
@ -268,7 +268,7 @@ void nasal_vm::global_state()
|
|||
{
|
||||
if(!bytecode[0].num || gc.stack[0].type==vm_none) // bytecode[0].op is op_intg
|
||||
return;
|
||||
printf("global(0x%lx<sp+0>):\n",(uint64_t)gc.stack);
|
||||
printf("global(" PRTHEX64 "<sp+0>):\n",(uint64_t)gc.stack);
|
||||
for(uint32_t i=0;i<bytecode[0].num;++i)
|
||||
{
|
||||
printf(" 0x%.8x",i);
|
||||
|
@ -280,7 +280,7 @@ void nasal_vm::local_state()
|
|||
if(!localr || !gc.funcr.func().lsize)
|
||||
return;
|
||||
uint32_t lsize=gc.funcr.func().lsize;
|
||||
printf("local(0x%lx<sp+%ld>):\n",(uint64_t)localr,localr-gc.stack);
|
||||
printf("local(" PRTHEX64 "<sp+" PRTINT64 ">):\n",(uint64_t)localr,localr-gc.stack);
|
||||
for(uint32_t i=0;i<lsize;++i)
|
||||
{
|
||||
printf(" 0x%.8x",i);
|
||||
|
@ -306,12 +306,12 @@ void nasal_vm::upval_state()
|
|||
}
|
||||
void nasal_vm::detail()
|
||||
{
|
||||
printf("maddr:\n (0x%lx)\n",(uint64_t)mem_addr);
|
||||
printf("localr:\n (0x%lx)\n",(uint64_t)localr);
|
||||
printf("maddr:\n (" PRTHEX64 ")\n",(uint64_t)mem_addr);
|
||||
printf("localr:\n (" PRTHEX64 ")\n",(uint64_t)localr);
|
||||
if(gc.funcr.type==vm_nil)
|
||||
printf("funcr:\n (nil)\n");
|
||||
else
|
||||
printf("funcr:\n (<0x%lx> entry:0x%x)\n",
|
||||
printf("funcr:\n (<" PRTHEX64 "> entry:0x%x)\n",
|
||||
(uint64_t)gc.funcr.value.gcobj,
|
||||
gc.funcr.func().entry);
|
||||
global_state();
|
||||
|
|
|
@ -65,8 +65,9 @@ var testfile=[
|
|||
"test/tetris.nas ",
|
||||
"test/trait.nas ",
|
||||
"test/turingmachine.nas",
|
||||
"test/ycombinator.nas ",
|
||||
"test/wavecollapse.nas "
|
||||
"test/utf8chk.nas ",
|
||||
"test/wavecollapse.nas ",
|
||||
"test/ycombinator.nas "
|
||||
];
|
||||
|
||||
var module=[
|
||||
|
|
|
@ -0,0 +1,11 @@
|
|||
# UTF-8 identifier test: every user-defined name in this script is written in
# Chinese.
# 输出 ("output") is bound to print.
var 输出=print;
|
||||
# 这是unicode测试 ("this is a unicode test"): passes each message in the list
# below to 输出, followed by a newline.
var 这是unicode测试=func(){
|
||||
# 测试成功 ("test succeeded") holds the messages; in English, roughly:
#   "unicode: utf-8 support test succeeded"
#   "currently only utf-8 and ascii format files are supported"
#   "note: on windows, please enable code page chcp 65001"
var 测试成功=[
|
||||
"unicode: utf-8支持测试成功",
|
||||
"目前仅支持utf-8以及ascii格式文件",
|
||||
"注意: windows系统请开启chcp 65001代码页"
|
||||
];
|
||||
# 内容 ("content") is the loop variable
foreach(var 内容;测试成功)
|
||||
输出(内容~"\n");
|
||||
}
|
||||
# run the test
这是unicode测试();
|
Loading…
Reference in New Issue