/**
 * Classify the first byte of a UTF-8 encoded character.
 *
 * @param head  candidate lead byte (plain char, may be signed).
 * @return number of continuation bytes (10xx xxxx) that must follow `head`:
 *         1..3 for the forms defined by RFC 3629, 4..5 for the legacy
 *         RFC 2279 forms, and 0 when `head` is not a multi-byte lead byte
 *         (ASCII, a continuation byte, or the never-valid 0xFE / 0xFF).
 */
int utf8_hdchk(char head)
{
    // Compare on the unsigned value: plain char may be signed, and
    // right-shifting a negative value would drag sign bits into the result.
    const uint8_t c=static_cast<uint8_t>(head);

    if((c>>5)==0x06) // 110x xxxx (10xx xxxx)^1
        return 1;
    if((c>>4)==0x0e) // 1110 xxxx (10xx xxxx)^2
        return 2;
    if((c>>3)==0x1e) // 1111 0xxx (10xx xxxx)^3
        return 3;
    // The two forms below were dropped by RFC 3629 but are still accepted
    // so that input handled by earlier versions keeps lexing the same way.
    if((c>>2)==0x3e) // 1111 10xx (10xx xxxx)^4
        return 4;
    if((c>>1)==0x7e) // 1111 110x (10xx xxxx)^5
        return 5;

    // 0xFE and 0xFF never occur in any UTF-8 revision, so they are not lead
    // bytes; reporting 6 continuation bytes for 0xFE would make a caller
    // consume unrelated bytes that follow it.
    return 0;
}
('0'<=c&&c<='7') #define DIGIT(c) ('0'<=c&&c<='9') @@ -114,6 +114,7 @@ private: uint32_t get_type(const std::string&); void die(std::string info){nerr.err("lexer",line,column,info);}; void open(const std::string&); + std::string utf8_gen(); std::string id_gen(); std::string num_gen(); std::string str_gen(); @@ -151,12 +152,43 @@ uint32_t nasal_lexer::get_type(const std::string& tk_str) return tok_null; } +std::string nasal_lexer::utf8_gen() +{ + std::string str=""; + while(ptr %.16s%s\n",(uint64_t)p,tmp.c_str(),tmp.length()>16?"...":""); + printf("| str | <" PRTHEX64 "> %.16s%s\n",(uint64_t)p,tmp.c_str(),tmp.length()>16?"...":""); }break; - case vm_func: printf("| func | <0x%lx> entry:0x%x\n",(uint64_t)p,val.func().entry);break; - case vm_vec: printf("| vec | <0x%lx> [%zu val]\n",(uint64_t)p,val.vec().size());break; - case vm_hash: printf("| hash | <0x%lx> {%zu val}\n",(uint64_t)p,val.hash().size());break; - case vm_obj: printf("| obj | <0x%lx> obj:0x%lx\n",(uint64_t)p,(uint64_t)val.obj().ptr);break; - default: printf("| err | <0x%lx> unknown object\n",(uint64_t)p);break; + case vm_func: printf("| func | <" PRTHEX64 "> entry:0x%x\n",(uint64_t)p,val.func().entry);break; + case vm_vec: printf("| vec | <" PRTHEX64 "> [%zu val]\n",(uint64_t)p,val.vec().size());break; + case vm_hash: printf("| hash | <" PRTHEX64 "> {%zu val}\n",(uint64_t)p,val.hash().size());break; + case vm_obj: printf("| obj | <" PRTHEX64 "> obj:" PRTHEX64 "\n",(uint64_t)p,(uint64_t)val.obj().ptr);break; + default: printf("| err | <" PRTHEX64 "> unknown object\n",(uint64_t)p);break; } } void nasal_vm::bytecodeinfo(const char* header,const uint32_t p) @@ -202,7 +202,7 @@ void nasal_vm::bytecodeinfo(const char* header,const uint32_t p) case op_calll: case op_mcalll: case op_loadl: printf("0x%x",c.num);break; case op_callb: - printf("0x%x <%s@0x%lx>",c.num,builtin[c.num].name,(uint64_t)builtin[c.num].func);break; + printf("0x%x <%s@" PRTHEX64 
">",c.num,builtin[c.num].name,(uint64_t)builtin[c.num].func);break; case op_upval: case op_mupval: case op_loadu: printf(" (0x%x[0x%x])",(c.num>>16)&0xffff,c.num&0xffff);break; case op_happ: case op_pstr: @@ -251,16 +251,16 @@ void nasal_vm::stackinfo(const uint32_t limit=10) uint32_t gsize=bytecode[0].num; nasal_ref* top=gc.top; nasal_ref* bottom=gc.stack+gsize; - printf("vm stack(0x%lx, limit %d, total ",(uint64_t)bottom,gsize,limit); + printf("vm stack(" PRTHEX64 ", limit %d, total ",(uint64_t)bottom,gsize,limit); if(top=bottom;++i,--top) { - printf(" 0x%.8lx",top-gc.stack); + printf(" " PRTHEX64_8 "",top-gc.stack); valinfo(top[0]); } } @@ -268,7 +268,7 @@ void nasal_vm::global_state() { if(!bytecode[0].num || gc.stack[0].type==vm_none) // bytecode[0].op is op_intg return; - printf("global(0x%lx):\n",(uint64_t)gc.stack); + printf("global(" PRTHEX64 "):\n",(uint64_t)gc.stack); for(uint32_t i=0;i):\n",(uint64_t)localr,localr-gc.stack); + printf("local(" PRTHEX64 "):\n",(uint64_t)localr,localr-gc.stack); for(uint32_t i=0;i entry:0x%x)\n", + printf("funcr:\n (<" PRTHEX64 "> entry:0x%x)\n", (uint64_t)gc.funcr.value.gcobj, gc.funcr.func().entry); global_state(); diff --git a/test/calc.nas b/test/calc.nas index 35f1a0d..505c104 100644 --- a/test/calc.nas +++ b/test/calc.nas @@ -65,8 +65,9 @@ var testfile=[ "test/tetris.nas ", "test/trait.nas ", "test/turingmachine.nas", - "test/ycombinator.nas ", - "test/wavecollapse.nas " + "test/utf8chk.nas ", + "test/wavecollapse.nas ", + "test/ycombinator.nas " ]; var module=[ diff --git a/test/utf8chk.nas b/test/utf8chk.nas new file mode 100644 index 0000000..e89cef5 --- /dev/null +++ b/test/utf8chk.nas @@ -0,0 +1,11 @@ +var 输出=print; +var 这是unicode测试=func(){ + var 测试成功=[ + "unicode: utf-8支持测试成功", + "目前仅支持utf-8以及ascii格式文件", + "注意: windows系统请开启chcp 65001代码页" + ]; + foreach(var 内容;测试成功) + 输出(内容~"\n"); +} +这是unicode测试(); \ No newline at end of file