genmai/data/Pedt/clangTokenize.py

42 lines
1.5 KiB
Python
Executable File

import clang.cindex as cindex
from debug import Debug
def print_cursor(cursor, indent=''):
    """Log the cursor tree rooted at ``cursor``, one node per line.

    Nodes are emitted in pre-order (a node first, then its children in the
    order ``get_children()`` yields them). Each line has the form
    ``<indent><kind name>: <spelling>`` and is sent to ``Debug.info``.

    Args:
        cursor: Root node; must expose ``kind.name``, ``spelling`` and
            ``get_children()``.
        indent: Prefix for the root's line; each nesting level appends one
            more space to it.
    """
    # Explicit stack instead of recursion; the entry on top is the next
    # node to be reported.
    pending = [(cursor, indent)]
    while pending:
        node, prefix = pending.pop()
        # Report the node's kind and name.
        Debug.info(f'{prefix}{node.kind.name}: {node.spelling}')
        # Push children in reverse so the first child is popped first,
        # which keeps the output in pre-order.
        deeper = prefix + ' '
        pending.extend(
            (child, deeper) for child in reversed(list(node.get_children()))
        )
class Tokenize:
    """Parse an in-memory C or C++ snippet with libclang and expose its tokens.

    The snippet is never written to disk: it is handed to libclang as an
    unsaved file under a fixed virtual name (``input.c`` or ``input.cpp``).

    Attributes set by the constructor:
        code:  the source text that was passed in.
        index: the ``cindex.Index`` used for parsing.
        tu:    the translation unit returned by ``index.parse``.
    """

    # Supported ``type`` values mapped to the virtual file name and the
    # language-standard flag passed to libclang.
    _LANGUAGES = {
        'c': ('input.c', '-std=c11'),
        'c++': ('input.cpp', '-std=c++11'),
    }

    def __init__(self, code, type):
        """Parse ``code`` and log the resulting cursor tree.

        Args:
            code: Source text to parse.
            type: Language selector, either ``'c'`` or ``'c++'``.

        Raises:
            ValueError: If ``type`` is not one of the supported selectors.
        """
        # Validate before doing any work: previously an unknown selector left
        # ``self.tu`` unset and surfaced later as an unrelated AttributeError.
        try:
            filename, std_flag = self._LANGUAGES[type]
        except KeyError:
            supported = ', '.join(repr(key) for key in self._LANGUAGES)
            raise ValueError(
                f'unsupported source type {type!r}; expected one of {supported}'
            ) from None

        self.code = code
        self.index = cindex.Index.create()
        self.tu = self.index.parse(
            filename,
            args=[std_flag],
            unsaved_files=[(filename, code)],
        )

        # Log the parsed cursor tree through the Debug channel.
        Debug.info('\n语法分析结果:')
        print_cursor(self.tu.cursor)

    def get_tokens(self):
        """Return the tokens covering the whole translation unit.

        The range is given explicitly as the root cursor's extent, because
        ``TranslationUnit.get_tokens`` needs a ``locations`` or ``extent``
        argument to know which source range to tokenize.
        """
        return self.tu.get_tokens(extent=self.tu.cursor.extent)