import os
import re
import sys
import datetime
import logging

import clang.cindex as cindex

from clangTokenize import Tokenize
from debug import Debug
from codeStrip import CodeStrip

# Unified-diff hunk header:
#   "@@ -old_start[,old_count] +new_start[,new_count] @@ optional context"
# Groups: 1 = old_count, 2 = new_start, 3 = new_count, 4 = trailing context.
# An omitted count means 1.
_HUNK_HEADER = re.compile(r"^@@ -\d+(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)")


def print_dict(lst, indent=0):
    """Pretty-print a list of nested lists / dicts / scalars to stdout.

    Args:
        lst: Iterable of items; lists and dicts are printed recursively,
            anything else is printed with ``repr``.
        indent: Number of leading spaces for the current nesting level.
    """
    pad = ' ' * indent
    for item in lst:
        if isinstance(item, list):
            print(pad + '[')
            print_dict(item, indent + 2)
            print(pad + ']')
        elif isinstance(item, dict):
            print(pad + '{')
            for key, value in item.items():
                print(' ' * (indent + 2) + f"'{key}': ", end='')
                if isinstance(value, (list, dict)):
                    print()
                    print_dict([value], indent + 4)
                else:
                    print(repr(value))
            print(pad + '}')
        else:
            print(pad + repr(item))


def _fragment_text(line):
    """Return the post-patch text carried by one hunk body line.

    Returns None for a removed ('-') line, which is not part of the patched
    code.  A bare newline is treated as an empty context line whose leading
    space was stripped in transit, so its newline is kept.
    """
    if line.startswith('-'):
        return None
    if line == "\n":
        return line
    return line[1:]


def parse_patch(patch_file):
    """Parse a unified diff into per-file lists of hunks.

    Args:
        patch_file: Path of the patch file to read.

    Returns:
        The string "补丁文件不存在" when ``patch_file`` is not an existing
        regular file.  Otherwise a list with one dict per "+++ b/<path>"
        header::

            {"file": <path>, "changes": [<hunk>, ...]}

        where each hunk is a dict with the keys ``start_line`` and
        ``num_lines`` (start and length of the hunk in the new file, taken
        from the "@@" header), ``start_str`` (text after the second "@@"),
        ``patch_line`` (raw hunk body lines) and ``patch_fragment`` (the
        hunk as it looks after the patch is applied: context and added
        lines, without the leading marker character).
    """
    if not os.path.isfile(patch_file):
        return "补丁文件不存在"

    # errors='replace': a stray non-decodable byte must not abort the scan.
    with open(patch_file, 'r', errors='replace') as file:
        patch_lines = file.readlines()

    patch_changes = []
    hunk_lines = []          # raw body lines of the hunk being collected
    fragment_parts = []      # post-patch text of that hunk
    last_in_fragment = False  # did the previous body line go into fragment_parts?
    in_file = False          # True between "+++ b/..." and the next file header
    start_line = 0
    num_lines = 0
    start_str = ""
    # Body lines still expected on the old/new side of the current hunk.
    # None means "unknown": the hunk then runs until the next header line.
    old_left = None
    new_left = None

    def flush_hunk():
        """Store the hunk collected so far (if any) under the current file."""
        nonlocal hunk_lines, fragment_parts, last_in_fragment
        if hunk_lines and patch_changes:
            patch_changes[-1]["changes"].append({
                "start_line": start_line,
                "num_lines": num_lines,
                "start_str": start_str,
                "patch_line": hunk_lines,
                "patch_fragment": "".join(fragment_parts),
            })
        hunk_lines = []
        fragment_parts = []
        last_in_fragment = False

    def collect(body_line):
        """Record one hunk body line of the current file."""
        nonlocal last_in_fragment
        hunk_lines.append(body_line)
        text = _fragment_text(body_line)
        last_in_fragment = text is not None
        if last_in_fragment:
            fragment_parts.append(text)

    for line in patch_lines:
        tag = line[:1]

        if tag == "\\":
            # "\ No newline at end of file": not code.  It says the previous
            # line has no trailing newline, so drop that newline from the
            # fragment when the previous line is part of it.
            if in_file and hunk_lines:
                hunk_lines.append(line)
                if last_in_fragment and fragment_parts[-1].endswith("\n"):
                    fragment_parts[-1] = fragment_parts[-1][:-1]
                last_in_fragment = False
            continue

        hunk_incomplete = old_left is not None and (old_left > 0 or new_left > 0)
        if hunk_incomplete and (tag in (" ", "+", "-") or line == "\n"):
            # Inside a hunk whose header announced more lines.  Anything that
            # looks like a body line IS one, even "--- a/..." (a removed line
            # whose text starts with "-- a/").
            if tag != "+":
                old_left -= 1
            if tag != "-":
                new_left -= 1
            if in_file:
                collect(line)
            continue

        if line.startswith("diff "):
            # Start of the next file's header block: its "index", "new file
            # mode", "--- /dev/null" ... lines must not leak into the hunk.
            flush_hunk()
            in_file = False
            old_left = new_left = None
        elif line.startswith("--- a/"):
            if in_file:
                in_file = False
                flush_hunk()
        elif line.startswith("+++ b/"):
            # Flush first so a pending hunk of the previous file is not lost
            # when no "--- a/" line preceded this header (e.g. new files).
            flush_hunk()
            # Extract the file path; plain `diff -u` may append "\t<timestamp>".
            file_path = line[6:].rstrip("\r\n").split("\t", 1)[0]
            patch_changes.append({"file": file_path, "changes": []})
            in_file = True
            old_left = new_left = None
        elif line.startswith("@@"):
            flush_hunk()
            match = _HUNK_HEADER.match(line)
            if match:
                old_count, new_start, new_count, context = match.groups()
                old_left = int(old_count) if old_count is not None else 1
                start_line = int(new_start)
                num_lines = int(new_count) if new_count is not None else 1
                new_left = num_lines
                start_str = context.strip()
            else:
                # Unrecognised header (e.g. combined diff): fall back to
                # collecting everything up to the next header line.
                old_left = new_left = None
        elif in_file and old_left is None:
            collect(line)
        # Otherwise: text outside any hunk (commit message, mail trailer,
        # "index ..." lines) -- ignored.

    flush_hunk()
    return patch_changes


def fuzzy_compare(source_code, patch):
    """Placeholder for a fuzzy comparison; not implemented, always False."""
    return False


def linebyline_compare(source_code, patch):
    """Placeholder for a line-by-line comparison; not implemented, always False."""
    return False


def tokens_compare(code_type, source_code, patch):
    """Tokenize ``patch`` and log the tokens.

    The comparison against ``source_code`` is not implemented yet, so this
    always returns False.
    """
    tokens = Tokenize(patch, code_type).get_tokens()
    Debug.info(tokens)
    return False


def simple_compare(source_code, patch):
    """Return True when ``patch`` occurs verbatim as a substring of ``source_code``."""
    if patch in source_code:
        Debug.debug("patch find")
        return True
    Debug.debug("patch not find")
    return False


def source_strip(code_type, file_content, fragment):
    """Run ``CodeStrip.code_strip`` over both texts.

    Returns:
        Tuple ``(stripped file_content, stripped fragment)``.
    """
    file_content_strip = CodeStrip.code_strip(file_content, code_type)
    fragment_strip = CodeStrip.code_strip(fragment, code_type)
    return file_content_strip, fragment_strip


def compare_patch(source_file, patch):
    """Check whether every hunk of ``patch`` is present in ``source_file``.

    Args:
        source_file: Path of the source file to inspect.
        patch: One entry of the list returned by ``parse_patch``
            (``{"file": ..., "changes": [...]}``).

    Returns:
        True when each hunk's ``patch_fragment`` is found by one of the
        comparison strategies (also when there are no hunks at all), False
        as soon as one hunk cannot be found.
    """
    code_type = ""
    if source_file.endswith(".c"):
        code_type = "c"
    elif source_file.endswith(".cpp"):
        code_type = "cpp"

    # errors='replace': tolerate source files with non-decodable bytes.
    with open(source_file, 'r', errors='replace') as file:
        file_content = file.read()

    # The source text is the same for every hunk, so dump it once.
    Debug.debug("#####################src######################")
    Debug.debug(file_content)
    Debug.debug("#####################src end######################")

    for code_change in patch["changes"]:
        fragment = code_change["patch_fragment"]
        Debug.debug("#####################patch######################")
        Debug.debug(fragment)
        Debug.debug("#####################patch end######################")

        # Strategy 1: verbatim substring match.
        if simple_compare(file_content, fragment):
            continue

        # Strategy 2: the same match on stripped text, then the (currently
        # unimplemented) token / line-by-line comparisons.
        file_content_strip, fragment_strip = source_strip(code_type, file_content, fragment)
        if file_content_strip and fragment_strip:
            if simple_compare(file_content_strip, fragment_strip):
                continue
            if code_type:
                if tokens_compare(code_type, file_content_strip, fragment_strip):
                    continue
            if linebyline_compare(file_content_strip, fragment_strip):
                continue

        if fuzzy_compare(file_content, fragment):
            continue
        return False
    return True


def sacn_dir(project_dir, patch_changes):
    """Walk ``project_dir`` and check every file the patch touches.

    Args:
        project_dir: Root directory of the project to scan.
        patch_changes: List returned by ``parse_patch``.

    Returns:
        True when at least one patched file exists under ``project_dir`` and
        every such file contains its patch; False otherwise (including when
        ``patch_changes`` is the error string from ``parse_patch``).
    """
    if isinstance(patch_changes, str):
        return False

    # Index the changes by normalised relative path so each walked file is a
    # single lookup.  A path may appear more than once in a patch.
    changes_by_path = {}
    for change in patch_changes:
        changes_by_path.setdefault(os.path.normpath(change["file"]), []).append(change)

    found_any = False
    all_matched = True
    # Walk every file below the project directory.
    for root, _dirs, files in os.walk(project_dir):
        for name in files:
            file_path = os.path.join(root, name)
            # relpath gives the path relative to project_dir whether or not
            # project_dir was passed with a trailing separator.
            relative_path = os.path.normpath(os.path.relpath(file_path, project_dir))
            for change in changes_by_path.get(relative_path, ()):
                Debug.debug(f'relative_path={relative_path},change={change["file"]}')
                if compare_patch(file_path, change):
                    Debug.debug("find patch code")
                    found_any = True
                else:
                    Debug.debug("not find patch code")
                    all_matched = False
    return found_any and all_matched


# Correctly spelled alias; the original name is kept for existing callers.
scan_dir = sacn_dir


def pedt_scan(project_dir, patch_file):
    """Report through ``Debug.info`` whether ``patch_file`` is applied in ``project_dir``.

    Returns:
        "项目目录不存在" when ``project_dir`` is not a directory, the error
        string from ``parse_patch`` when the patch file cannot be read,
        otherwise None.
    """
    if not os.path.isdir(project_dir):
        return "项目目录不存在"

    Debug.info(f"开始检测补丁: {patch_file}")
    patch_changes = parse_patch(patch_file)
    if isinstance(patch_changes, str):
        # parse_patch reports a missing file as a string; do not iterate it.
        Debug.info(f"补丁检测失败: {patch_file}")
        return patch_changes

    if sacn_dir(project_dir, patch_changes):
        Debug.info(f"补丁检测成功: {patch_file}")
    else:
        Debug.info(f"补丁检测失败: {patch_file}")


def main(argv):
    """Scan ``argv[1]`` against every ``*.patch`` file found in ``argv[2]``."""
    if len(argv) <= 2:
        print('need source_dir patch_dir')
        # Exit status 0 is kept for compatibility with existing callers.
        sys.exit(0)

    now = datetime.datetime.now()
    timestamp = now.strftime('%Y-%m-%d_%H-%M-%S')
    Debug.setup_logger(f'./genmai_pedt_{timestamp}.log', logging.INFO, True)

    # Sorted so that runs are reproducible.
    for filename in sorted(os.listdir(argv[2])):
        file_path = os.path.join(argv[2], filename)
        if os.path.isfile(file_path) and file_path.endswith(".patch"):
            pedt_scan(argv[1], file_path)


if __name__ == '__main__':
    main(sys.argv)