add pedt

2024-06-17 14:18:34 +08:00 · 2024-06-17 14:18:34 +08:00 · a69cd3cf18
parent efa79edf6f
commit a69cd3cf18
1 changed files with 202 additions and 0 deletions
--- a/data/Pedt/pedt.py
+++ b/data/Pedt/pedt.py
@ -0,0 +1,202 @@
+import os
+import re
+import sys
+import datetime
+import logging
+import clang.cindex as cindex
+from clangTokenize import Tokenize
+from debug import Debug
+from codeStrip import CodeStrip
+
+def print_dict(lst, indent=0):
+    """Pretty-print a nested list/dict structure to stdout.
+
+    Every element of ``lst`` is printed on its own line, prefixed by
+    ``indent`` spaces.  A nested list is wrapped in ``[`` / ``]`` and its
+    elements are printed two spaces deeper.  A dict is wrapped in
+    ``{`` / ``}`` with one ``'key': value`` entry per line; when the value
+    is itself a list or dict, the key line ends after the colon and the
+    value is printed recursively below it.
+
+    Args:
+        lst: Iterable of items (scalars, lists or dicts) to print.
+        indent: Number of leading spaces for this nesting level.
+    """
+    pad = ' ' * indent
+    for element in lst:
+        if isinstance(element, list):
+            print(pad + '[')
+            print_dict(element, indent + 2)
+            print(pad + ']')
+            continue
+        if not isinstance(element, dict):
+            print(pad + repr(element))
+            continue
+        print(pad + '{')
+        inner_pad = ' ' * (indent + 2)
+        for name, entry in element.items():
+            label = inner_pad + f"'{name}': "
+            if isinstance(entry, (list, dict)):
+                # Containers are printed on the lines below their key.
+                print(label)
+                print_dict([entry], indent + 4)
+            else:
+                print(label + repr(entry))
+        print(pad + '}')
+
+def _store_hunk(file_entry, hunk, fragment_parts):
+    """Attach a finished hunk to its file entry; drop empty or orphan hunks."""
+    if file_entry is None or not hunk["patch_line"]:
+        return
+    hunk["patch_fragment"] = "".join(fragment_parts)
+    file_entry["changes"].append(hunk)
+
+
+def parse_patch(patch_file):
+    """Parse a unified diff into per-file lists of hunks.
+
+    Hunk bodies are consumed according to the line counts announced in the
+    ``@@ -a,b +c,d @@`` header, so text that follows a hunk (``diff --git``
+    and ``index`` lines of the next file, a mail signature trailer, ...)
+    is never mixed into the hunk.  A count that is omitted in the header
+    means one line.  Combined (``@@@``) diff headers are not supported and
+    their hunks are skipped.
+
+    Args:
+        patch_file: Path of the patch file to read.
+
+    Returns:
+        A list with one dict per ``+++ b/<path>`` header::
+
+            {"file": <path without the "b/" prefix>, "changes": [hunk, ...]}
+
+        where every hunk is a dict with the keys ``start_line`` (first line
+        in the new file), ``num_lines`` (line count in the new file),
+        ``start_str`` (text after the second ``@@``), ``patch_line`` (raw
+        hunk lines) and ``patch_fragment`` (the added and context lines
+        with their diff marker removed, i.e. the text expected in a
+        patched file).
+        If ``patch_file`` is not an existing regular file, the message
+        string "补丁文件不存在" is returned instead of a list.
+    """
+    if not os.path.isfile(patch_file):
+        return "补丁文件不存在"
+
+    # Patches may carry bytes that are not valid UTF-8; never fail on them.
+    with open(patch_file, 'r', encoding='utf-8', errors='replace') as file:
+        patch_lines = file.readlines()
+
+    hunk_header = re.compile(r"^@@ -\d+(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)")
+    patch_changes = []
+    current_file = None      # entry receiving hunks; None => hunks are dropped
+    hunk = None              # hunk being collected; None => between hunks
+    fragment_parts = []
+    old_left = 0             # old-file lines still expected in this hunk
+    new_left = 0             # new-file lines still expected in this hunk
+    last_in_fragment = False  # did the previous hunk line go into the fragment?
+
+    for line in patch_lines:
+        if hunk is not None:
+            marker = line[:1]
+            if marker == "\\":
+                # "No newline at end of file" marker: it belongs to the
+                # previous line, which therefore has no trailing newline.
+                hunk["patch_line"].append(line)
+                if (last_in_fragment and fragment_parts
+                        and fragment_parts[-1].endswith("\n")):
+                    fragment_parts[-1] = fragment_parts[-1][:-1]
+                last_in_fragment = False
+                continue
+            if (old_left > 0 or new_left > 0) and marker in ("+", "-", " ", "\n"):
+                hunk["patch_line"].append(line)
+                if marker == "-":
+                    old_left -= 1
+                    last_in_fragment = False
+                else:
+                    if marker == "+":
+                        new_left -= 1
+                    else:
+                        old_left -= 1
+                        new_left -= 1
+                    # A bare newline is an empty context line whose leading
+                    # space was stripped; keep it as an empty line.
+                    fragment_parts.append(line if marker == "\n" else line[1:])
+                    last_in_fragment = True
+                continue
+            # The hunk is complete (or truncated): store it and handle the
+            # current line as a header line below.
+            _store_hunk(current_file, hunk, fragment_parts)
+            hunk = None
+
+        if line.startswith("+++ b/"):
+            # Extract the file path; plain "diff -u" output appends a tab
+            # and a timestamp after the path.
+            file_path = line[6:].rstrip("\n").split("\t", 1)[0]
+            current_file = {"file": file_path, "changes": []}
+            patch_changes.append(current_file)
+        elif line.startswith(("diff ", "--- ", "+++ ")):
+            # Start of another file (or a deleted file, "+++ /dev/null").
+            current_file = None
+        elif line.startswith("@@"):
+            match = hunk_header.match(line)
+            if match:
+                old_count, new_start, new_count, trailer = match.groups()
+                old_left = 1 if old_count is None else int(old_count)
+                new_left = 1 if new_count is None else int(new_count)
+                hunk = {
+                    "start_line": int(new_start),
+                    "num_lines": new_left,
+                    "start_str": trailer.strip(),
+                    "patch_line": [],
+                    "patch_fragment": "",
+                }
+                fragment_parts = []
+                last_in_fragment = False
+
+    if hunk is not None:
+        _store_hunk(current_file, hunk, fragment_parts)
+    return patch_changes
+
+def strip_compare(source_code,patch):
+    """Log the source before and after stripping; matching is not implemented.
+
+    The source text is logged, passed through two ``CodeStrip`` helpers,
+    and the output of ``CodeStrip.remove_if0_blocks`` is logged again.
+    ``patch`` is never examined and the function always returns False.
+
+    Args:
+        source_code: Full text of the source file.
+        patch: Patch fragment to look for (currently unused).
+
+    Returns:
+        Always False.
+    """
+    for message in ("################before strip##############",
+                    source_code,
+                    "#########################################"):
+        Debug.info(message)
+    # NOTE(review): this result is never used; the second helper below is
+    # fed the original text, not this one. Presumably the two calls were
+    # meant to be chained - confirm the intended order before wiring it up.
+    _comment_free = CodeStrip.remove_comments_and_spaces(source_code)
+    without_if0 = CodeStrip.remove_if0_blocks(source_code)
+    for message in ("################after strip##############",
+                    without_if0,
+                    "#########################################"):
+        Debug.info(message)
+    return False
+
+def linebyline_compare(source_code,patch):
+    """Placeholder comparison strategy; nothing is compared yet.
+
+    Args:
+        source_code: Full text of the source file (unused).
+        patch: Patch fragment to look for (unused).
+
+    Returns:
+        Always False.
+    """
+    return False
+
+def tokens_compare(source_file,source_code,patch):
+    """Tokenize the patch fragment and log the tokens; matching is not implemented.
+
+    The language handed to ``Tokenize`` is chosen from the file suffix:
+    ``.c`` gives ``"c"`` and ``.cpp`` gives ``"cpp"``.  Files with any
+    other suffix are rejected before tokenizing.  ``source_code`` is not
+    examined.
+
+    Args:
+        source_file: Path of the source file; only its suffix is used.
+        source_code: Full text of the source file (unused).
+        patch: Patch fragment to tokenize.
+
+    Returns:
+        Always False.
+    """
+    Debug.info(source_file)
+    language = None
+    for suffix in ("c", "cpp"):
+        if source_file.endswith("." + suffix):
+            language = suffix
+            break
+    if language is None:
+        return False
+    Debug.info(Tokenize(patch, language).get_tokens())
+    return False
+
+# Simple matching
+def simple_compare(source_code, patch):
+    """Report whether ``patch`` occurs verbatim inside ``source_code``.
+
+    Args:
+        source_code: Full text of the source file.
+        patch: Patch fragment to look for.
+
+    Returns:
+        True if ``patch`` is a substring of ``source_code``, else False.
+        The outcome is also written to the debug log.
+    """
+    found = patch in source_code
+    Debug.debug("patch find" if found else "patch not find")
+    return found
+
+def compare_patch(source_file,patch):
+    """Check whether every hunk of a patched file is present in ``source_file``.
+
+    Each hunk's ``patch_fragment`` is tried against the file content with
+    the available strategies in order (exact substring, stripped, token
+    based, line by line); the first strategy that succeeds accepts the
+    hunk.  Previously the loop returned after the first hunk, so later
+    hunks were never examined.
+
+    Args:
+        source_file: Path of the source file to inspect.
+        patch: One entry produced by ``parse_patch``; its ``"changes"``
+            list holds the hunks.
+
+    Returns:
+        True if the entry has at least one hunk and all of its hunks were
+        found, otherwise False.
+    """
+    # Source trees may contain files that are not valid UTF-8; decode
+    # leniently instead of aborting the scan.
+    with open(source_file, 'r', encoding='utf-8', errors='replace') as file:
+        file_content = file.read()
+    changes = patch["changes"]
+    if not changes:
+        return False
+    for code_change in changes:
+        fragment = code_change["patch_fragment"]
+        matched = (
+            simple_compare(file_content, fragment)
+            or strip_compare(file_content, fragment)
+            or tokens_compare(source_file, file_content, fragment)
+            or linebyline_compare(file_content, fragment)
+        )
+        if not matched:
+            return False
+    return True
+
+def sacn_dir(project_dir,patch_changes):
+    """Walk ``project_dir`` and check every file that the patch touches.
+
+    A file is compared when its path relative to ``project_dir`` (with
+    ``/`` separators) equals the ``"file"`` value of a patch entry.  The
+    relative path is computed with ``os.path.relpath`` so that it matches
+    whether or not ``project_dir`` ends with a path separator.
+
+    Files named in the patch but absent from the project are never
+    compared and therefore do not count as a mismatch.
+
+    Args:
+        project_dir: Root directory of the project to scan.
+        patch_changes: List of file entries as returned by ``parse_patch``.
+
+    Returns:
+        True if at least one file was compared and every compared file
+        contains its patch code, otherwise False.
+    """
+    # 0: nothing compared yet, 1: patch found, -1: patch not found
+    find_patch=0
+    # Walk all files below the project directory
+    for root, _dirs, files in os.walk(project_dir):
+        for file in files:
+            file_path = os.path.join(root, file)
+            relative_path = os.path.relpath(file_path, project_dir).replace(os.sep, "/")
+            for change in patch_changes:
+                if change["file"] != relative_path:
+                    continue
+                Debug.debug(f'relative_path={relative_path},change={change["file"]}')
+                if compare_patch(file_path,change):
+                    Debug.debug("find patch code")
+                    # A previous mismatch is sticky: never go back to "found".
+                    if find_patch != -1:
+                        find_patch=1
+                else:
+                    Debug.debug("not find patch code")
+                    find_patch=-1
+    return find_patch == 1
+
+def pedt_scan(project_dir, patch_file):
+    """Detect whether the code of ``patch_file`` is present in ``project_dir``.
+
+    The outcome is written to the log through ``Debug.info``.
+
+    Args:
+        project_dir: Root directory of the project to scan.
+        patch_file: Path of the patch file to look for.
+
+    Returns:
+        An error message string when ``project_dir`` is not a directory or
+        when ``parse_patch`` reports an error; otherwise None.
+    """
+    if not os.path.isdir(project_dir):
+        return "项目目录不存在"
+
+    Debug.info(f"开始检测补丁: {patch_file}")
+    patch_changes=parse_patch(patch_file)
+    if isinstance(patch_changes, str):
+        # parse_patch signals a missing patch file with a message string;
+        # scanning with it would fail when its characters are indexed
+        # like file entries.
+        Debug.info(f"补丁检测失败: {patch_file}")
+        return patch_changes
+    if sacn_dir(project_dir,patch_changes):
+        Debug.info(f"补丁检测成功: {patch_file}")
+    else:
+        Debug.info(f"补丁检测失败: {patch_file}")
+
+
+
+# Command-line entry point: pedt.py <source_dir> <patch_dir>
+# Runs pedt_scan on <source_dir> once for every "*.patch" file found
+# directly inside <patch_dir>, logging to a timestamped file.
+if __name__ == '__main__':
+    if len(sys.argv) <= 2:
+        print('need source_dir patch_dir' )
+        # A usage error must not report success to the calling shell.
+        sys.exit(1)
+    now = datetime.datetime.now()
+    timestamp = now.strftime('%Y-%m-%d_%H-%M-%S')
+    Debug.setup_logger(f'./genmai_pedt_{timestamp}.log',logging.INFO,True)
+    # os.listdir returns entries in arbitrary order; sort for stable logs.
+    for filename in sorted(os.listdir(sys.argv[2])):
+        file_path = os.path.join(sys.argv[2], filename)
+        if os.path.isfile(file_path) and file_path.endswith(".patch"):
+            pedt_scan(sys.argv[1],file_path)
+