This commit is contained in:
xujian 2024-06-17 14:18:34 +08:00
parent efa79edf6f
commit a69cd3cf18
1 changed files with 202 additions and 0 deletions

202
data/Pedt/pedt.py Executable file
View File

@ -0,0 +1,202 @@
import os
import re
import sys
import datetime
import logging
import clang.cindex as cindex
from clangTokenize import Tokenize
from debug import Debug
from codeStrip import CodeStrip
def print_dict(lst, indent=0):
    """Pretty-print a list of nested lists, dicts and scalars to stdout.

    Args:
        lst: Iterable of items to print. Lists and dicts are expanded
            recursively; anything else is printed with ``repr``.
        indent: Number of spaces placed in front of every line printed at
            this nesting level.
    """
    pad = ' ' * indent
    for entry in lst:
        if isinstance(entry, list):
            print(pad + '[')
            print_dict(entry, indent + 2)
            print(pad + ']')
            continue

        if not isinstance(entry, dict):
            print(pad + repr(entry))
            continue

        print(pad + '{')
        key_pad = ' ' * (indent + 2)
        for name, val in entry.items():
            # The key stays on its own line when the value is a container.
            print(key_pad + f"'{name}': ", end='')
            if isinstance(val, (list, dict)):
                print()
                print_dict([val], indent + 4)
            else:
                print(repr(val))
        print(pad + '}')
def parse_patch(patch_file):
    """Parse a git-style unified diff into per-file lists of hunks.

    Only files introduced by a ``+++ b/<path>`` header are recorded; hunks
    that follow any other ``+++`` header (e.g. ``+++ /dev/null``) are skipped.

    Hunk bodies are delimited by the line counts announced in the ``@@``
    header, so text that merely follows a hunk (the next file's
    ``diff --git`` / ``index`` lines, the ``format-patch`` trailer, mail
    signatures) never leaks into the hunk.

    Args:
        patch_file: Path of the patch file to read.

    Returns:
        A list of ``{"file": <path>, "changes": [<hunk>, ...]}`` dicts, where
        each hunk is a dict with the keys:

        * ``start_line``: first line number of the hunk in the new file,
        * ``num_lines``: number of lines of the hunk in the new file,
        * ``start_str``: text following the second ``@@`` (stripped),
        * ``patch_line``: the raw hunk body lines,
        * ``patch_fragment``: the added and context lines joined together
          with their one-character prefix removed, i.e. the text expected in
          the patched file.

        If ``patch_file`` is not an existing regular file, the message string
        ``"补丁文件不存在"`` ("patch file does not exist") is returned instead
        of a list.
    """
    if not os.path.isfile(patch_file):
        return "补丁文件不存在"

    # errors='replace': one undecodable byte must not abort the whole scan.
    with open(patch_file, 'r', errors='replace') as file:
        patch_lines = file.readlines()

    # "@@ -old_start[,old_count] +new_start[,new_count] @@ optional section".
    # A missing count means 1 in the unified diff format.
    hunk_header = re.compile(r"^@@ -\d+(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)")

    patch_changes = []
    hunk = None       # hunk being collected, or None
    in_file = False   # True after a "+++ b/" header, until the next file starts
    old_left = 0      # old-file lines still expected in the current hunk
    new_left = 0      # new-file lines still expected in the current hunk
    last_tag = ""     # prefix of the previous hunk body line ("" if none)

    for line in patch_lines:
        tag = line[:1]

        # "\ No newline at end of file" refers to the hunk line just before it.
        if tag == "\\" and last_tag:
            hunk["patch_line"].append(line)
            if last_tag != "-" and hunk["patch_fragment"].endswith("\n"):
                # The new file's last line has no terminator: drop the one
                # that the patch line itself carried.
                hunk["patch_fragment"] = hunk["patch_fragment"][:-1]
            last_tag = ""
            continue

        if old_left > 0 or new_left > 0:
            if tag == "+":
                new_left -= 1
                hunk["patch_fragment"] += line[1:]
                body_tag = "+"
            elif tag == "-":
                old_left -= 1
                body_tag = "-"
            elif tag == " " or not line.strip():
                # Context line. A completely blank line is a context line
                # whose leading space was stripped in transit; keep its
                # newline in the fragment.
                old_left -= 1
                new_left -= 1
                hunk["patch_fragment"] += line[1:] if tag == " " else line
                body_tag = " "
            else:
                # The hunk is shorter than announced; stop collecting and
                # let the line be interpreted as a header below.
                old_left = new_left = 0
                body_tag = ""
            if body_tag:
                hunk["patch_line"].append(line)
                last_tag = body_tag
                continue

        # Outside of any hunk body from here on.
        last_tag = ""
        if line.startswith("@@"):
            _store_hunk(patch_changes, hunk)
            hunk = None
            match = hunk_header.match(line)
            if in_file and match:
                old_left = 1 if match.group(1) is None else int(match.group(1))
                new_left = 1 if match.group(3) is None else int(match.group(3))
                hunk = {
                    "start_line": int(match.group(2)),
                    "num_lines": new_left,
                    "start_str": match.group(4).strip(),
                    "patch_line": [],
                    "patch_fragment": "",
                }
        elif line.startswith("+++ b/"):
            _store_hunk(patch_changes, hunk)
            hunk = None
            # Extract the file path that follows "+++ b/".
            patch_changes.append({"file": line[6:].rstrip("\n"), "changes": []})
            in_file = True
        elif line.startswith("--- ") or line.startswith("diff "):
            # A new file section begins; nothing is recorded for it until
            # its own "+++ b/" header is seen.
            _store_hunk(patch_changes, hunk)
            hunk = None
            in_file = False

    _store_hunk(patch_changes, hunk)
    return patch_changes


def _store_hunk(patch_changes, hunk):
    """Append a finished hunk to the latest file entry; skip None/empty hunks."""
    if hunk is not None and hunk["patch_line"]:
        patch_changes[-1]["changes"].append(hunk)
def strip_compare(source_code, patch):
    """Log the source before and after ``#if 0`` removal; always returns False.

    No comparison against ``patch`` is performed here yet.

    NOTE(review): the result of ``CodeStrip.remove_comments_and_spaces`` is
    discarded, so the "after strip" log only reflects
    ``CodeStrip.remove_if0_blocks`` -- confirm whether that is intended.

    Args:
        source_code: Text of the source file.
        patch: Patch fragment to look for (currently unused).

    Returns:
        False.
    """
    for message in (
        "################before strip##############",
        source_code,
        "#########################################",
    ):
        Debug.info(message)

    # Return value intentionally ignored, exactly as before.
    CodeStrip.remove_comments_and_spaces(source_code)
    source_code = CodeStrip.remove_if0_blocks(source_code)

    for message in (
        "################after strip##############",
        source_code,
        "#########################################",
    ):
        Debug.info(message)
    return False
def linebyline_compare(source_code, patch):
    """Placeholder for a line-by-line comparison; always returns False.

    Args:
        source_code: Text of the source file (currently unused).
        patch: Patch fragment to look for (currently unused).

    Returns:
        False.
    """
    return False
def tokens_compare(source_file, source_code, patch):
    """Tokenize the patch fragment for C/C++ sources; always returns False.

    The tokens are only logged; no comparison against ``source_code`` is
    performed here yet.

    Args:
        source_file: Path of the source file; its suffix selects the language
            handed to ``Tokenize`` (``.c`` -> ``"c"``, ``.cpp`` -> ``"cpp"``).
        source_code: Text of the source file (currently unused).
        patch: Patch fragment to tokenize.

    Returns:
        False.
    """
    Debug.info(source_file)

    if source_file.endswith(".c"):
        language = "c"
    elif source_file.endswith(".cpp"):
        language = "cpp"
    else:
        # Any other suffix is not tokenized.
        return False

    Debug.info(Tokenize(patch, language).get_tokens())
    return False
# Simple match
def simple_compare(source_code, patch):
    """Report whether ``patch`` occurs verbatim inside ``source_code``.

    Args:
        source_code: Text of the source file.
        patch: Patch fragment to look for.

    Returns:
        True if ``patch`` is a substring of ``source_code``, else False.
    """
    found = patch in source_code
    Debug.debug("patch find" if found else "patch not find")
    return found
def compare_patch(source_file,patch):
    """Check whether every hunk of ``patch`` is present in ``source_file``.

    Each hunk's ``patch_fragment`` is tried against the file content with the
    comparison strategies in order (simple, strip, tokens, line-by-line); the
    hunk counts as present as soon as one strategy succeeds.

    Previously the function returned after examining only the first hunk, so
    later hunks were never checked.

    Args:
        source_file: Path of the source file to inspect.
        patch: One file entry with a ``"changes"`` list whose items carry a
            ``"patch_fragment"`` string.

    Returns:
        True if there is at least one hunk and all hunks are found, else False.
    """
    # errors='replace': a stray undecodable byte must not abort the scan.
    with open(source_file, 'r', errors='replace') as file:
        file_content = file.read()

    changes = patch["changes"]
    if not changes:
        # Nothing to verify means nothing was found.
        return False

    for code_change in changes:
        fragment = code_change["patch_fragment"]
        matched = (
            simple_compare(file_content, fragment)
            or strip_compare(file_content, fragment)
            or tokens_compare(source_file, file_content, fragment)
            or linebyline_compare(file_content, fragment)
        )
        if not matched:
            return False
    return True
def sacn_dir(project_dir,patch_changes):
    """Walk ``project_dir`` and check every file that the patch touches.

    A file is matched to a patch entry by its path relative to
    ``project_dir`` (with ``/`` separators), so the result no longer depends
    on whether ``project_dir`` was given with a trailing separator.

    Args:
        project_dir: Root directory of the project to scan.
        patch_changes: List of ``{"file": ..., "changes": [...]}`` entries.

    Returns:
        True if at least one patched file was found in the tree and every
        patched file that was found contains the patch; otherwise False.
        False is also returned when ``patch_changes`` is not a list (for
        example an error message string).
    """
    if not isinstance(patch_changes, list):
        return False

    # 0: initial state, 1: patch found, -1: patch not found (sticky)
    find_patch = 0
    for root, _dirs, files in os.walk(project_dir):
        for file_name in files:
            file_path = os.path.join(root, file_name)
            # Patch paths always use "/" regardless of the platform.
            relative_path = os.path.relpath(file_path, project_dir).replace(os.sep, "/")
            for change in patch_changes:
                if change["file"] != relative_path:
                    continue
                Debug.debug(f'relative_path={relative_path},change={change["file"]}')
                if compare_patch(file_path, change):
                    Debug.debug("find patch code")
                    if find_patch != -1:
                        find_patch = 1
                else:
                    Debug.debug("not find patch code")
                    find_patch = -1
    return find_patch == 1
def pedt_scan(project_dir, patch_file):
    """Check whether ``patch_file`` is applied in ``project_dir`` and log the result.

    Args:
        project_dir: Root directory of the project to scan.
        patch_file: Path of the patch file to look for.

    Returns:
        ``"项目目录不存在"`` ("project directory does not exist") if
        ``project_dir`` is not a directory; the message string produced by
        ``parse_patch`` if the patch could not be read; otherwise None (the
        outcome is only logged).
    """
    if not os.path.isdir(project_dir):
        return "项目目录不存在"

    Debug.info(f"开始检测补丁: {patch_file}")
    patch_changes = parse_patch(patch_file)
    if isinstance(patch_changes, str):
        # parse_patch signals an unreadable patch with a message string;
        # handing that to sacn_dir would raise a TypeError.
        Debug.info(f"补丁检测失败: {patch_file}")
        return patch_changes

    if sacn_dir(project_dir, patch_changes):
        Debug.info(f"补丁检测成功: {patch_file}")
    else:
        Debug.info(f"补丁检测失败: {patch_file}")
# Command-line entry point: pedt.py <source_dir> <patch_dir>
# Scans <source_dir> once for every "*.patch" file found directly in <patch_dir>.
if __name__ == '__main__':
    if len(sys.argv) <= 2:
        print('need source_dir patch_dir' )
        # Usage error: exit non-zero, and use sys.exit rather than the
        # interactive-only `exit` helper.
        sys.exit(1)

    # One timestamped log file per run.
    now = datetime.datetime.now()
    timestamp = now.strftime('%Y-%m-%d_%H-%M-%S')
    Debug.setup_logger(f'./genmai_pedt_{timestamp}.log',logging.INFO,True)

    # os.listdir order is arbitrary; sort so runs are reproducible.
    for filename in sorted(os.listdir(sys.argv[2])):
        file_path = os.path.join(sys.argv[2], filename)
        if os.path.isfile(file_path) and file_path.endswith(".patch"):
            pedt_scan(sys.argv[1],file_path)