genmai/data/Pedt/pedt.py

214 lines
7.5 KiB
Python
Executable File

import os
import re
import sys
import datetime
import logging
import clang.cindex as cindex
from clangTokenize import Tokenize
from debug import Debug
from codeStrip import CodeStrip
def print_dict(lst, indent=0):
    """Pretty-print a (possibly nested) list of values to stdout.

    Each element of ``lst`` is printed on its own line(s), prefixed by
    ``indent`` spaces:

    * a list is wrapped in ``[`` / ``]`` and its elements are printed
      recursively with two extra spaces of indentation;
    * a dict is wrapped in ``{`` / ``}``; every entry is printed as
      ``'key': value``, and a list/dict value is printed recursively on the
      following lines with four extra spaces of indentation;
    * anything else is printed with ``repr``.

    Args:
        lst: iterable of items to print.
        indent: number of leading spaces for this nesting level.
    """
    pad = ' ' * indent
    for entry in lst:
        if isinstance(entry, list):
            print(pad + '[')
            print_dict(entry, indent + 2)
            print(pad + ']')
            continue
        if not isinstance(entry, dict):
            print(pad + repr(entry))
            continue
        print(pad + '{')
        key_pad = ' ' * (indent + 2)
        for key, value in entry.items():
            # The key is printed without a newline so a scalar value can
            # follow on the same line.
            print(key_pad + f"'{key}': ", end='')
            if isinstance(value, (list, dict)):
                print()
                print_dict([value], indent + 4)
            else:
                print(repr(value))
        print(pad + '}')
def parse_patch(patch_file):
    """Parse a unified diff into a list of per-file hunks.

    Only files introduced by a ``+++ b/<path>`` header are collected; a
    ``--- a/<path>`` header stops collection until the next ``+++ b/`` line.

    Args:
        patch_file: path of the patch file to read.

    Returns:
        A list of ``{"file": <path>, "changes": [<hunk>, ...]}`` dicts, where
        every hunk is a dict with the keys

        * ``start_line``: first line number of the hunk in the new file,
        * ``num_lines``: line count of the hunk in the new file (1 when the
          header omits the count, as the unified format allows),
        * ``start_str``: text following the second ``@@`` of the header,
        * ``patch_line``: the raw hunk body lines,
        * ``patch_fragment``: the post-patch text of the hunk (context and
          added lines with their one-character prefix removed).

        If ``patch_file`` is not an existing regular file, the error message
        string ``"补丁文件不存在"`` is returned instead of a list (kept for
        backward compatibility).
    """
    if not os.path.isfile(patch_file):
        return "补丁文件不存在"
    # errors='replace' keeps one undecodable byte from aborting the parse.
    with open(patch_file, 'r', errors='replace') as file:
        patch_lines = file.readlines()

    # The ",count" part is optional in a unified-diff hunk header.
    hunk_header = re.compile(r"@@ .+? \+(\d+)(?:,(\d+))? @@(.*)")

    patch_changes = []
    collecting = False  # True once a "+++ b/" header has been seen
    hunk = None         # hunk currently being filled, None between hunks
    new_left = 0        # new-file lines the open hunk may still contain

    def close_hunk():
        """Store the open hunk (if it has any body line) and reset state."""
        nonlocal hunk
        if hunk is not None and hunk["patch_line"]:
            patch_changes[-1]["changes"].append(hunk)
        hunk = None

    for line in patch_lines:
        if line.startswith("--- a/"):
            close_hunk()
            collecting = False
        elif line.startswith("+++ b/"):
            # Flush first: after "--- /dev/null" no "--- a/" line has closed
            # the previous file's last hunk.
            close_hunk()
            # Extract the file path.
            patch_changes.append({"file": line[6:].rstrip("\n"), "changes": []})
            collecting = True
        elif line.startswith("@@"):
            close_hunk()
            match = hunk_header.search(line)
            # A header that cannot be parsed opens no hunk, so its body is
            # skipped instead of inheriting the previous hunk's numbers.
            if collecting and match:
                count = match.group(2)
                hunk = {
                    "start_line": int(match.group(1)),
                    "num_lines": int(count) if count is not None else 1,
                    "start_str": match.group(3).strip(),
                    "patch_line": [],
                    "patch_fragment": "",
                }
                new_left = hunk["num_lines"]
        elif hunk is not None:
            if line.startswith("-"):
                # Removed lines are recorded but are not part of the
                # post-patch text.
                hunk["patch_line"].append(line)
            elif line.startswith("\\"):
                # "\ No newline at end of file" is diff metadata.
                continue
            elif new_left > 0 and (line.startswith(("+", " ")) or line == "\n"):
                hunk["patch_line"].append(line)
                # A bare newline is an empty context line whose leading
                # space was stripped by an editor or mailer.
                hunk["patch_fragment"] += "\n" if line == "\n" else line[1:]
                new_left -= 1
            else:
                # Anything else ("diff --git", "index ...", a mail
                # signature, surplus lines) lies outside the hunk.
                close_hunk()
    close_hunk()
    return patch_changes
def fuzzy_compare(source_code,patch):
    """Placeholder for an approximate comparison of ``patch`` and ``source_code``.

    Not implemented: both arguments are ignored and the result is always
    ``False`` (meaning "patch not found").
    """
    return False
def linebyline_compare(source_code,patch):
    """Placeholder for a line-by-line comparison of ``patch`` and ``source_code``.

    Not implemented: both arguments are ignored and the result is always
    ``False`` (meaning "patch not found").
    """
    return False
def tokens_compare(code_type,source_code,patch):
    """Placeholder for a token-level comparison.

    Tokenizes ``patch`` with ``Tokenize`` for the given ``code_type`` and
    logs the resulting tokens; ``source_code`` is not examined yet.

    Returns:
        Always ``False`` (no match is ever reported).
    """
    Debug.info(Tokenize(patch, code_type).get_tokens())
    return False
def simple_compare(source_code, patch):
    """Report whether ``patch`` occurs verbatim inside ``source_code``.

    The outcome is also written to the debug log.

    Returns:
        ``True`` if ``patch`` is a substring of ``source_code``, else ``False``.
    """
    found = patch in source_code
    Debug.debug("patch find" if found else "patch not find")
    return found
def source_strip(code_type,file_content,fragment):
    """Run both texts through ``CodeStrip.code_strip`` for ``code_type``.

    Args:
        code_type: language tag handed to ``CodeStrip.code_strip``.
        file_content: full text of the source file.
        fragment: text of the patch hunk.

    Returns:
        A ``(stripped_file_content, stripped_fragment)`` tuple.
    """
    return (
        CodeStrip.code_strip(file_content, code_type),
        CodeStrip.code_strip(fragment, code_type),
    )
def compare_patch(source_file,patch):
    """Check that every hunk of ``patch`` is present in ``source_file``.

    For each hunk the post-patch fragment is looked for with progressively
    looser strategies: verbatim substring, substring after ``source_strip``,
    token comparison (only for ``.c`` / ``.cpp`` files), line-by-line
    comparison and finally fuzzy comparison. The first hunk that no strategy
    finds ends the check.

    Args:
        source_file: path of the source file to inspect.
        patch: one entry produced by ``parse_patch``; its ``"changes"`` list
            holds hunks that carry a ``"patch_fragment"`` string.

    Returns:
        ``True`` if every hunk was found (also when there are no hunks),
        ``False`` as soon as one hunk is missing.
    """
    code_type = ""
    if source_file.endswith(".c"):
        code_type = "c"
    elif source_file.endswith(".cpp"):
        code_type = "cpp"
    # errors='replace' keeps a single undecodable byte in one source file
    # from aborting the whole scan with UnicodeDecodeError.
    with open(source_file, 'r', errors='replace') as file:
        file_content = file.read()
    for code_change in patch["changes"]:
        fragment = code_change["patch_fragment"]
        Debug.debug("#####################src######################")
        Debug.debug(file_content)
        Debug.debug("#####################src end######################")
        Debug.debug("#####################patch######################")
        Debug.debug(fragment)
        Debug.debug("#####################patch end######################")
        if simple_compare(file_content,fragment):
            continue
        file_content_strip,fragment_strip = source_strip(code_type,file_content,fragment)
        # The stripped comparisons only make sense when stripping left
        # something on both sides.
        if file_content_strip and fragment_strip:
            if simple_compare(file_content_strip,fragment_strip):
                continue
            if code_type and tokens_compare(code_type,file_content_strip,fragment_strip):
                continue
            if linebyline_compare(file_content_strip,fragment_strip):
                continue
        if fuzzy_compare(file_content,fragment):
            continue
        return False
    return True
def sacn_dir(project_dir,patch_changes):
    """Walk ``project_dir`` and check every file that the patch touches.

    A file is checked when its path relative to ``project_dir`` (with ``/``
    separators) equals the ``"file"`` value of an entry in ``patch_changes``;
    the check itself is done by ``compare_patch``.

    Args:
        project_dir: root directory of the source tree; a trailing path
            separator is not required.
        patch_changes: list produced by ``parse_patch``. The error string
            that ``parse_patch`` returns for a missing patch file is
            accepted and treated as "nothing to check".

    Returns:
        ``True`` if at least one file was checked and every checked file
        contains the patch, otherwise ``False``.
    """
    if isinstance(patch_changes, str):
        # parse_patch signals failure with a message instead of a list.
        return False
    # 0: nothing checked yet, 1: patch found, -1: patch missing somewhere
    find_patch = 0
    # Walk every file below the project directory.
    for root, _dirs, files in os.walk(project_dir):
        for file in files:
            file_path = os.path.join(root, file)
            # relpath works with or without a trailing separator on
            # project_dir; plain slicing produced "/a.c" for "/src/proj".
            relative_path = os.path.relpath(file_path, project_dir).replace(os.sep, "/")
            for change in patch_changes:
                if change["file"] != relative_path:
                    continue
                Debug.debug(f'relative_path={relative_path},change={change["file"]}')
                if compare_patch(file_path,change):
                    Debug.debug("find patch code")
                    # A previous failure is sticky.
                    if find_patch != -1:
                        find_patch = 1
                else:
                    Debug.debug("not find patch code")
                    find_patch = -1
    return find_patch == 1
def pedt_scan(project_dir, patch_file):
    """Detect whether ``patch_file`` has been applied to ``project_dir``.

    The outcome is written to the log through ``Debug.info``.

    Args:
        project_dir: root directory of the source tree to inspect.
        patch_file: path of the patch file.

    Returns:
        An error message string when ``project_dir`` is not a directory or
        when ``parse_patch`` reports an error; ``None`` otherwise.
    """
    if not os.path.isdir(project_dir):
        return "项目目录不存在"
    Debug.info(f"开始检测补丁: {patch_file}")
    patch_changes = parse_patch(patch_file)
    if isinstance(patch_changes, str):
        # parse_patch returns an error message instead of a list on failure;
        # passing it on to sacn_dir would raise TypeError.
        Debug.info(f"补丁检测失败: {patch_file}")
        return patch_changes
    #print_dict(patch_changes)
    if sacn_dir(project_dir,patch_changes):
        Debug.info(f"补丁检测成功: {patch_file}")
    else:
        Debug.info(f"补丁检测失败: {patch_file}")
if __name__ == '__main__':
    # Usage: pedt.py <source_dir> <patch_dir>
    # Every regular file ending in ".patch" inside <patch_dir> is checked
    # against <source_dir>; results go to a timestamped log file.
    if len(sys.argv) <= 2:
        print('need source_dir patch_dir' )
        # sys.exit instead of the interactive-only builtin exit(); a usage
        # error is reported with a non-zero status.
        sys.exit(1)
    now = datetime.datetime.now()
    timestamp = now.strftime('%Y-%m-%d_%H-%M-%S')
    Debug.setup_logger(f'./genmai_pedt_{timestamp}.log',logging.INFO,True)
    for filename in os.listdir(sys.argv[2]):
        file_path = os.path.join(sys.argv[2], filename)
        if os.path.isfile(file_path) and file_path.endswith(".patch"):
            error = pedt_scan(sys.argv[1],file_path)
            if error:
                # pedt_scan returns a message only when it could not run.
                Debug.info(error)