# genmai/data/Pedt/pedt.py

import os
import re
import sys
import datetime
import logging
import clang.cindex as cindex
from clangTokenize import Tokenize
from debug import Debug
from codeStrip import CodeStrip

def print_dict(lst, indent=0):
    """Pretty-print the elements of ``lst`` to stdout, one per line.

    Lists are wrapped in ``[`` / ``]`` with their elements indented two more
    columns. Dicts are wrapped in ``{`` / ``}`` with one ``'key': `` line per
    entry; a list or dict value is printed below its key, four columns deeper.
    Every other element is printed with ``repr``.

    Args:
        lst: Iterable of elements to print.
        indent: Number of leading spaces for this nesting level.
    """
    pad = ' ' * indent
    for element in lst:
        if isinstance(element, dict):
            print(pad + '{')
            key_pad = ' ' * (indent + 2)
            for name, content in element.items():
                print(f"{key_pad}'{name}': ", end='')
                if not isinstance(content, (list, dict)):
                    print(repr(content))
                    continue
                # Nested container: end the key line and print it deeper.
                print()
                print_dict([content], indent + 4)
            print(pad + '}')
        elif isinstance(element, list):
            print(pad + '[')
            print_dict(element, indent + 2)
            print(pad + ']')
        else:
            print(pad + repr(element))

def parse_patch(patch_file):
    """Parse a unified diff (``git diff`` / ``git format-patch`` style) into hunks.

    Hunk bodies are delimited by the line counts in their ``@@`` header, so
    text that follows a hunk (``diff --git`` / ``index`` lines, the mail
    signature) never ends up in it, and removed or added lines that merely
    look like file headers stay part of the hunk.

    Args:
        patch_file: Path of the patch file to read.

    Returns:
        The string "补丁文件不存在" when ``patch_file`` is not an existing regular
        file. Otherwise a list with one dict per ``+++ b/<path>`` header,
        ``{"file": <path>, "changes": [<hunk>, ...]}``, where every hunk is a
        dict with the keys:

        * ``start_line`` / ``num_lines``: start and length of the hunk in the
          patched file (an omitted length means 1); both are 0 when the
          header cannot be parsed.
        * ``start_str``: text after the second ``@@`` of the header, stripped.
        * ``patch_line``: the raw body lines of the hunk.
        * ``patch_fragment``: the hunk as it reads after patching, i.e. context
          and added lines without their one-character prefix.
    """
    if not os.path.isfile(patch_file):
        return "补丁文件不存在"

    # Decode leniently so a stray non-UTF-8 byte does not abort the parse.
    with open(patch_file, 'r', encoding='utf-8', errors='replace') as file:
        patch_lines = file.readlines()

    # "@@ -old_start[,old_count] +new_start[,new_count] @@ [section]"
    hunk_header = re.compile(r"^@@ -\d+(?:,(\d+))? \+(\d+)(?:,(\d+))? @@(.*)")

    patch_changes = []
    patch_list = []        # raw lines of the hunk being collected
    fragment_parts = []    # post-patch text of the hunk being collected
    patch_start = False    # inside a file section opened by "+++ b/"
    collecting = False     # a hunk header was seen and its body is being read
    bounded = False        # the header was parsed, so the body length is known
    old_left = 0           # old-file lines ('-' and context) still expected
    new_left = 0           # new-file lines ('+' and context) still expected
    last_marker = ""
    start_line = 0
    num_lines = 0
    start_str = ""

    def flush_hunk():
        """Store the collected hunk (if any) under the current file entry."""
        nonlocal patch_list, fragment_parts
        if patch_list:
            patch_changes[-1]["changes"].append({
                "start_line": start_line,
                "num_lines": num_lines,
                "start_str": start_str,
                "patch_line": patch_list,
                "patch_fragment": "".join(fragment_parts),
            })
        patch_list = []
        fragment_parts = []

    for line in patch_lines:
        marker = line[:1]

        if collecting:
            if marker == "\\":
                # "\ No newline at end of file" qualifies the previous body
                # line; it is not content. When that line belongs to the new
                # file, its trailing newline does not exist there either.
                if patch_list:
                    patch_list.append(line)
                    if last_marker in ("+", " ") and fragment_parts:
                        fragment_parts[-1] = fragment_parts[-1].rstrip("\r\n")
                continue

            if bounded:
                is_body = (old_left > 0 or new_left > 0) and (
                    marker in ("+", "-", " ") or not line.strip()
                )
            else:
                # Unparsable header: fall back to reading up to the next header.
                is_body = not line.startswith(("@@", "--- a/", "+++ b/"))

            if is_body:
                if marker == "+":
                    new_left -= 1
                    text = line[1:]
                elif marker == "-":
                    old_left -= 1
                    text = None
                else:
                    old_left -= 1
                    new_left -= 1
                    # Some tools write an empty context line without its
                    # leading space; keep its newline in that case.
                    text = line[1:] if marker == " " or line.strip() else line
                # Hunks of files that are not tracked (no "+++ b/" header) are
                # consumed for their line counts but not recorded.
                if patch_start:
                    patch_list.append(line)
                    if text is not None:
                        fragment_parts.append(text)
                last_marker = marker
                continue

            collecting = False

        if line.startswith("@@"):
            flush_hunk()
            match = hunk_header.match(line)
            if match:
                old_count, new_start, new_count, section = match.groups()
                old_left = 1 if old_count is None else int(old_count)
                new_left = 1 if new_count is None else int(new_count)
                start_line = int(new_start)
                num_lines = new_left
                start_str = section.strip()
            else:
                start_line, num_lines, start_str = 0, 0, ""
            bounded = match is not None
            collecting = True
            last_marker = ""
        elif line.startswith("+++ b/"):
            flush_hunk()
            # Extract the file path that follows "+++ b/".
            patch_changes.append({"file": line[6:].rstrip("\r\n"), "changes": []})
            patch_start = True
        elif line.startswith("--- "):
            # Old-file header ("--- a/..." or "--- /dev/null"): the previous
            # file section is over.
            flush_hunk()
            patch_start = False

    flush_hunk()
    return patch_changes

def fuzzy_compare(source_code,patch):
    """Placeholder for a fuzzy patch comparison; not implemented yet.

    Both arguments are currently ignored and the result is always ``False``
    ("no match").
    """
    return False

def linebyline_compare(source_code,patch):
    """Placeholder for a line-by-line patch comparison; not implemented yet.

    Both arguments are currently ignored and the result is always ``False``
    ("no match").
    """
    return False

def tokens_compare(code_type,source_code,patch):
    """Tokenize ``patch`` and log the tokens; the comparison is not implemented.

    ``source_code`` is not used yet, so the function always returns ``False``
    ("no match") after logging the tokens at info level.
    """
    patch_tokens = Tokenize(patch, code_type).get_tokens()
    Debug.info(patch_tokens)
    return False

def simple_compare(source_code, patch):
    """Return True when ``patch`` occurs verbatim inside ``source_code``.

    The outcome is also written to the debug log.
    """
    found = patch in source_code
    Debug.debug("patch find" if found else "patch not find")
    return found

def source_strip(code_type,file_content,fragment):
    """Run ``CodeStrip.code_strip`` on the file content and on the patch fragment.

    Returns:
        A ``(stripped_file_content, stripped_fragment)`` tuple.
        NOTE(review): what exactly gets stripped is decided by ``CodeStrip``.
    """
    stripped_content = CodeStrip.code_strip(file_content, code_type)
    stripped_fragment = CodeStrip.code_strip(fragment, code_type)
    return stripped_content, stripped_fragment

def compare_patch(source_file,patch):
    """Check whether every hunk of ``patch`` is present in ``source_file``.

    Each hunk's ``patch_fragment`` is tried against the file with a cascade of
    comparisons, stopping at the first one that matches:

    1. ``simple_compare`` on the raw text;
    2. on the ``source_strip`` output (only when both stripped texts are
       non-empty): ``simple_compare``, then ``tokens_compare`` for ``.c`` /
       ``.cpp`` files, then ``linebyline_compare``;
    3. ``fuzzy_compare`` on the raw text.

    Args:
        source_file: Path of the source file to inspect.
        patch: One file entry as produced by ``parse_patch``; only
            ``patch["changes"][i]["patch_fragment"]`` is read.

    Returns:
        True when all hunks match (also when there are no hunks), False as
        soon as one hunk matches none of the comparisons.
    """
    if source_file.endswith(".c"):
        code_type = "c"
    elif source_file.endswith(".cpp"):
        code_type = "cpp"
    else:
        code_type = ""

    # Decode leniently: a single source file in a legacy encoding must not
    # abort the whole scan with UnicodeDecodeError.
    with open(source_file, 'r', encoding='utf-8', errors='replace') as file:
        file_content = file.read()

    for code_change in patch["changes"]:
        fragment = code_change["patch_fragment"]

        Debug.debug("#####################src######################")
        Debug.debug(file_content)
        Debug.debug("#####################src end######################")
        Debug.debug("#####################patch######################")
        Debug.debug(fragment)
        Debug.debug("#####################patch end######################")

        if simple_compare(file_content,fragment):
            continue

        file_content_strip,fragment_strip=source_strip(code_type,file_content,fragment)

        if file_content_strip and fragment_strip:
            if simple_compare(file_content_strip,fragment_strip):
                continue
            # Token comparison is only attempted for recognised C/C++ files.
            if code_type:
                if tokens_compare(code_type,file_content_strip,fragment_strip):
                    continue
            if linebyline_compare(file_content_strip,fragment_strip):
                continue

        if fuzzy_compare(file_content,fragment):
            continue

        # No comparison matched this hunk.
        return False

    return True

def sacn_dir(project_dir,patch_changes):
    """Walk ``project_dir`` and check each file touched by the patch.

    A file is matched to a patch entry when its path relative to
    ``project_dir`` (with ``/`` separators) equals the entry's ``"file"``
    value. Matching works whether or not ``project_dir`` ends with a path
    separator.

    Args:
        project_dir: Root directory of the project to scan.
        patch_changes: List of file entries as returned by ``parse_patch``.

    Returns:
        True when at least one file was compared and every compared file
        contains its hunks; False otherwise, including when no file matched
        or when ``patch_changes`` is an error message string.
    """
    # parse_patch reports failures as a message string; nothing to scan then.
    if isinstance(patch_changes, str):
        return False

    # 0: nothing compared yet, 1: patch found, -1: patch missing in some file
    find_patch = 0
    # Walk every file below the project directory.
    for root, dirs, files in os.walk(project_dir):
        for file in files:
            file_path = os.path.join(root, file)
            # Patch paths are relative and use "/", so normalise to that form.
            relative_path = os.path.relpath(file_path, project_dir).replace(os.sep, "/")
            for change in patch_changes:
                if change["file"] != relative_path:
                    continue
                Debug.debug(f'relative_path={relative_path},change={change["file"]}')
                if compare_patch(file_path, change):
                    Debug.debug("find patch code")
                    # A failure recorded earlier is never overwritten.
                    if find_patch != -1:
                        find_patch = 1
                else:
                    Debug.debug("not find patch code")
                    find_patch = -1
    return find_patch == 1

def pedt_scan(project_dir, patch_file):
    """Check whether the patch in ``patch_file`` is present in ``project_dir``.

    The outcome is written to the info log.

    Args:
        project_dir: Root directory of the project to scan.
        patch_file: Path of the patch file to look for.

    Returns:
        "项目目录不存在" when ``project_dir`` is not an existing directory, the
        error message from ``parse_patch`` when the patch cannot be read, and
        None otherwise.
    """
    if not os.path.isdir(project_dir):
        return "项目目录不存在"

    Debug.info(f"开始检测补丁: {patch_file}")
    patch_changes = parse_patch(patch_file)
    if isinstance(patch_changes, str):
        # parse_patch signals failure with a message string; passing it on to
        # the directory scan would fail on the first file comparison.
        Debug.info(f"补丁检测失败: {patch_file} ({patch_changes})")
        return patch_changes

    if sacn_dir(project_dir, patch_changes):
        Debug.info(f"补丁检测成功: {patch_file}")
    else:
        Debug.info(f"补丁检测失败: {patch_file}")


if __name__ == '__main__':
    # Usage: pedt.py <source_dir> <patch_dir>
    if len(sys.argv) <= 2:
        print('need source_dir patch_dir', file=sys.stderr)
        # A usage error must not look like success to the calling shell.
        sys.exit(1)
    source_dir = sys.argv[1]
    patch_dir = sys.argv[2]
    if not os.path.isdir(patch_dir):
        print(f'patch_dir is not a directory: {patch_dir}', file=sys.stderr)
        sys.exit(1)
    now = datetime.datetime.now()
    timestamp = now.strftime('%Y-%m-%d_%H-%M-%S')
    Debug.setup_logger(f'./genmai_pedt_{timestamp}.log',logging.INFO,True)
    # Sorted so the patches are processed (and logged) in a stable order.
    for filename in sorted(os.listdir(patch_dir)):
        file_path = os.path.join(patch_dir, filename)
        if os.path.isfile(file_path) and file_path.endswith(".patch"):
            pedt_scan(source_dir, file_path)