# project1/eval.py
# 95 lines, 3.8 KiB, Python (last modified 2024-11-20 12:20:08 +08:00)

import torch.nn as nn
import json
import torch
import argparse
from datasets.coco_eval import CocoEvaluator
from util import box_ops
from datasets import build_dataset, get_coco_api_from_dataset
def get_args_parser():
    """Build the command-line parser for the offline evaluation script.

    The parser is created with ``add_help=False`` so that it can be passed as
    a ``parents=[...]`` entry to another ``ArgumentParser`` without a
    duplicate ``-h`` option.

    Returns:
        argparse.ArgumentParser: parser carrying the flags defined below.
    """
    ap = argparse.ArgumentParser('Deformable DETR Detector', add_help=False)
    add = ap.add_argument

    add('--num_ref_frames', default=3, type=int, help='number of reference frames')
    add('--sgd', action='store_true')
    add('--interval1', default=20, type=int)
    add('--interval2', default=60, type=int)
    add(
        '--masks',
        action='store_true',
        help="Train segmentation head if the flag is provided",
    )

    # dataset parameters
    add('--vid_path', default='./data/vid', type=str)
    add('--input_result_path', default='./test_save.json', type=str)
    add(
        '--output_dir',
        default='',
        help='path where to save, empty for no saving',
    )
    add('--eval', action='store_true')
    # store_true already defaults to False, so no explicit default is needed.
    add('--cache_mode', action='store_true', help='whether to cache images on memory')

    return ap
class PostProcess(nn.Module):
    """Convert the model's raw output into the format expected by the COCO API.

    Args:
        num_select: maximum number of (query, class) candidates kept per
            image. Defaults to 100, the value that used to be hard-coded.
    """

    def __init__(self, num_select=100):
        super().__init__()
        self.num_select = num_select

    @torch.no_grad()
    def forward(self, outputs, target_sizes):
        """Select the top-scoring detections and rescale their boxes.

        Parameters:
            outputs: raw outputs of the model; a mapping providing
                ``'pred_logits'`` (indexed here as [batch, queries, classes])
                and ``'pred_boxes'`` (gathered here as [batch, queries, 4]).
            target_sizes: tensor of dimension [batch_size x 2] containing the
                size of each image of the batch, unpacked as (height, width).
                For evaluation, this must be the original image size (before
                any data augmentation). For visualization, this should be the
                image size after data augment, but before padding.

        Returns:
            A list with one dict per image, holding ``'scores'``, ``'labels'``
            and ``'boxes'`` tensors for the selected detections.
        """
        out_logits, out_bbox = outputs['pred_logits'], outputs['pred_boxes']

        assert len(out_logits) == len(target_sizes)
        assert target_sizes.shape[1] == 2

        num_classes = out_logits.shape[2]
        prob = out_logits.sigmoid()

        # Rank every (query, class) pair of an image jointly.
        flat_prob = prob.view(out_logits.shape[0], -1)
        # torch.topk raises if k exceeds the number of candidates, so clamp it
        # for models with fewer than `num_select` query/class combinations.
        k = min(self.num_select, flat_prob.shape[1])
        scores, topk_indexes = torch.topk(flat_prob, k, dim=1)

        # Decompose the flat index back into (query index, class index).
        topk_boxes = topk_indexes // num_classes
        labels = topk_indexes % num_classes

        # NOTE(review): box_cxcywh_to_xyxy is a project helper; presumably it
        # maps (cx, cy, w, h) to (x0, y0, x1, y1) -- confirm in util.box_ops.
        boxes = box_ops.box_cxcywh_to_xyxy(out_bbox)
        boxes = torch.gather(boxes, 1, topk_boxes.unsqueeze(-1).repeat(1, 1, 4))

        # and from relative [0, 1] to absolute [0, height] coordinates
        img_h, img_w = target_sizes.unbind(1)
        scale_fct = torch.stack([img_w, img_h, img_w, img_h], dim=1)
        boxes = boxes * scale_fct[:, None, :]

        return [
            {'scores': s, 'labels': l, 'boxes': b}
            for s, l, b in zip(scores, labels, boxes)
        ]
def eval(path, dataset_args=None):
    """Score saved detection results with the COCO evaluator.

    Loads the JSON file at ``path``, converts every stored prediction field to
    a tensor, feeds the predictions to a ``CocoEvaluator`` built on the
    ``'test'`` split, and prints the summary.

    NOTE: the function name shadows the ``eval`` builtin inside this module;
    it is kept unchanged for existing callers.

    Parameters:
        path: path to the JSON results file. The loop below expects an
            iterable of dicts mapping an image-id string to a dict of
            list-like prediction fields.
        dataset_args: namespace forwarded to ``build_dataset``. When omitted,
            the module-level ``args`` set by the script entry point is used,
            which matches the previous behaviour.
    """
    with open(path, 'r', encoding='utf-8') as f:
        dataset = json.load(f)

    # Resolve the global lazily so that callers importing this module can pass
    # their own namespace instead of relying on the `__main__` block.
    cfg = args if dataset_args is None else dataset_args

    dataset_val = build_dataset(image_set='test', args=cfg)
    base_ds = get_coco_api_from_dataset(dataset_val)
    # Only bounding boxes are evaluated; this is what the former
    # postprocessors-based filter always produced.
    coco_evaluator = CocoEvaluator(base_ds, ('bbox',))

    # NOTE(review): nesting reconstructed from flattened source -- the
    # evaluator is assumed to be updated once per top-level entry; confirm.
    for data in dataset:
        res = {
            int(image_id): {k: torch.tensor(v) for k, v in fields.items()}
            for image_id, fields in data.items()
        }
        coco_evaluator.update(res)

    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()
if __name__ == '__main__':
    # `args` is deliberately bound at module level: eval() reads it when it
    # builds the dataset.
    cli = argparse.ArgumentParser(
        'Deformable DETR training and evaluation script',
        parents=[get_args_parser()],
    )
    args = cli.parse_args()
    results_file = args.input_result_path
    eval(results_file)