95 lines
3.8 KiB
Python
95 lines
3.8 KiB
Python
|
import torch.nn as nn
|
||
|
import json
|
||
|
import torch
|
||
|
import argparse
|
||
|
|
||
|
from datasets.coco_eval import CocoEvaluator
|
||
|
from util import box_ops
|
||
|
from datasets import build_dataset, get_coco_api_from_dataset
|
||
|
|
||
|
def get_args_parser():
    """Build the command-line parser used by this script.

    The parser is created with ``add_help=False`` so that it can be listed in
    the ``parents=[...]`` argument of another ``argparse.ArgumentParser``
    without registering a second ``-h/--help`` option.

    Returns:
        argparse.ArgumentParser: parser exposing the options declared below.
    """
    parser = argparse.ArgumentParser('Deformable DETR Detector', add_help=False)

    parser.add_argument('--num_ref_frames', default=3, type=int, help='number of reference frames')
    parser.add_argument('--sgd', action='store_true')
    parser.add_argument('--interval1', default=20, type=int)
    parser.add_argument('--interval2', default=60, type=int)

    parser.add_argument('--masks', action='store_true',
                        help="Train segmentation head if the flag is provided")

    # dataset parameters
    parser.add_argument('--vid_path', default='./data/vid', type=str)
    parser.add_argument('--input_result_path', default='./test_save.json', type=str)
    parser.add_argument('--output_dir', default='',
                        help='path where to save, empty for no saving')
    parser.add_argument('--eval', action='store_true')
    # ``store_true`` already implies a default of False, so none is spelled out.
    parser.add_argument('--cache_mode', action='store_true', help='whether to cache images on memory')

    return parser
|
||
|
|
||
|
class PostProcess(nn.Module):
    """ This module converts the model's output into the format expected by the coco api"""

    def __init__(self, num_select=100):
        """
        Parameters:
            num_select: maximum number of (query, class) candidates kept per image.
                        Defaults to 100, the value that used to be hard-coded in
                        ``forward``.
        """
        super().__init__()
        self.num_select = num_select

    @torch.no_grad()
    def forward(self, outputs, target_sizes):
        """ Perform the computation
        Parameters:
            outputs: raw outputs of the model; the entries ``'pred_logits'`` and
                     ``'pred_boxes'`` are read. The logits are indexed as
                     [batch, query, class] and the boxes carry 4 values each.
            target_sizes: tensor of dimension [batch_size x 2] containing the size of each images of the batch,
                          unpacked as (height, width).
                          For evaluation, this must be the original image size (before any data augmentation)
                          For visualization, this should be the image size after data augment, but before padding
        Returns:
            A list with one dict per batch element holding the keys ``'scores'``,
            ``'labels'`` and ``'boxes'`` for the selected candidates.
        """
        out_logits, out_bbox = outputs['pred_logits'], outputs['pred_boxes']

        assert len(out_logits) == len(target_sizes)
        assert target_sizes.shape[1] == 2

        batch_size, num_classes = out_logits.shape[0], out_logits.shape[2]

        # Flatten queries and classes so that top-k ranks every (query, class) pair.
        prob = out_logits.sigmoid().view(batch_size, -1)

        # torch.topk raises when k exceeds the dimension size; clamp so that small
        # models (fewer than ``num_select`` candidates) still work.
        k = min(self.num_select, prob.shape[1])
        scores, topk_indexes = torch.topk(prob, k, dim=1)

        # Recover which query and which class each flattened index refers to.
        topk_boxes = topk_indexes // num_classes
        labels = topk_indexes % num_classes

        boxes = box_ops.box_cxcywh_to_xyxy(out_bbox)
        boxes = torch.gather(boxes, 1, topk_boxes.unsqueeze(-1).repeat(1, 1, 4))

        # and from relative [0, 1] to absolute [0, height] coordinates
        img_h, img_w = target_sizes.unbind(1)
        scale_fct = torch.stack([img_w, img_h, img_w, img_h], dim=1)
        boxes = boxes * scale_fct[:, None, :]

        results = [{'scores': s, 'labels': l, 'boxes': b} for s, l, b in zip(scores, labels, boxes)]

        return results
|
||
|
|
||
|
def eval(path, dataset_args=None):
    """Score a saved JSON file of detections with the COCO evaluator and print the summary.

    The JSON file is expected to hold a sequence of dicts. Each dict maps an
    image id (converted with ``int``) to a dict of named values that are
    turned into tensors with ``torch.tensor`` and handed to
    ``CocoEvaluator.update``.

    NOTE: this function shadows the builtin ``eval``; the name is kept because
    the script entry point calls it.

    Parameters:
        path: path of the JSON results file to read.
        dataset_args: options object forwarded to ``build_dataset``. When left as
                      ``None`` the module-level ``args`` bound by the script entry
                      point is used, which is the previous behaviour.

    Raises:
        NameError: if ``dataset_args`` is ``None`` and no module-level ``args``
                   exists (e.g. the module was imported rather than run).
    """
    with open(path, 'r') as f:
        dataset = json.load(f)

    # The global ``args`` is only looked up when no explicit options were given,
    # so importing callers can pass their own and never touch the global.
    build_args = dataset_args if dataset_args is not None else args

    dataset_val = build_dataset(image_set='test', args=build_args)
    base_ds = get_coco_api_from_dataset(dataset_val)
    # Only box predictions are evaluated here.
    iou_types = ('bbox',)
    coco_evaluator = CocoEvaluator(base_ds, iou_types)

    for data in dataset:
        res = {
            int(image_id): {k: torch.tensor(v) for k, v in preds.items()}
            for image_id, preds in data.items()
        }
        coco_evaluator.update(res)

    coco_evaluator.synchronize_between_processes()

    # accumulate predictions from all images
    coco_evaluator.accumulate()
    coco_evaluator.summarize()
|
||
|
|
||
|
if __name__ == '__main__':
    # NOTE: ``args`` is deliberately bound at module scope here; eval() looks it
    # up as a global when building the dataset, so do not move this into a
    # function without also passing the options to eval() explicitly.
    parser = argparse.ArgumentParser('Deformable DETR training and evaluation script', parents=[get_args_parser()])
    args = parser.parse_args()
    eval(args.input_result_path)
|