project1/datasets/coco_video_parser.py

151 lines
5.3 KiB
Python
Raw Normal View History

2024-11-20 12:20:08 +08:00
from collections import defaultdict
import numpy as np
from pycocotools.coco import COCO, _isArrayLike
import random
class CocoVID(COCO):
"""Inherit official COCO class in order to parse the annotations of bbox-
related video tasks.
Args:
annotation_file (str): location of annotation file. Defaults to None.
load_img_as_vid (bool): If True, convert image data to video data,
which means each image is converted to a video. Defaults to False.
"""
def __init__(self, annotation_file=None, load_img_as_vid=False):
assert annotation_file, 'Annotation file must be provided.'
self.load_img_as_vid = load_img_as_vid
super(CocoVID, self).__init__(annotation_file=annotation_file)
def convert_img_to_vid(self, dataset):
"""Convert image data to video data."""
if 'images' in self.dataset:
videos = []
for i, img in enumerate(self.dataset['images']):
videos.append(dict(id=img['id'], name=img['file_name']))
img['video_id'] = img['id']
img['frame_id'] = 0
dataset['videos'] = videos
if 'annotations' in self.dataset:
for i, ann in enumerate(self.dataset['annotations']):
ann['video_id'] = ann['image_id']
ann['instance_id'] = ann['id']
return dataset
def createIndex(self):
"""Create index."""
print('creating index...')
anns, cats, imgs, vids = {}, {}, {}, {}
(imgToAnns, catToImgs, vidToImgs, vidToInstances,
instancesToImgs) = defaultdict(list), defaultdict(list), defaultdict(
list), defaultdict(list), defaultdict(list)
if 'videos' not in self.dataset and self.load_img_as_vid:
self.dataset = self.convert_img_to_vid(self.dataset)
if 'videos' in self.dataset:
for video in self.dataset['videos']:
vids[video['id']] = video
if 'annotations' in self.dataset:
for ann in self.dataset['annotations']:
imgToAnns[ann['image_id']].append(ann)
anns[ann['id']] = ann
if 'instance_id' in ann:
instancesToImgs[ann['instance_id']].append(ann['image_id'])
if 'video_id' in ann and \
ann['instance_id'] not in \
vidToInstances[ann['video_id']]:
vidToInstances[ann['video_id']].append(
ann['instance_id'])
if 'images' in self.dataset:
for img in self.dataset['images']:
vidToImgs[img['video_id']].append(img)
imgs[img['id']] = img
if 'categories' in self.dataset:
for cat in self.dataset['categories']:
cats[cat['id']] = cat
if 'annotations' in self.dataset and 'categories' in self.dataset:
for ann in self.dataset['annotations']:
catToImgs[ann['category_id']].append(ann['image_id'])
print('index created!')
self.anns = anns
self.imgToAnns = imgToAnns
self.catToImgs = catToImgs
self.imgs = imgs
self.cats = cats
self.videos = vids
self.vidToImgs = vidToImgs
self.vidToInstances = vidToInstances
self.instancesToImgs = instancesToImgs
def get_vid_ids(self, vidIds=[]):
"""Get video ids that satisfy given filter conditions.
Default return all video ids.
Args:
vidIds (list[int]): The given video ids. Defaults to [].
Returns:
list[int]: Video ids.
"""
vidIds = vidIds if _isArrayLike(vidIds) else [vidIds]
if len(vidIds) == 0:
ids = self.videos.keys()
else:
ids = set(vidIds)
return list(ids)
def get_img_ids_from_vid(self, vidId):
"""Get image ids from given video id.
Args:
vidId (int): The given video id.
Returns:
list[int]: Image ids of given video id.
"""
img_infos = self.vidToImgs[vidId]
ids = list(np.zeros([len(img_infos)], dtype=int))
for i, img_info in enumerate(img_infos):
ids[i] = img_info["id"]
# for img_info in img_infos:
# ids[img_info['frame_id']] = img_info['id']
return ids
def get_ins_ids_from_vid(self, vidId):
"""Get instance ids from given video id.
Args:
vidId (int): The given video id.
Returns:
list[int]: Instance ids of given video id.
"""
return self.vidToInstances[vidId]
def get_img_ids_from_ins_id(self, insId):
"""Get image ids from given instance id.
Args:
insId (int): The given instance id.
Returns:
list[int]: Image ids of given instance id.
"""
return self.instancesToImgs[insId]
def load_vids(self, ids=[]):
"""Get video information of given video ids.
Default return all videos information.
Args:
ids (list[int]): The given video ids. Defaults to [].
Returns:
list[dict]: List of video information.
"""
if _isArrayLike(ids):
return [self.videos[id] for id in ids]
elif type(ids) == int:
return [self.videos[ids]]