project1/tools/covert2coco.py

136 lines
4.2 KiB
Python
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
@Time : 11:01 AM
@Author : Zhao Kunlong
@Description: Convert sky_data UAV video annotations to a COCO-VID style JSON file.
"""
import json
import os
import cv2 as cv
from glob import glob
from collections import defaultdict
from tqdm import tqdm
# Category names used in the converted annotation file.
CLASSES = ('uav',)
# Encoded category names; their 1-based position doubles as the category id.
CLASSES_ENCODES = ('None',)
# Map each encoded name to its 1-based category id.
cats_id_maps = {name: cat_id for cat_id, name in enumerate(CLASSES_ENCODES, 1)}
def decode_gt(gts: str):
    """Parse a single ground-truth line into per-object boxes plus its frame id.

    Each line is comma-separated: frame_id, obj_num, then six fields per
    object — the object id, four box coordinates, and one trailing field
    that is not used here.

    Returns:
        tuple: ({obj_id: [c0, c1, c2, c3]}, frame_id)
    """
    fields = gts.split('\n')[0].split(',')
    frame_idx = int(fields[0])
    num_objects = int(fields[1])
    boxes = {
        int(fields[2 + i * 6]): [float(c) for c in fields[3 + i * 6:7 + i * 6]]
        for i in range(num_objects)
    }
    return boxes, frame_idx
def conver2coco(root, VID: defaultdict, split: str = 'train'):
    """Convert one split of the sky_data UAV dataset to a COCO-VID style JSON.

    Args:
        root: dataset root directory containing one sub-directory per split.
        VID: ``defaultdict(list)`` pre-seeded with 'categories'; the
            'videos', 'images' and 'annotations' lists are appended in place.
        split: split name ('train', 'val', 'val_train' or 'test').

    Side effect:
        Writes ``sky_data_vid_<split>.json`` to the current directory.
    """
    records = dict(
        vid_id=1,
        img_id=1,
        ann_id=1,
        global_instance_id=1,
        num_vid_train_frames=0,
        num_no_objects=0)
    trained_frames = 0
    skipped_frames = 0  # annotated frames whose image file could not be read
    videos = glob(os.path.join(root, split, '*'))
    pdr = tqdm(videos, desc='processing...')
    for _video in pdr:
        # Video name keeps only the last three path components
        # (e.g. '<root_tail>/<split>/<video_dir>').
        video_name = os.path.join(*_video.split('/')[-3:])
        vid_train_frames = []
        gt_path = os.path.join(_video, 'rgb_gt.txt')
        with open(gt_path, 'r') as f:
            rgb_gts = f.readlines()
        rgb_gt_dicts = {}
        for rgb_gt in rgb_gts:
            rgb_gt_dict, frame_id = decode_gt(rgb_gt)
            rgb_gt_dicts[frame_id] = rgb_gt_dict
            # Only frames that contain at least one object are exported.
            if len(rgb_gt_dict) != 0:
                vid_train_frames.append(frame_id)
        trained_frames += len(vid_train_frames)
        # Evaluation splits get an empty per-video train-frame list.
        if split in ['val', 'val_train', 'test']:
            video = dict(
                id=records['vid_id'],
                name=video_name,
                vid_train_frames=[])
        else:
            video = dict(
                id=records['vid_id'],
                name=video_name,
                vid_train_frames=vid_train_frames)
        VID['videos'].append(video)
        for frame_id in vid_train_frames:
            image_path = os.path.join(_video, 'rgb', '{:06d}.jpg'.format(frame_id))
            img = cv.imread(image_path)
            if img is None:
                # BUGFIX: a failed read previously fell through and reused the
                # previous frame's height/width (or raised NameError on the
                # first frame). Skip the unreadable frame instead.
                print('failed to read image, skipping:', image_path)
                skipped_frames += 1
                continue
            height, width = img.shape[:2]
            image = dict(
                # file_name keeps the last five path components so it is
                # relative to the dataset root's parent.
                file_name=os.path.join(*image_path.split('/')[-5:]),
                height=height,
                width=width,
                id=records['img_id'],
                frame_id=frame_id,
                video_id=records['vid_id'],
                is_vid_train_frame=True)
            VID['images'].append(image)
            # BUGFIX: was `split != 'test '` (trailing space), which is always
            # True, so annotations were emitted for the test split as well.
            if split != 'test':
                obj_annos = rgb_gt_dicts[frame_id]
                for obj_anno in obj_annos.values():
                    # GT boxes are (x1, y1, x2, y2); COCO bboxes are (x, y, w, h).
                    x1, y1, x2, y2 = obj_anno
                    w, h = x2 - x1, y2 - y1
                    x1, y1, w, h = int(x1), int(y1), int(w), int(h)
                    ann = dict(
                        id=records['ann_id'],
                        video_id=records['vid_id'],
                        image_id=records['img_id'],
                        category_id=1,
                        instance_id=-1,
                        bbox=[x1, y1, w, h],
                        area=w * h,
                        iscrowd=False,
                        occluded=False,
                        generated=False)
                    VID['annotations'].append(ann)
                    records['ann_id'] += 1
            records['img_id'] += 1
        records['vid_id'] += 1
    # Every annotated frame was either exported as an image or skipped.
    assert trained_frames == records['img_id'] - 1 + skipped_frames
    with open(f'sky_data_vid_{split}.json', 'w') as f:
        json.dump(VID, f)
if __name__ == '__main__':
    # Build the COCO 'categories' list; ids are 1-based and aligned with
    # CLASSES_ENCODES by position.
    categories = [
        dict(id=cat_id, name=cls_name, encode_name=CLASSES_ENCODES[cat_id - 1])
        for cat_id, cls_name in enumerate(CLASSES, 1)
    ]
    VID_train = defaultdict(list)
    VID_train['categories'] = categories
    # root = '/home/baode/project/data/sky_data1'
    root = '/home/fty/Documents/fty/mvsod/data/sky_data3'
    # conver2coco(root, VID_train, split='train')
    conver2coco(root, VID_train, split='val')