project1/tools/covert2coco.py

136 lines
4.2 KiB
Python
Raw Permalink Normal View History

2024-11-20 12:20:08 +08:00
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
@Time : 11:01AM
@Author : Zhao kunlong
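@Description: Convert per-video rgb_gt.txt annotations into a COCO-style video JSON file (videos / images / annotations / categories).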
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
import json
import os
import cv2 as cv
from glob import glob
from collections import defaultdict
from tqdm import tqdm
# Category names; the script entry point assigns category ids by
# enumerating this tuple starting at 1.
CLASSES = ('uav',)
# Encoded name for each category, paired index-wise with CLASSES.
CLASSES_ENCODES = ('None',)
# Lookup from encoded category name to its 1-based category id.
cats_id_maps = {
    encoded: cat_id for cat_id, encoded in enumerate(CLASSES_ENCODES, 1)
}
def decode_gt(gts: str):
    """Parse one comma-separated ground-truth record.

    Only the text before the first newline of ``gts`` is used. Field 0 is
    the frame id and field 1 the number of objects. Each object then
    occupies six consecutive fields: an integer object id, four float
    values, and a sixth field that this function does not read.

    Returns:
        A tuple ``(objects, frame_id)`` where ``objects`` maps each object
        id to the list of its four float values.
    """
    fields = gts.split('\n')[0].split(',')
    # Object count is parsed before the frame id, so a malformed record
    # fails on the same field as it always has.
    count = int(fields[1])
    frame_id = int(fields[0])
    stride = 6
    objects = {
        int(fields[start]): [float(tok) for tok in fields[start + 1:start + 5]]
        for start in range(2, 2 + count * stride, stride)
    }
    return objects, frame_id
def conver2coco(root, VID: defaultdict, split: str = 'train'):
    """Convert one dataset split into a COCO-style video annotation file.

    Every entry matched by ``<root>/<split>/*`` is treated as a video
    directory containing ``rgb_gt.txt`` and an ``rgb/`` folder with frames
    named ``{frame_id:06d}.jpg``. Only frames whose ground-truth record has
    at least one object are exported. The result is appended to ``VID``
    under the ``'videos'``, ``'images'`` and ``'annotations'`` keys and then
    dumped to ``sky_data_vid_<split>.json`` in the current working directory.

    Args:
        root: Dataset root directory.
        VID: Mapping of list-valued sections that is filled in place
            (the caller is expected to have set ``'categories'``).
        split: Name of the sub-directory of ``root`` to convert. For
            ``'val'``, ``'val_train'`` and ``'test'`` the per-video
            ``vid_train_frames`` list is written empty; for ``'test'`` no
            annotations are written.

    Raises:
        OSError: If a frame image cannot be read.
    """
    img_id = 1
    ann_id = 1
    trained_frames = 0
    hide_train_frames = split in ('val', 'val_train', 'test')
    # The original comparison used 'test ' (trailing space) and therefore
    # never matched; annotations are skipped for the test split as intended.
    write_annotations = split != 'test'

    # Sorted so that video/image ids are reproducible across runs.
    videos = sorted(glob(os.path.join(root, split, '*')))
    progress = tqdm(videos, desc='processing...')
    for vid_id, _video in enumerate(progress, 1):
        video_name = os.path.join(*_video.split('/')[-3:])

        # Parse the ground truth, remembering which frames contain objects.
        rgb_gt_dicts = {}
        vid_train_frames = []
        gt_path = os.path.join(_video, 'rgb_gt.txt')
        with open(gt_path, 'r') as f:
            for line in f:
                if not line.strip():
                    # Tolerate blank / trailing empty lines in the GT file.
                    continue
                rgb_gt_dict, frame_id = decode_gt(line)
                rgb_gt_dicts[frame_id] = rgb_gt_dict
                if rgb_gt_dict:
                    vid_train_frames.append(frame_id)
        trained_frames += len(vid_train_frames)

        VID['videos'].append(dict(
            id=vid_id,
            name=video_name,
            vid_train_frames=[] if hide_train_frames else vid_train_frames))

        for frame_id in vid_train_frames:
            image_path = os.path.join(
                _video, 'rgb', '{:06d}.jpg'.format(frame_id))
            img = cv.imread(image_path)
            if img is None:
                # cv.imread signals failure by returning None; fail loudly
                # rather than reusing the previous frame's dimensions.
                raise OSError(f'failed to read image: {image_path}')
            height, width = img.shape[:2]

            VID['images'].append(dict(
                file_name=os.path.join(*image_path.split('/')[-5:]),
                height=height,
                width=width,
                id=img_id,
                frame_id=frame_id,
                video_id=vid_id,
                is_vid_train_frame=True))

            if write_annotations:
                for x1, y1, x2, y2 in rgb_gt_dicts[frame_id].values():
                    # GT values are treated as corner coordinates and
                    # converted to integer [x, y, w, h].
                    w, h = x2 - x1, y2 - y1
                    x1, y1, w, h = int(x1), int(y1), int(w), int(h)
                    VID['annotations'].append(dict(
                        id=ann_id,
                        video_id=vid_id,
                        image_id=img_id,
                        category_id=1,
                        instance_id=-1,
                        bbox=[x1, y1, w, h],
                        area=w * h,
                        iscrowd=False,
                        occluded=False,
                        generated=False))
                    ann_id += 1
            img_id += 1

    # Sanity check: exactly one image record per selected frame.
    assert trained_frames == img_id - 1
    with open(f'sky_data_vid_{split}.json', 'w') as f:
        json.dump(VID, f)
if __name__ == '__main__':
    # Local import: argparse is only needed when run as a script.
    import argparse

    parser = argparse.ArgumentParser(
        description='Convert a dataset split to a COCO-style video JSON file.')
    # Defaults reproduce the previously hard-coded configuration, so running
    # the script without arguments behaves as before.
    parser.add_argument(
        '--root',
        default='/home/fty/Documents/fty/mvsod/data/sky_data3',
        help='dataset root directory containing the split sub-directories')
    parser.add_argument(
        '--split',
        default='val',
        help="split sub-directory to convert, e.g. 'train' or 'val'")
    args = parser.parse_args()

    # Category ids are 1-based and follow the order of CLASSES.
    categories = [
        dict(id=cat_id, name=name, encode_name=CLASSES_ENCODES[cat_id - 1])
        for cat_id, name in enumerate(CLASSES, 1)
    ]
    vid = defaultdict(list)
    vid['categories'] = categories
    conver2coco(args.root, vid, split=args.split)