# project1/datasets/transforms_multi.py
"""
Transforms and data augmentation for sequence level images, bboxes and masks.
"""
import random
import PIL
import torch
import torchvision.transforms as T
import torchvision.transforms.functional as F
from util.box_ops import box_xyxy_to_cxcywh, box_iou
from util.misc import interpolate
import numpy as np
from numpy import random as rand
from PIL import Image
import cv2


def bbox_overlaps(bboxes1, bboxes2, mode='iou', eps=1e-6):
    """Compute pairwise IoU (or IoF) between two sets of xyxy boxes."""
    assert mode in ['iou', 'iof']
    bboxes1 = bboxes1.astype(np.float32)
    bboxes2 = bboxes2.astype(np.float32)
    rows = bboxes1.shape[0]
    cols = bboxes2.shape[0]
    ious = np.zeros((rows, cols), dtype=np.float32)
    if rows * cols == 0:
        return ious
    # Iterate over the smaller set to shorten the Python-level loop.
    exchange = False
    if bboxes1.shape[0] > bboxes2.shape[0]:
        bboxes1, bboxes2 = bboxes2, bboxes1
        ious = np.zeros((cols, rows), dtype=np.float32)
        exchange = True
    area1 = (bboxes1[:, 2] - bboxes1[:, 0]) * (bboxes1[:, 3] - bboxes1[:, 1])
    area2 = (bboxes2[:, 2] - bboxes2[:, 0]) * (bboxes2[:, 3] - bboxes2[:, 1])
    for i in range(bboxes1.shape[0]):
        x_start = np.maximum(bboxes1[i, 0], bboxes2[:, 0])
        y_start = np.maximum(bboxes1[i, 1], bboxes2[:, 1])
        x_end = np.minimum(bboxes1[i, 2], bboxes2[:, 2])
        y_end = np.minimum(bboxes1[i, 3], bboxes2[:, 3])
        overlap = np.maximum(x_end - x_start, 0) * np.maximum(y_end - y_start, 0)
        if mode == 'iou':
            union = area1[i] + area2 - overlap
        else:
            union = area1[i] if not exchange else area2
        union = np.maximum(union, eps)
        ious[i, :] = overlap / union
    if exchange:
        ious = ious.T
    return ious
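
# Example (hypothetical values): IoU of [0, 0, 10, 10] and [5, 5, 15, 15] is
# 25 / (100 + 100 - 25) ≈ 0.143:
#   bbox_overlaps(np.array([[0., 0., 10., 10.]]),
#                 np.array([[5., 5., 15., 15.]]))  # -> array([[~0.143]])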

def crop(clip, target, region):
    cropped_image = []
    for image in clip:
        cropped_image.append(F.crop(image, *region))
    target = target.copy()
    i, j, h, w = region
    # should we do something wrt the original size?
    target["size"] = torch.tensor([h, w])
    # note: `fields` is collected but not used to filter degenerate boxes here
    fields = ["labels", "area", "iscrowd"]
    if "boxes" in target:
        boxes = target["boxes"]
        max_size = torch.as_tensor([w, h], dtype=torch.float32)
        cropped_boxes = boxes - torch.as_tensor([j, i, j, i])
        cropped_boxes = torch.min(cropped_boxes.reshape(-1, 2, 2), max_size)
        cropped_boxes = cropped_boxes.clamp(min=0)
        area = (cropped_boxes[:, 1, :] - cropped_boxes[:, 0, :]).prod(dim=1)
        target["boxes"] = cropped_boxes.reshape(-1, 4)
        target["area"] = area
        fields.append("boxes")
    if "masks" in target:
        # FIXME should we update the area here if there are no boxes?
        target['masks'] = target['masks'][:, i:i + h, j:j + w]
        fields.append("masks")
    return cropped_image, target
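
# The region follows torchvision's (top, left, height, width) convention: e.g.
# crop(clip, target, (10, 20, 100, 200)) keeps rows 10:110 and columns 20:220
# of every frame (hypothetical values), shifting boxes by (-20, -10) and then
# clamping them to the new canvas.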

def hflip(clip, target):
    flipped_image = []
    for image in clip:
        flipped_image.append(F.hflip(image))
    w, h = clip[0].size
    target = target.copy()
    if "boxes" in target:
        boxes = target["boxes"]
        boxes = boxes[:, [2, 1, 0, 3]] * torch.as_tensor([-1, 1, -1, 1]) + torch.as_tensor([w, 0, w, 0])
        target["boxes"] = boxes
    if "masks" in target:
        target['masks'] = target['masks'].flip(-1)
    return flipped_image, target

def vflip(clip, target):
    flipped_image = []
    for image in clip:
        flipped_image.append(F.vflip(image))
    w, h = clip[0].size
    target = target.copy()
    if "boxes" in target:
        boxes = target["boxes"]
        boxes = boxes[:, [0, 3, 2, 1]] * torch.as_tensor([1, -1, 1, -1]) + torch.as_tensor([0, h, 0, h])
        target["boxes"] = boxes
    if "masks" in target:
        target['masks'] = target['masks'].flip(1)
    return flipped_image, target
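
# Both flips remap and re-order the box corners in one step: hflip maps
# x -> w - x, turning [x0, y0, x1, y1] into [w - x1, y0, w - x0, y1], and
# vflip maps y -> h - y analogously, so boxes stay in valid xyxy form.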

def resize(clip, target, size, max_size=None):
    # size can be min_size (scalar) or (w, h) tuple

    def get_size_with_aspect_ratio(image_size, size, max_size=None):
        w, h = image_size
        if max_size is not None:
            min_original_size = float(min((w, h)))
            max_original_size = float(max((w, h)))
            if max_original_size / min_original_size * size > max_size:
                size = int(round(max_size * min_original_size / max_original_size))
        if (w <= h and w == size) or (h <= w and h == size):
            return (h, w)
        if w < h:
            ow = size
            oh = int(size * h / w)
        else:
            oh = size
            ow = int(size * w / h)
        return (oh, ow)

    def get_size(image_size, size, max_size=None):
        if isinstance(size, (list, tuple)):
            return size[::-1]
        else:
            return get_size_with_aspect_ratio(image_size, size, max_size)

    size = get_size(clip[0].size, size, max_size)
    rescaled_image = []
    for image in clip:
        rescaled_image.append(F.resize(image, size))
    if target is None:
        return rescaled_image, None
    ratios = tuple(float(s) / float(s_orig) for s, s_orig in zip(rescaled_image[0].size, clip[0].size))
    ratio_width, ratio_height = ratios
    target = target.copy()
    if "boxes" in target:
        boxes = target["boxes"]
        scaled_boxes = boxes * torch.as_tensor([ratio_width, ratio_height, ratio_width, ratio_height])
        target["boxes"] = scaled_boxes
    if "area" in target:
        area = target["area"]
        scaled_area = area * (ratio_width * ratio_height)
        target["area"] = scaled_area
    h, w = size
    target["size"] = torch.tensor([h, w])
    if "masks" in target:
        if target['masks'].shape[0] > 0:
            target['masks'] = interpolate(
                target['masks'][:, None].float(), size, mode="nearest")[:, 0] > 0.5
        else:
            target['masks'] = torch.zeros((target['masks'].shape[0], h, w))
    return rescaled_image, target
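
# Example (hypothetical values): resize(clip, target, 480, max_size=800) maps a
# 1280x720 clip to 800x450; the scale min(480/720, 800/1280) = 0.625 is
# dictated by the max_size cap rather than by the requested short side.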

def pad(clip, target, padding):
    # assumes that we only pad on the bottom right corners
    padded_image = []
    for image in clip:
        padded_image.append(F.pad(image, (0, 0, padding[0], padding[1])))
    if target is None:
        return padded_image, None
    target = target.copy()
    # should we do something wrt the original size?
    target["size"] = torch.tensor(padded_image[0].size[::-1])
    if "masks" in target:
        target['masks'] = torch.nn.functional.pad(target['masks'], (0, padding[0], 0, padding[1]))
    return padded_image, target
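
# Example (hypothetical values): pad(clip, target, (30, 12)) adds 30 px on the
# right and 12 px at the bottom of every frame; boxes need no adjustment since
# only the bottom-right corner grows.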

class RandomCrop(object):
    def __init__(self, size):
        self.size = size

    def __call__(self, img, target):
        # img is a clip (list of frames); sample the crop from the first frame
        region = T.RandomCrop.get_params(img[0], self.size)
        return crop(img, target, region)

class RandomSizeCrop(object):
    def __init__(self, min_size: int, max_size: int):
        self.min_size = min_size
        self.max_size = max_size

    def __call__(self, img, target):
        # img is a clip (list of frames)
        w = random.randint(self.min_size, min(img[0].width, self.max_size))
        h = random.randint(self.min_size, min(img[0].height, self.max_size))
        region = T.RandomCrop.get_params(img[0], [h, w])
        return crop(img, target, region)

class CenterCrop(object):
    def __init__(self, size):
        self.size = size

    def __call__(self, img, target):
        # img is a clip (list of frames)
        image_width, image_height = img[0].size
        crop_height, crop_width = self.size
        crop_top = int(round((image_height - crop_height) / 2.))
        crop_left = int(round((image_width - crop_width) / 2.))
        return crop(img, target, (crop_top, crop_left, crop_height, crop_width))

class MinIoURandomCrop(object):
    # NOTE: unlike the other transforms in this file, this one operates on a
    # single PIL image and does not update masks.
    def __init__(self, min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3):
        self.min_ious = min_ious
        self.sample_mode = (1, *min_ious, 0)
        self.min_crop_size = min_crop_size

    def __call__(self, img, target):
        w, h = img.size
        while True:
            mode = random.choice(self.sample_mode)
            self.mode = mode
            if mode == 1:
                return img, target
            min_iou = mode
            boxes = target['boxes'].numpy()
            labels = target['labels']
            for i in range(50):
                new_w = rand.uniform(self.min_crop_size * w, w)
                new_h = rand.uniform(self.min_crop_size * h, h)
                # keep the crop aspect ratio between 0.5 and 2
                if new_h / new_w < 0.5 or new_h / new_w > 2:
                    continue
                left = rand.uniform(0, w - new_w)
                top = rand.uniform(0, h - new_h)
                patch = np.array((int(left), int(top), int(left + new_w), int(top + new_h)))
                if patch[2] == patch[0] or patch[3] == patch[1]:
                    continue
                overlaps = bbox_overlaps(patch.reshape(-1, 4), boxes.reshape(-1, 4)).reshape(-1)
                if len(overlaps) > 0 and overlaps.min() < min_iou:
                    continue
                if len(overlaps) > 0:
                    def is_center_of_bboxes_in_patch(boxes, patch):
                        center = (boxes[:, :2] + boxes[:, 2:]) / 2
                        mask = ((center[:, 0] > patch[0]) * (center[:, 1] > patch[1])
                                * (center[:, 0] < patch[2]) * (center[:, 1] < patch[3]))
                        return mask
                    mask = is_center_of_bboxes_in_patch(boxes, patch)
                    # retry unless every box center falls inside the patch
                    if not mask.all():
                        continue
                    # TODO: drop boxes whose centers fall outside the patch
                    # if not mask.any():
                    #     continue
                    boxes[:, 2:] = boxes[:, 2:].clip(max=patch[2:])
                    boxes[:, :2] = boxes[:, :2].clip(min=patch[:2])
                    boxes -= np.tile(patch[:2], 2)
                    target['boxes'] = torch.tensor(boxes)
                img = np.asarray(img)[patch[1]:patch[3], patch[0]:patch[2]]
                img = Image.fromarray(img)
                width, height = img.size
                target['orig_size'] = torch.tensor([height, width])
                target['size'] = torch.tensor([height, width])
                return img, target
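
# Sampling behaviour: mode 1 returns the input untouched, mode 0 only requires
# a geometrically valid patch with all box centers inside it, and fractional
# modes additionally retry (50 attempts per round) until every box overlaps the
# sampled patch with IoU >= mode.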

class RandomContrast(object):
    def __init__(self, lower=0.5, upper=1.5):
        self.lower = lower
        self.upper = upper
        assert self.upper >= self.lower, "contrast upper must be >= lower."
        assert self.lower >= 0, "contrast lower must be non-negative."

    def __call__(self, image, target):
        # apply with probability 0.5
        if rand.randint(2):
            alpha = rand.uniform(self.lower, self.upper)
            image *= alpha
        return image, target

class RandomBrightness(object):
    def __init__(self, delta=32):
        assert 0.0 <= delta <= 255.0
        self.delta = delta

    def __call__(self, image, target):
        # apply with probability 0.5
        if rand.randint(2):
            delta = rand.uniform(-self.delta, self.delta)
            image += delta
        return image, target

class RandomSaturation(object):
    def __init__(self, lower=0.5, upper=1.5):
        self.lower = lower
        self.upper = upper
        assert self.upper >= self.lower, "saturation upper must be >= lower."
        assert self.lower >= 0, "saturation lower must be non-negative."

    def __call__(self, image, target):
        # expects a float HSV image; scales the S channel
        if rand.randint(2):
            image[:, :, 1] *= rand.uniform(self.lower, self.upper)
        return image, target

class RandomHue(object):
    def __init__(self, delta=18.0):
        assert 0.0 <= delta <= 360.0
        self.delta = delta

    def __call__(self, image, target):
        # expects a float HSV image; hue wraps around at 360 degrees
        if rand.randint(2):
            image[:, :, 0] += rand.uniform(-self.delta, self.delta)
            image[:, :, 0][image[:, :, 0] > 360.0] -= 360.0
            image[:, :, 0][image[:, :, 0] < 0.0] += 360.0
        return image, target

class RandomLightingNoise(object):
    def __init__(self):
        self.perms = ((0, 1, 2), (0, 2, 1),
                      (1, 0, 2), (1, 2, 0),
                      (2, 0, 1), (2, 1, 0))

    def __call__(self, image, target):
        if rand.randint(2):
            swap = self.perms[rand.randint(len(self.perms))]
            shuffle = SwapChannels(swap)  # shuffle channels
            image = shuffle(image)
        return image, target

class ConvertColor(object):
    def __init__(self, current='BGR', transform='HSV'):
        self.transform = transform
        self.current = current

    def __call__(self, image, target):
        if self.current == 'BGR' and self.transform == 'HSV':
            image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        elif self.current == 'HSV' and self.transform == 'BGR':
            image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
        else:
            raise NotImplementedError
        return image, target

class SwapChannels(object):
    def __init__(self, swaps):
        self.swaps = swaps

    def __call__(self, image):
        image = image[:, :, self.swaps]
        return image

class PhotometricDistort(object):
    def __init__(self):
        self.pd = [
            RandomContrast(),
            ConvertColor(transform='HSV'),
            RandomSaturation(),
            RandomHue(),
            ConvertColor(current='HSV', transform='BGR'),
            RandomContrast()
        ]
        self.rand_brightness = RandomBrightness()
        self.rand_light_noise = RandomLightingNoise()

    def __call__(self, clip, target):
        imgs = []
        for img in clip:
            img = np.asarray(img).astype('float32')
            img, target = self.rand_brightness(img, target)
            # randomly apply contrast either before or after the HSV distortions
            if rand.randint(2):
                distort = Compose(self.pd[:-1])
            else:
                distort = Compose(self.pd[1:])
            img, target = distort(img, target)
            img, target = self.rand_light_noise(img, target)
            # clip before the uint8 cast to avoid wrap-around artifacts
            imgs.append(Image.fromarray(np.clip(img, 0, 255).astype('uint8')))
        return imgs, target
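
# A minimal usage sketch (hypothetical frames): each frame gets an independent
# brightness/contrast/HSV jitter while the target is left untouched.
#   distort = PhotometricDistort()
#   clip, target = distort([frame0, frame1], target)
# Note that ConvertColor assumes BGR channel order while np.asarray on a PIL
# image yields RGB; the behaviour is kept as-is here.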

# NOTE: Expand accesses target['masks'] unconditionally; adjust it before
# using it on mask-free targets.
class Expand(object):
    def __init__(self, mean):
        self.mean = mean

    def __call__(self, clip, target):
        # apply with probability 0.5
        if rand.randint(2):
            return clip, target
        imgs = []
        masks = []
        image = np.asarray(clip[0]).astype('float32')
        height, width, depth = image.shape
        # paste every frame at the same offset inside a mean-filled canvas
        ratio = rand.uniform(1, 4)
        left = rand.uniform(0, width * ratio - width)
        top = rand.uniform(0, height * ratio - height)
        for i in range(len(clip)):
            image = np.asarray(clip[i]).astype('float32')
            expand_image = np.zeros((int(height * ratio), int(width * ratio), depth), dtype=image.dtype)
            expand_image[:, :, :] = self.mean
            expand_image[int(top):int(top + height), int(left):int(left + width)] = image
            imgs.append(Image.fromarray(expand_image.astype('uint8')))
            expand_mask = torch.zeros((int(height * ratio), int(width * ratio)), dtype=torch.uint8)
            expand_mask[int(top):int(top + height), int(left):int(left + width)] = target['masks'][i]
            masks.append(expand_mask)
        boxes = target['boxes'].numpy()
        boxes[:, :2] += (int(left), int(top))
        boxes[:, 2:] += (int(left), int(top))
        target['boxes'] = torch.tensor(boxes)
        target['masks'] = torch.stack(masks)
        return imgs, target
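
# Example (hypothetical values): with ratio=2, a 640x480 clip is pasted into a
# 1280x960 mean-filled canvas at one shared random (left, top) offset, and the
# boxes and masks are shifted by that same offset so annotations stay aligned
# across frames.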

class RandomHorizontalFlip(object):
    def __init__(self, p=0.5):
        self.p = p

    def __call__(self, img, target):
        if random.random() < self.p:
            return hflip(img, target)
        return img, target


class RandomVerticalFlip(object):
    def __init__(self, p=0.5):
        self.p = p

    def __call__(self, img, target):
        if random.random() < self.p:
            return vflip(img, target)
        return img, target

class RandomResize(object):
    def __init__(self, sizes, max_size=None):
        assert isinstance(sizes, (list, tuple))
        self.sizes = sizes
        self.max_size = max_size

    def __call__(self, img, target=None):
        size = random.choice(self.sizes)
        return resize(img, target, size, self.max_size)


class RandomPad(object):
    def __init__(self, max_pad):
        self.max_pad = max_pad

    def __call__(self, img, target):
        pad_x = random.randint(0, self.max_pad)
        pad_y = random.randint(0, self.max_pad)
        return pad(img, target, (pad_x, pad_y))

class RandomSelect(object):
    """
    Randomly selects between transforms1 and transforms2,
    with probability p for transforms1 and (1 - p) for transforms2.
    """
    def __init__(self, transforms1, transforms2, p=0.5):
        self.transforms1 = transforms1
        self.transforms2 = transforms2
        self.p = p

    def __call__(self, img, target):
        if random.random() < self.p:
            return self.transforms1(img, target)
        return self.transforms2(img, target)

class ToTensor(object):
    def __call__(self, clip, target):
        img = []
        for im in clip:
            img.append(F.to_tensor(im))
        return img, target


class RandomErasing(object):
    def __init__(self, *args, **kwargs):
        self.eraser = T.RandomErasing(*args, **kwargs)

    def __call__(self, img, target):
        return self.eraser(img), target

class Normalize(object):
    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, clip, target=None):
        image = []
        for im in clip:
            image.append(F.normalize(im, mean=self.mean, std=self.std))
        if target is None:
            return image, None
        target = target.copy()
        h, w = image[0].shape[-2:]
        if "boxes" in target:
            # convert absolute xyxy boxes to normalized cxcywh
            boxes = target["boxes"]
            boxes = box_xyxy_to_cxcywh(boxes)
            boxes = boxes / torch.tensor([w, h, w, h], dtype=torch.float32)
            target["boxes"] = boxes
        return image, target
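
# Example (hypothetical values): on a 640x480 frame, the xyxy box
# [160, 120, 480, 360] becomes cxcywh [320, 240, 320, 240] and then the
# normalized [0.5, 0.5, 0.5, 0.5].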

class Compose(object):
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, image, target):
        for t in self.transforms:
            image, target = t(image, target)
        return image, target

    def __repr__(self):
        format_string = self.__class__.__name__ + "("
        for t in self.transforms:
            format_string += "\n"
            format_string += "    {0}".format(t)
        format_string += "\n)"
        return format_string
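
# A minimal sketch of a typical training pipeline built from these transforms.
# The sizes and normalization statistics below are hypothetical placeholders,
# not values taken from this repo's configs:
#
#   transform = Compose([
#       RandomHorizontalFlip(),
#       PhotometricDistort(),
#       RandomResize([480, 512, 544], max_size=800),
#       ToTensor(),
#       Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
#   ])
#   frames, targets = transform(list_of_pil_frames, target_dict)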