# PulseFocusPlatform/static/ppdet/modeling/ops.py
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from numbers import Integral
import math
import six
import paddle
from paddle import fluid
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.initializer import NumpyArrayInitializer
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.regularizer import L2Decay
from ppdet.core.workspace import register, serializable
from ppdet.utils.bbox_utils import bbox_overlaps, box_to_delta
__all__ = [
'AnchorGenerator', 'AnchorGrid', 'DropBlock', 'RPNTargetAssign',
'GenerateProposals', 'MultiClassNMS', 'BBoxAssigner', 'MaskAssigner',
'RoIAlign', 'RoIPool', 'MultiBoxHead', 'SSDLiteMultiBoxHead',
'SSDOutputDecoder', 'RetinaTargetAssign', 'RetinaOutputDecoder', 'ConvNorm',
'DeformConvNorm', 'MultiClassSoftNMS', 'MatrixNMS', 'LibraBBoxAssigner',
'DeformConv'
]
def _conv_offset(input, filter_size, stride, padding, act=None, name=None):
out_channel = filter_size * filter_size * 3
out = fluid.layers.conv2d(
input,
num_filters=out_channel,
filter_size=filter_size,
stride=stride,
padding=padding,
param_attr=ParamAttr(
initializer=fluid.initializer.Constant(0), name=name + ".w_0"),
bias_attr=ParamAttr(
initializer=fluid.initializer.Constant(0),
learning_rate=2.,
regularizer=L2Decay(0.),
name=name + ".b_0"),
act=act,
name=name)
return out
def DeformConv(input,
num_filters,
filter_size,
stride=1,
groups=1,
dilation=1,
lr_scale=1,
initializer=None,
bias_attr=False,
name=None):
if bias_attr:
bias_para = ParamAttr(
name=name + "_bias",
initializer=fluid.initializer.Constant(0),
regularizer=L2Decay(0.),
learning_rate=lr_scale * 2)
else:
bias_para = False
offset_mask = _conv_offset(
input=input,
filter_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
act=None,
name=name + "_conv_offset")
offset_channel = filter_size**2 * 2
mask_channel = filter_size**2
offset, mask = fluid.layers.split(
input=offset_mask,
num_or_sections=[offset_channel, mask_channel],
dim=1)
mask = fluid.layers.sigmoid(mask)
conv = fluid.layers.deformable_conv(
input=input,
offset=offset,
mask=mask,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2 * dilation,
dilation=dilation,
groups=groups,
deformable_groups=1,
im2col_step=1,
param_attr=ParamAttr(
name=name + "_weights",
initializer=initializer,
learning_rate=lr_scale),
bias_attr=bias_para,
name=name + ".conv2d.output.1")
return conv
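# Usage sketch (illustrative only; "data" is a hypothetical input variable):
# a 3x3 modulated deformable convolution (DCN v2). The offset/mask branch
# built by _conv_offset is zero-initialized, so sampling starts on the
# regular grid (offsets 0, modulation sigmoid(0) = 0.5).
#
#   data = fluid.data(name='feat', shape=[None, 256, 56, 56], dtype='float32')
#   out = DeformConv(data, num_filters=256, filter_size=3, name='dcn_1')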
def DeformConvNorm(input,
num_filters,
filter_size,
stride=1,
groups=1,
norm_decay=0.,
norm_type='affine_channel',
norm_groups=32,
dilation=1,
lr_scale=1,
freeze_norm=False,
act=None,
norm_name=None,
initializer=None,
bias_attr=False,
name=None):
assert norm_type in ['bn', 'sync_bn', 'affine_channel', 'gn']
conv = DeformConv(input, num_filters, filter_size, stride, groups, dilation,
lr_scale, initializer, bias_attr, name)
norm_lr = 0. if freeze_norm else 1.
pattr = ParamAttr(
name=norm_name + '_scale',
learning_rate=norm_lr * lr_scale,
regularizer=L2Decay(norm_decay))
battr = ParamAttr(
name=norm_name + '_offset',
learning_rate=norm_lr * lr_scale,
regularizer=L2Decay(norm_decay))
if norm_type in ['bn', 'sync_bn']:
global_stats = freeze_norm
out = fluid.layers.batch_norm(
input=conv,
act=act,
name=norm_name + '.output.1',
param_attr=pattr,
bias_attr=battr,
moving_mean_name=norm_name + '_mean',
moving_variance_name=norm_name + '_variance',
use_global_stats=global_stats)
scale = fluid.framework._get_var(pattr.name)
bias = fluid.framework._get_var(battr.name)
elif norm_type == 'gn':
out = fluid.layers.group_norm(
input=conv,
act=act,
name=norm_name + '.output.1',
groups=norm_groups,
param_attr=pattr,
bias_attr=battr)
scale = fluid.framework._get_var(pattr.name)
bias = fluid.framework._get_var(battr.name)
elif norm_type == 'affine_channel':
scale = fluid.layers.create_parameter(
shape=[conv.shape[1]],
dtype=conv.dtype,
attr=pattr,
default_initializer=fluid.initializer.Constant(1.))
bias = fluid.layers.create_parameter(
shape=[conv.shape[1]],
dtype=conv.dtype,
attr=battr,
default_initializer=fluid.initializer.Constant(0.))
out = fluid.layers.affine_channel(
x=conv, scale=scale, bias=bias, act=act)
if freeze_norm:
scale.stop_gradient = True
bias.stop_gradient = True
return out
def ConvNorm(input,
num_filters,
filter_size,
stride=1,
groups=1,
norm_decay=0.,
norm_type='affine_channel',
norm_groups=32,
dilation=1,
lr_scale=1,
freeze_norm=False,
act=None,
norm_name=None,
initializer=None,
bias_attr=False,
name=None):
fan = num_filters
if bias_attr:
bias_para = ParamAttr(
name=name + "_bias",
initializer=fluid.initializer.Constant(value=0),
learning_rate=lr_scale * 2)
else:
bias_para = False
conv = fluid.layers.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=((filter_size - 1) // 2) * dilation,
dilation=dilation,
groups=groups,
act=None,
param_attr=ParamAttr(
name=name + "_weights",
initializer=initializer,
learning_rate=lr_scale),
bias_attr=bias_para,
name=name + '.conv2d.output.1')
norm_lr = 0. if freeze_norm else 1.
pattr = ParamAttr(
name=norm_name + '_scale',
learning_rate=norm_lr * lr_scale,
regularizer=L2Decay(norm_decay))
battr = ParamAttr(
name=norm_name + '_offset',
learning_rate=norm_lr * lr_scale,
regularizer=L2Decay(norm_decay))
if norm_type in ['bn', 'sync_bn']:
global_stats = freeze_norm
out = fluid.layers.batch_norm(
input=conv,
act=act,
name=norm_name + '.output.1',
param_attr=pattr,
bias_attr=battr,
moving_mean_name=norm_name + '_mean',
moving_variance_name=norm_name + '_variance',
use_global_stats=global_stats)
scale = fluid.framework._get_var(pattr.name)
bias = fluid.framework._get_var(battr.name)
elif norm_type == 'gn':
out = fluid.layers.group_norm(
input=conv,
act=act,
name=norm_name + '.output.1',
groups=norm_groups,
param_attr=pattr,
bias_attr=battr)
scale = fluid.framework._get_var(pattr.name)
bias = fluid.framework._get_var(battr.name)
elif norm_type == 'affine_channel':
scale = fluid.layers.create_parameter(
shape=[conv.shape[1]],
dtype=conv.dtype,
attr=pattr,
default_initializer=fluid.initializer.Constant(1.))
bias = fluid.layers.create_parameter(
shape=[conv.shape[1]],
dtype=conv.dtype,
attr=battr,
default_initializer=fluid.initializer.Constant(0.))
out = fluid.layers.affine_channel(
x=conv, scale=scale, bias=bias, act=act)
if freeze_norm:
scale.stop_gradient = True
bias.stop_gradient = True
return out
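# Usage sketch (illustrative only; names are hypothetical): a conv + norm
# block with frozen affine-channel parameters, the common backbone setting.
#
#   feat = ConvNorm(data, num_filters=256, filter_size=3,
#                   norm_type='affine_channel', freeze_norm=True,
#                   name='res_conv1', norm_name='res_conv1_norm')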
def DropBlock(input, block_size, keep_prob, is_test):
if is_test:
return input
def CalculateGamma(input, block_size, keep_prob):
input_shape = fluid.layers.shape(input)
feat_shape_tmp = fluid.layers.slice(input_shape, [0], [3], [4])
feat_shape_tmp = fluid.layers.cast(feat_shape_tmp, dtype="float32")
feat_shape_t = fluid.layers.reshape(feat_shape_tmp, [1, 1, 1, 1])
feat_area = fluid.layers.pow(feat_shape_t, factor=2)
block_shape_t = fluid.layers.fill_constant(
shape=[1, 1, 1, 1], value=block_size, dtype='float32')
block_area = fluid.layers.pow(block_shape_t, factor=2)
useful_shape_t = feat_shape_t - block_shape_t + 1
useful_area = fluid.layers.pow(useful_shape_t, factor=2)
upper_t = feat_area * (1 - keep_prob)
bottom_t = block_area * useful_area
output = upper_t / bottom_t
return output
gamma = CalculateGamma(input, block_size=block_size, keep_prob=keep_prob)
input_shape = fluid.layers.shape(input)
p = fluid.layers.expand_as(gamma, input)
input_shape_tmp = fluid.layers.cast(input_shape, dtype="int64")
random_matrix = fluid.layers.uniform_random(
input_shape_tmp, dtype='float32', min=0.0, max=1.0)
one_zero_m = fluid.layers.less_than(random_matrix, p)
one_zero_m.stop_gradient = True
one_zero_m = fluid.layers.cast(one_zero_m, dtype="float32")
mask_flag = fluid.layers.pool2d(
one_zero_m,
pool_size=block_size,
pool_type='max',
pool_stride=1,
pool_padding=block_size // 2)
mask = 1.0 - mask_flag
elem_numel = fluid.layers.reduce_prod(input_shape)
elem_numel_m = fluid.layers.cast(elem_numel, dtype="float32")
elem_numel_m.stop_gradient = True
elem_sum = fluid.layers.reduce_sum(mask)
elem_sum_m = fluid.layers.cast(elem_sum, dtype="float32")
elem_sum_m.stop_gradient = True
output = fluid.layers.elementwise_mul(input,
mask) * elem_numel_m / elem_sum_m
return output
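# CalculateGamma above computes the seed probability from the DropBlock
# paper (Ghiasi et al., 2018):
#
#   gamma = (1 - keep_prob) / block_size^2
#           * feat_size^2 / (feat_size - block_size + 1)^2
#
# The max-pooling with kernel block_size then grows each seeded point into
# a block_size x block_size zeroed block, and the surviving activations are
# rescaled by numel / sum(mask) to keep the expected activation constant,
# e.g. (a sketch):
#
#   out = DropBlock(feat, block_size=3, keep_prob=0.9, is_test=False)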
@register
@serializable
class AnchorGenerator(object):
__op__ = fluid.layers.anchor_generator
__append_doc__ = True
def __init__(self,
stride=[16.0, 16.0],
anchor_sizes=[32, 64, 128, 256, 512],
aspect_ratios=[0.5, 1., 2.],
variance=[1., 1., 1., 1.]):
super(AnchorGenerator, self).__init__()
self.anchor_sizes = anchor_sizes
self.aspect_ratios = aspect_ratios
self.variance = variance
self.stride = stride
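# Note on the __op__ pattern used by AnchorGenerator and several classes
# below: ppdet.core.workspace turns such classes into partial applications
# of the referenced fluid layer, so the constructor arguments become preset
# keyword arguments and instances are called with the remaining inputs, e.g.
#
#   anchor_gen = AnchorGenerator(anchor_sizes=[32, 64, 128, 256, 512])
#   anchor, var = anchor_gen(input=rpn_conv)  # -> fluid.layers.anchor_generator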
@register
@serializable
class AnchorGrid(object):
"""Generate anchor grid
Args:
image_size (int or list): input image size, may be a single integer or
list of [h, w]. Default: 512
min_level (int): min level of the feature pyramid. Default: 3
max_level (int): max level of the feature pyramid. Default: 7
anchor_base_scale: base anchor scale. Default: 4
num_scales: number of anchor scales. Default: 3
aspect_ratios: aspect ratios. default: [[1, 1], [1.4, 0.7], [0.7, 1.4]]
"""
def __init__(self,
image_size=512,
min_level=3,
max_level=7,
anchor_base_scale=4,
num_scales=3,
aspect_ratios=[[1, 1], [1.4, 0.7], [0.7, 1.4]]):
super(AnchorGrid, self).__init__()
if isinstance(image_size, Integral):
self.image_size = [image_size, image_size]
else:
self.image_size = image_size
for dim in self.image_size:
assert dim % 2 ** max_level == 0, \
"image size should be multiple of the max level stride"
self.min_level = min_level
self.max_level = max_level
self.anchor_base_scale = anchor_base_scale
self.num_scales = num_scales
self.aspect_ratios = aspect_ratios
@property
def base_cell(self):
if not hasattr(self, '_base_cell'):
self._base_cell = self.make_cell()
return self._base_cell
def make_cell(self):
scales = [2**(i / self.num_scales) for i in range(self.num_scales)]
scales = np.array(scales)
ratios = np.array(self.aspect_ratios)
ws = np.outer(scales, ratios[:, 0]).reshape(-1, 1)
hs = np.outer(scales, ratios[:, 1]).reshape(-1, 1)
anchors = np.hstack((-0.5 * ws, -0.5 * hs, 0.5 * ws, 0.5 * hs))
return anchors
def make_grid(self, stride):
cell = self.base_cell * stride * self.anchor_base_scale
x_steps = np.arange(stride // 2, self.image_size[1], stride)
y_steps = np.arange(stride // 2, self.image_size[0], stride)
offset_x, offset_y = np.meshgrid(x_steps, y_steps)
offset_x = offset_x.flatten()
offset_y = offset_y.flatten()
offsets = np.stack((offset_x, offset_y, offset_x, offset_y), axis=-1)
offsets = offsets[:, np.newaxis, :]
return (cell + offsets).reshape(-1, 4)
def generate(self):
return [
self.make_grid(2**l)
for l in range(self.min_level, self.max_level + 1)
]
def __call__(self):
if not hasattr(self, '_anchor_vars'):
anchor_vars = []
helper = LayerHelper('anchor_grid')
for idx, l in enumerate(range(self.min_level, self.max_level + 1)):
stride = 2**l
anchors = self.make_grid(stride)
var = helper.create_parameter(
attr=ParamAttr(name='anchors_{}'.format(idx)),
shape=anchors.shape,
dtype='float32',
stop_gradient=True,
default_initializer=NumpyArrayInitializer(anchors))
anchor_vars.append(var)
var.persistable = True
self._anchor_vars = anchor_vars
return self._anchor_vars
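# Worked example with the defaults above: image_size=512, min_level=3 and
# max_level=7 give strides 8, 16, 32, 64 and 128. Every cell carries
# num_scales * len(aspect_ratios) = 9 anchors, so level 3 alone yields an
# array of shape ((512 / 8)**2 * 9, 4) = (36864, 4) from make_grid(8).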
@register
@serializable
class RPNTargetAssign(object):
__op__ = fluid.layers.rpn_target_assign
__append_doc__ = True
def __init__(self,
rpn_batch_size_per_im=256,
rpn_straddle_thresh=0.,
rpn_fg_fraction=0.5,
rpn_positive_overlap=0.7,
rpn_negative_overlap=0.3,
use_random=True):
super(RPNTargetAssign, self).__init__()
self.rpn_batch_size_per_im = rpn_batch_size_per_im
self.rpn_straddle_thresh = rpn_straddle_thresh
self.rpn_fg_fraction = rpn_fg_fraction
self.rpn_positive_overlap = rpn_positive_overlap
self.rpn_negative_overlap = rpn_negative_overlap
self.use_random = use_random
@register
@serializable
class GenerateProposals(object):
__op__ = fluid.layers.generate_proposals
__append_doc__ = True
def __init__(self,
pre_nms_top_n=6000,
post_nms_top_n=1000,
nms_thresh=.5,
min_size=.1,
eta=1.):
super(GenerateProposals, self).__init__()
self.pre_nms_top_n = pre_nms_top_n
self.post_nms_top_n = post_nms_top_n
self.nms_thresh = nms_thresh
self.min_size = min_size
self.eta = eta
@register
class MaskAssigner(object):
__op__ = fluid.layers.generate_mask_labels
__append_doc__ = True
__shared__ = ['num_classes']
def __init__(self, num_classes=81, resolution=14):
super(MaskAssigner, self).__init__()
self.num_classes = num_classes
self.resolution = resolution
@register
@serializable
class MultiClassNMS(object):
__op__ = fluid.layers.multiclass_nms
__append_doc__ = True
def __init__(self,
score_threshold=.05,
nms_top_k=-1,
keep_top_k=100,
nms_threshold=.5,
normalized=False,
nms_eta=1.0,
background_label=0):
super(MultiClassNMS, self).__init__()
self.score_threshold = score_threshold
self.nms_top_k = nms_top_k
self.keep_top_k = keep_top_k
self.nms_threshold = nms_threshold
self.normalized = normalized
self.nms_eta = nms_eta
self.background_label = background_label
@register
@serializable
class MatrixNMS(object):
__op__ = 'paddle.fluid.layers.matrix_nms'
__append_doc__ = True
def __init__(self,
score_threshold=.05,
post_threshold=.05,
nms_top_k=-1,
keep_top_k=100,
use_gaussian=False,
gaussian_sigma=2.,
normalized=False,
background_label=0):
super(MatrixNMS, self).__init__()
self.score_threshold = score_threshold
self.post_threshold = post_threshold
self.nms_top_k = nms_top_k
self.keep_top_k = keep_top_k
self.normalized = normalized
self.use_gaussian = use_gaussian
self.gaussian_sigma = gaussian_sigma
self.background_label = background_label
@register
@serializable
class MultiClassSoftNMS(object):
def __init__(
self,
score_threshold=0.01,
keep_top_k=300,
softnms_sigma=0.5,
normalized=False,
background_label=0, ):
super(MultiClassSoftNMS, self).__init__()
self.score_threshold = score_threshold
self.keep_top_k = keep_top_k
self.softnms_sigma = softnms_sigma
self.normalized = normalized
self.background_label = background_label
def __call__(self, bboxes, scores):
def create_tmp_var(program, name, dtype, shape, lod_level):
return program.current_block().create_var(
name=name, dtype=dtype, shape=shape, lod_level=lod_level)
def _soft_nms_for_cls(dets, sigma, thres):
"""soft_nms_for_cls"""
dets_final = []
while len(dets) > 0:
maxpos = np.argmax(dets[:, 0])
dets_final.append(dets[maxpos].copy())
ts, tx1, ty1, tx2, ty2 = dets[maxpos]
scores = dets[:, 0]
# force remove bbox at maxpos
scores[maxpos] = -1
x1 = dets[:, 1]
y1 = dets[:, 2]
x2 = dets[:, 3]
y2 = dets[:, 4]
eta = 0 if self.normalized else 1
areas = (x2 - x1 + eta) * (y2 - y1 + eta)
xx1 = np.maximum(tx1, x1)
yy1 = np.maximum(ty1, y1)
xx2 = np.minimum(tx2, x2)
yy2 = np.minimum(ty2, y2)
w = np.maximum(0.0, xx2 - xx1 + eta)
h = np.maximum(0.0, yy2 - yy1 + eta)
inter = w * h
ovr = inter / (areas + areas[maxpos] - inter)
weight = np.exp(-(ovr * ovr) / sigma)
scores = scores * weight
idx_keep = np.where(scores >= thres)
dets[:, 0] = scores
dets = dets[idx_keep]
dets_final = np.array(dets_final).reshape(-1, 5)
return dets_final
def _soft_nms(bboxes, scores):
class_nums = scores.shape[-1]
softnms_thres = self.score_threshold
softnms_sigma = self.softnms_sigma
keep_top_k = self.keep_top_k
cls_boxes = [[] for _ in range(class_nums)]
cls_ids = [[] for _ in range(class_nums)]
start_idx = 1 if self.background_label == 0 else 0
for j in range(start_idx, class_nums):
inds = np.where(scores[:, j] >= softnms_thres)[0]
scores_j = scores[inds, j]
rois_j = bboxes[inds, j, :] if len(
bboxes.shape) > 2 else bboxes[inds, :]
dets_j = np.hstack((scores_j[:, np.newaxis], rois_j)).astype(
np.float32, copy=False)
cls_rank = np.argsort(-dets_j[:, 0])
dets_j = dets_j[cls_rank]
cls_boxes[j] = _soft_nms_for_cls(
dets_j, sigma=softnms_sigma, thres=softnms_thres)
cls_ids[j] = np.array([j] * cls_boxes[j].shape[0]).reshape(-1,
1)
cls_boxes = np.vstack(cls_boxes[start_idx:])
cls_ids = np.vstack(cls_ids[start_idx:])
pred_result = np.hstack([cls_ids, cls_boxes])
# Limit to max_per_image detections **over all classes**
image_scores = cls_boxes[:, 0]
if len(image_scores) > keep_top_k:
image_thresh = np.sort(image_scores)[-keep_top_k]
keep = np.where(cls_boxes[:, 0] >= image_thresh)[0]
pred_result = pred_result[keep, :]
return pred_result
def _batch_softnms(bboxes, scores):
batch_offsets = bboxes.lod()
bboxes = np.array(bboxes)
scores = np.array(scores)
out_offsets = [0]
pred_res = []
if len(batch_offsets) > 0:
batch_offset = batch_offsets[0]
for i in range(len(batch_offset) - 1):
s, e = batch_offset[i], batch_offset[i + 1]
pred = _soft_nms(bboxes[s:e], scores[s:e])
out_offsets.append(pred.shape[0] + out_offsets[-1])
pred_res.append(pred)
else:
assert len(bboxes.shape) == 3
assert len(scores.shape) == 3
for i in range(bboxes.shape[0]):
pred = _soft_nms(bboxes[i], scores[i])
out_offsets.append(pred.shape[0] + out_offsets[-1])
pred_res.append(pred)
res = fluid.LoDTensor()
res.set_lod([out_offsets])
if len(pred_res) == 0:
pred_res = np.array([[1]], dtype=np.float32)
res.set(np.vstack(pred_res).astype(np.float32), fluid.CPUPlace())
return res
pred_result = create_tmp_var(
fluid.default_main_program(),
name='softnms_pred_result',
dtype='float32',
shape=[-1, 6],
lod_level=1)
fluid.layers.py_func(
func=_batch_softnms, x=[bboxes, scores], out=pred_result)
return pred_result
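# _soft_nms_for_cls above uses the Gaussian rescoring rule of Soft-NMS
# (Bodla et al., 2017): instead of discarding boxes that overlap the
# current top box M, their scores decay as
#
#   score_i *= exp(-iou(M, b_i)**2 / sigma)
#
# With the default softnms_sigma=0.5, a box at IoU 0.7 keeps
# exp(-0.49 / 0.5) ~= 0.375 of its score; boxes whose decayed score falls
# below score_threshold are dropped on later iterations.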
@register
@serializable
class MultiClassDiouNMS(object):
def __init__(
self,
score_threshold=0.05,
keep_top_k=100,
nms_threshold=0.5,
normalized=False,
background_label=0, ):
super(MultiClassDiouNMS, self).__init__()
self.score_threshold = score_threshold
self.nms_threshold = nms_threshold
self.keep_top_k = keep_top_k
self.normalized = normalized
self.background_label = background_label
def __call__(self, bboxes, scores):
def create_tmp_var(program, name, dtype, shape, lod_level):
return program.current_block().create_var(
name=name, dtype=dtype, shape=shape, lod_level=lod_level)
def _calc_diou_term(dets1, dets2):
eps = 1.e-10
eta = 0 if self.normalized else 1
x1, y1, x2, y2 = dets1[0], dets1[1], dets1[2], dets1[3]
x1g, y1g, x2g, y2g = dets2[0], dets2[1], dets2[2], dets2[3]
cx = (x1 + x2) / 2
cy = (y1 + y2) / 2
w = x2 - x1 + eta
h = y2 - y1 + eta
cxg = (x1g + x2g) / 2
cyg = (y1g + y2g) / 2
wg = x2g - x1g + eta
hg = y2g - y1g + eta
x2 = np.maximum(x1, x2)
y2 = np.maximum(y1, y2)
# A or B
xc1 = np.minimum(x1, x1g)
yc1 = np.minimum(y1, y1g)
xc2 = np.maximum(x2, x2g)
yc2 = np.maximum(y2, y2g)
# DIOU term
dist_intersection = (cx - cxg)**2 + (cy - cyg)**2
dist_union = (xc2 - xc1)**2 + (yc2 - yc1)**2
diou_term = (dist_intersection + eps) / (dist_union + eps)
return diou_term
def _diou_nms_for_cls(dets, thres):
"""_diou_nms_for_cls"""
scores = dets[:, 0]
x1 = dets[:, 1]
y1 = dets[:, 2]
x2 = dets[:, 3]
y2 = dets[:, 4]
eta = 0 if self.normalized else 1
areas = (x2 - x1 + eta) * (y2 - y1 + eta)
dt_num = dets.shape[0]
order = np.array(range(dt_num))
keep = []
while order.size > 0:
i = order[0]
keep.append(i)
xx1 = np.maximum(x1[i], x1[order[1:]])
yy1 = np.maximum(y1[i], y1[order[1:]])
xx2 = np.minimum(x2[i], x2[order[1:]])
yy2 = np.minimum(y2[i], y2[order[1:]])
w = np.maximum(0.0, xx2 - xx1 + eta)
h = np.maximum(0.0, yy2 - yy1 + eta)
inter = w * h
ovr = inter / (areas[i] + areas[order[1:]] - inter)
diou_term = _calc_diou_term([x1[i], y1[i], x2[i], y2[i]], [
x1[order[1:]], y1[order[1:]], x2[order[1:]], y2[order[1:]]
])
inds = np.where(ovr - diou_term <= thres)[0]
order = order[inds + 1]
dets_final = dets[keep]
return dets_final
def _diou_nms(bboxes, scores):
bboxes = np.array(bboxes)
scores = np.array(scores)
class_nums = scores.shape[-1]
score_threshold = self.score_threshold
nms_threshold = self.nms_threshold
keep_top_k = self.keep_top_k
cls_boxes = [[] for _ in range(class_nums)]
cls_ids = [[] for _ in range(class_nums)]
start_idx = 1 if self.background_label == 0 else 0
for j in range(start_idx, class_nums):
inds = np.where(scores[:, j] >= score_threshold)[0]
scores_j = scores[inds, j]
rois_j = bboxes[inds, j, :]
dets_j = np.hstack((scores_j[:, np.newaxis], rois_j)).astype(
np.float32, copy=False)
cls_rank = np.argsort(-dets_j[:, 0])
dets_j = dets_j[cls_rank]
cls_boxes[j] = _diou_nms_for_cls(dets_j, thres=nms_threshold)
cls_ids[j] = np.array([j] * cls_boxes[j].shape[0]).reshape(-1,
1)
cls_boxes = np.vstack(cls_boxes[start_idx:])
cls_ids = np.vstack(cls_ids[start_idx:])
pred_result = np.hstack([cls_ids, cls_boxes]).astype(np.float32)
# Limit to max_per_image detections **over all classes**
image_scores = cls_boxes[:, 0]
if len(image_scores) > keep_top_k:
image_thresh = np.sort(image_scores)[-keep_top_k]
keep = np.where(cls_boxes[:, 0] >= image_thresh)[0]
pred_result = pred_result[keep, :]
res = fluid.LoDTensor()
res.set_lod([[0, pred_result.shape[0]]])
if pred_result.shape[0] == 0:
pred_result = np.array([[1]], dtype=np.float32)
res.set(pred_result, fluid.CPUPlace())
return res
pred_result = create_tmp_var(
fluid.default_main_program(),
name='diou_nms_pred_result',
dtype='float32',
shape=[-1, 6],
lod_level=0)
fluid.layers.py_func(
func=_diou_nms, x=[bboxes, scores], out=pred_result)
return pred_result
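# _diou_nms_for_cls above implements DIoU-NMS (Zheng et al., 2020): a box
# b_i is suppressed by the top box M only when
#
#   iou(M, b_i) - rho2(M, b_i) / c2 > nms_threshold
#
# where rho2 is the squared distance between box centers and c2 the squared
# diagonal of the smallest box enclosing both (_calc_diou_term). The
# penalty keeps high-IoU boxes whose centers lie far from M.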
@register
class BBoxAssigner(object):
__op__ = fluid.layers.generate_proposal_labels
__append_doc__ = True
__shared__ = ['num_classes']
def __init__(self,
batch_size_per_im=512,
fg_fraction=.25,
fg_thresh=.5,
bg_thresh_hi=.5,
bg_thresh_lo=0.,
bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
num_classes=81,
shuffle_before_sample=True):
super(BBoxAssigner, self).__init__()
self.batch_size_per_im = batch_size_per_im
self.fg_fraction = fg_fraction
self.fg_thresh = fg_thresh
self.bg_thresh_hi = bg_thresh_hi
self.bg_thresh_lo = bg_thresh_lo
self.bbox_reg_weights = bbox_reg_weights
self.class_nums = num_classes
self.use_random = shuffle_before_sample
@register
class LibraBBoxAssigner(object):
__shared__ = ['num_classes']
def __init__(self,
batch_size_per_im=512,
fg_fraction=.25,
fg_thresh=.5,
bg_thresh_hi=.5,
bg_thresh_lo=0.,
bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
num_classes=81,
shuffle_before_sample=True,
is_cls_agnostic=False,
num_bins=3):
super(LibraBBoxAssigner, self).__init__()
self.batch_size_per_im = batch_size_per_im
self.fg_fraction = fg_fraction
self.fg_thresh = fg_thresh
self.bg_thresh_hi = bg_thresh_hi
self.bg_thresh_lo = bg_thresh_lo
self.bbox_reg_weights = bbox_reg_weights
self.class_nums = num_classes
self.use_random = shuffle_before_sample
self.is_cls_agnostic = is_cls_agnostic
self.num_bins = num_bins
def __call__(
self,
rpn_rois,
gt_classes,
is_crowd,
gt_boxes,
im_info, ):
return self.generate_proposal_label_libra(
rpn_rois=rpn_rois,
gt_classes=gt_classes,
is_crowd=is_crowd,
gt_boxes=gt_boxes,
im_info=im_info,
batch_size_per_im=self.batch_size_per_im,
fg_fraction=self.fg_fraction,
fg_thresh=self.fg_thresh,
bg_thresh_hi=self.bg_thresh_hi,
bg_thresh_lo=self.bg_thresh_lo,
bbox_reg_weights=self.bbox_reg_weights,
class_nums=self.class_nums,
use_random=self.use_random,
is_cls_agnostic=self.is_cls_agnostic,
is_cascade_rcnn=False)
def generate_proposal_label_libra(
self, rpn_rois, gt_classes, is_crowd, gt_boxes, im_info,
batch_size_per_im, fg_fraction, fg_thresh, bg_thresh_hi,
bg_thresh_lo, bbox_reg_weights, class_nums, use_random,
is_cls_agnostic, is_cascade_rcnn):
num_bins = self.num_bins
def create_tmp_var(program, name, dtype, shape, lod_level=None):
return program.current_block().create_var(
name=name, dtype=dtype, shape=shape, lod_level=lod_level)
def _sample_pos(max_overlaps, max_classes, pos_inds, num_expected):
if len(pos_inds) <= num_expected:
return pos_inds
else:
unique_gt_inds = np.unique(max_classes[pos_inds])
num_gts = len(unique_gt_inds)
num_per_gt = int(round(num_expected / float(num_gts)) + 1)
sampled_inds = []
for i in unique_gt_inds:
inds = np.nonzero(max_classes == i)[0]
before_len = len(inds)
inds = list(set(inds) & set(pos_inds))
after_len = len(inds)
if len(inds) > num_per_gt:
inds = np.random.choice(
inds, size=num_per_gt, replace=False)
sampled_inds.extend(list(inds))  # collect the samples drawn for this class
if len(sampled_inds) < num_expected:
num_extra = num_expected - len(sampled_inds)
extra_inds = np.array(
list(set(pos_inds) - set(sampled_inds)))
assert len(sampled_inds)+len(extra_inds) == len(pos_inds), \
"sum of sampled_inds({}) and extra_inds({}) length must be equal with pos_inds({})!".format(
len(sampled_inds), len(extra_inds), len(pos_inds))
if len(extra_inds) > num_extra:
extra_inds = np.random.choice(
extra_inds, size=num_extra, replace=False)
sampled_inds.extend(extra_inds.tolist())
elif len(sampled_inds) > num_expected:
sampled_inds = np.random.choice(
sampled_inds, size=num_expected, replace=False)
return sampled_inds
def sample_via_interval(max_overlaps, full_set, num_expected, floor_thr,
num_bins, bg_thresh_hi):
max_iou = max_overlaps.max()
iou_interval = (max_iou - floor_thr) / num_bins
per_num_expected = int(num_expected / num_bins)
sampled_inds = []
for i in range(num_bins):
start_iou = floor_thr + i * iou_interval
end_iou = floor_thr + (i + 1) * iou_interval
tmp_set = set(
np.where(
np.logical_and(max_overlaps >= start_iou, max_overlaps <
end_iou))[0])
tmp_inds = list(tmp_set & full_set)
if len(tmp_inds) > per_num_expected:
tmp_sampled_set = np.random.choice(
tmp_inds, size=per_num_expected, replace=False)
else:
tmp_sampled_set = np.array(tmp_inds, dtype=np.int64)
sampled_inds.append(tmp_sampled_set)
sampled_inds = np.concatenate(sampled_inds)
if len(sampled_inds) < num_expected:
num_extra = num_expected - len(sampled_inds)
extra_inds = np.array(list(full_set - set(sampled_inds)))
assert len(sampled_inds)+len(extra_inds) == len(full_set), \
"sum of sampled_inds({}) and extra_inds({}) length must be equal with full_set({})!".format(
len(sampled_inds), len(extra_inds), len(full_set))
if len(extra_inds) > num_extra:
extra_inds = np.random.choice(
extra_inds, num_extra, replace=False)
sampled_inds = np.concatenate([sampled_inds, extra_inds])
return sampled_inds
def _sample_neg(max_overlaps,
max_classes,
neg_inds,
num_expected,
floor_thr=-1,
floor_fraction=0,
num_bins=3,
bg_thresh_hi=0.5):
if len(neg_inds) <= num_expected:
return neg_inds
else:
# balance sampling for negative samples
neg_set = set(neg_inds)
if floor_thr > 0:
floor_set = set(
np.where(
np.logical_and(max_overlaps >= 0, max_overlaps <
floor_thr))[0])
iou_sampling_set = set(
np.where(max_overlaps >= floor_thr)[0])
elif floor_thr == 0:
floor_set = set(np.where(max_overlaps == 0)[0])
iou_sampling_set = set(
np.where(max_overlaps > floor_thr)[0])
else:
floor_set = set()
iou_sampling_set = set(
np.where(max_overlaps > floor_thr)[0])
floor_thr = 0
floor_neg_inds = list(floor_set & neg_set)
iou_sampling_neg_inds = list(iou_sampling_set & neg_set)
num_expected_iou_sampling = int(num_expected *
(1 - floor_fraction))
if len(iou_sampling_neg_inds) > num_expected_iou_sampling:
if num_bins >= 2:
iou_sampled_inds = sample_via_interval(
max_overlaps,
set(iou_sampling_neg_inds),
num_expected_iou_sampling, floor_thr, num_bins,
bg_thresh_hi)
else:
iou_sampled_inds = np.random.choice(
iou_sampling_neg_inds,
size=num_expected_iou_sampling,
replace=False)
else:
iou_sampled_inds = np.array(
iou_sampling_neg_inds, dtype=np.int64)
num_expected_floor = num_expected - len(iou_sampled_inds)
if len(floor_neg_inds) > num_expected_floor:
sampled_floor_inds = np.random.choice(
floor_neg_inds, size=num_expected_floor, replace=False)
else:
sampled_floor_inds = np.array(floor_neg_inds, dtype=np.int64)
sampled_inds = np.concatenate(
(sampled_floor_inds, iou_sampled_inds))
if len(sampled_inds) < num_expected:
num_extra = num_expected - len(sampled_inds)
extra_inds = np.array(list(neg_set - set(sampled_inds)))
if len(extra_inds) > num_extra:
extra_inds = np.random.choice(
extra_inds, size=num_extra, replace=False)
sampled_inds = np.concatenate((sampled_inds, extra_inds))
return sampled_inds
def _sample_rois(rpn_rois, gt_classes, is_crowd, gt_boxes, im_info,
batch_size_per_im, fg_fraction, fg_thresh,
bg_thresh_hi, bg_thresh_lo, bbox_reg_weights,
class_nums, use_random, is_cls_agnostic,
is_cascade_rcnn):
rois_per_image = int(batch_size_per_im)
fg_rois_per_im = int(np.round(fg_fraction * rois_per_image))
# rescale RoIs back to the original image scale
im_scale = im_info[2]
inv_im_scale = 1. / im_scale
rpn_rois = rpn_rois * inv_im_scale
if is_cascade_rcnn:
rpn_rois = rpn_rois[gt_boxes.shape[0]:, :]
boxes = np.vstack([gt_boxes, rpn_rois])
gt_overlaps = np.zeros((boxes.shape[0], class_nums))
box_to_gt_ind_map = np.zeros((boxes.shape[0]), dtype=np.int32)
if len(gt_boxes) > 0:
proposal_to_gt_overlaps = bbox_overlaps(boxes, gt_boxes)
overlaps_argmax = proposal_to_gt_overlaps.argmax(axis=1)
overlaps_max = proposal_to_gt_overlaps.max(axis=1)
# Boxes with a non-zero overlap with gt boxes
overlapped_boxes_ind = np.where(overlaps_max > 0)[0]
overlapped_boxes_gt_classes = gt_classes[overlaps_argmax[
overlapped_boxes_ind]]
for idx in range(len(overlapped_boxes_ind)):
gt_overlaps[overlapped_boxes_ind[
idx], overlapped_boxes_gt_classes[idx]] = overlaps_max[
overlapped_boxes_ind[idx]]
box_to_gt_ind_map[overlapped_boxes_ind[
idx]] = overlaps_argmax[overlapped_boxes_ind[idx]]
crowd_ind = np.where(is_crowd)[0]
gt_overlaps[crowd_ind] = -1
max_overlaps = gt_overlaps.max(axis=1)
max_classes = gt_overlaps.argmax(axis=1)
# Cascade RCNN Decode Filter
if is_cascade_rcnn:
ws = boxes[:, 2] - boxes[:, 0] + 1
hs = boxes[:, 3] - boxes[:, 1] + 1
keep = np.where((ws > 0) & (hs > 0))[0]
boxes = boxes[keep]
max_overlaps = max_overlaps[keep]
fg_inds = np.where(max_overlaps >= fg_thresh)[0]
bg_inds = np.where((max_overlaps < bg_thresh_hi) & (
max_overlaps >= bg_thresh_lo))[0]
fg_rois_per_this_image = fg_inds.shape[0]
bg_rois_per_this_image = bg_inds.shape[0]
else:
# Foreground
fg_inds = np.where(max_overlaps >= fg_thresh)[0]
fg_rois_per_this_image = np.minimum(fg_rois_per_im,
fg_inds.shape[0])
# Sample foreground if there are too many
if fg_inds.shape[0] > fg_rois_per_this_image:
if use_random:
fg_inds = _sample_pos(max_overlaps, max_classes,
fg_inds, fg_rois_per_this_image)
fg_inds = fg_inds[:fg_rois_per_this_image]
# Background
bg_inds = np.where((max_overlaps < bg_thresh_hi) & (
max_overlaps >= bg_thresh_lo))[0]
bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
bg_rois_per_this_image = np.minimum(bg_rois_per_this_image,
bg_inds.shape[0])
assert bg_rois_per_this_image >= 0, "bg_rois_per_this_image must be >= 0 but got {}".format(
bg_rois_per_this_image)
# Sample background if there are too many
if bg_inds.shape[0] > bg_rois_per_this_image:
if use_random:
# libra neg sample
bg_inds = _sample_neg(
max_overlaps,
max_classes,
bg_inds,
bg_rois_per_this_image,
num_bins=num_bins,
bg_thresh_hi=bg_thresh_hi)
bg_inds = bg_inds[:bg_rois_per_this_image]
keep_inds = np.append(fg_inds, bg_inds)
sampled_labels = max_classes[keep_inds] # N x 1
sampled_labels[fg_rois_per_this_image:] = 0
sampled_boxes = boxes[keep_inds] # N x 4
sampled_gts = gt_boxes[box_to_gt_ind_map[keep_inds]]
sampled_gts[fg_rois_per_this_image:, :] = gt_boxes[0]
bbox_label_targets = _compute_targets(
sampled_boxes, sampled_gts, sampled_labels, bbox_reg_weights)
bbox_targets, bbox_inside_weights = _expand_bbox_targets(
bbox_label_targets, class_nums, is_cls_agnostic)
bbox_outside_weights = np.array(
bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype)
# Scale rois
sampled_rois = sampled_boxes * im_scale
# Faster RCNN blobs
frcn_blobs = dict(
rois=sampled_rois,
labels_int32=sampled_labels,
bbox_targets=bbox_targets,
bbox_inside_weights=bbox_inside_weights,
bbox_outside_weights=bbox_outside_weights)
return frcn_blobs
def _compute_targets(roi_boxes, gt_boxes, labels, bbox_reg_weights):
assert roi_boxes.shape[0] == gt_boxes.shape[0]
assert roi_boxes.shape[1] == 4
assert gt_boxes.shape[1] == 4
targets = np.zeros(roi_boxes.shape)
bbox_reg_weights = np.asarray(bbox_reg_weights)
targets = box_to_delta(
ex_boxes=roi_boxes, gt_boxes=gt_boxes, weights=bbox_reg_weights)
return np.hstack([labels[:, np.newaxis], targets]).astype(
np.float32, copy=False)
def _expand_bbox_targets(bbox_targets_input, class_nums,
is_cls_agnostic):
class_labels = bbox_targets_input[:, 0]
fg_inds = np.where(class_labels > 0)[0]
bbox_targets = np.zeros((class_labels.shape[0], 4 * class_nums
if not is_cls_agnostic else 4 * 2))
bbox_inside_weights = np.zeros(bbox_targets.shape)
for ind in fg_inds:
class_label = int(class_labels[
ind]) if not is_cls_agnostic else 1
start_ind = class_label * 4
end_ind = class_label * 4 + 4
bbox_targets[ind, start_ind:end_ind] = bbox_targets_input[ind,
1:]
bbox_inside_weights[ind, start_ind:end_ind] = (1.0, 1.0, 1.0,
1.0)
return bbox_targets, bbox_inside_weights
def generate_func(
rpn_rois,
gt_classes,
is_crowd,
gt_boxes,
im_info, ):
rpn_rois_lod = rpn_rois.lod()[0]
gt_classes_lod = gt_classes.lod()[0]
# convert
rpn_rois = np.array(rpn_rois)
gt_classes = np.array(gt_classes)
is_crowd = np.array(is_crowd)
gt_boxes = np.array(gt_boxes)
im_info = np.array(im_info)
rois = []
labels_int32 = []
bbox_targets = []
bbox_inside_weights = []
bbox_outside_weights = []
lod = [0]
for idx in range(len(rpn_rois_lod) - 1):
rois_si = rpn_rois_lod[idx]
rois_ei = rpn_rois_lod[idx + 1]
gt_si = gt_classes_lod[idx]
gt_ei = gt_classes_lod[idx + 1]
frcn_blobs = _sample_rois(
rpn_rois[rois_si:rois_ei], gt_classes[gt_si:gt_ei],
is_crowd[gt_si:gt_ei], gt_boxes[gt_si:gt_ei], im_info[idx],
batch_size_per_im, fg_fraction, fg_thresh, bg_thresh_hi,
bg_thresh_lo, bbox_reg_weights, class_nums, use_random,
is_cls_agnostic, is_cascade_rcnn)
lod.append(frcn_blobs['rois'].shape[0] + lod[-1])
rois.append(frcn_blobs['rois'])
labels_int32.append(frcn_blobs['labels_int32'].reshape(-1, 1))
bbox_targets.append(frcn_blobs['bbox_targets'])
bbox_inside_weights.append(frcn_blobs['bbox_inside_weights'])
bbox_outside_weights.append(frcn_blobs['bbox_outside_weights'])
rois = np.vstack(rois)
labels_int32 = np.vstack(labels_int32)
bbox_targets = np.vstack(bbox_targets)
bbox_inside_weights = np.vstack(bbox_inside_weights)
bbox_outside_weights = np.vstack(bbox_outside_weights)
# create LoDTensors for the return values
# note: fluid.create_lod_tensor does not work well here
ret_rois = fluid.LoDTensor()
ret_rois.set_lod([lod])
ret_rois.set(rois.astype("float32"), fluid.CPUPlace())
ret_labels_int32 = fluid.LoDTensor()
ret_labels_int32.set_lod([lod])
ret_labels_int32.set(labels_int32.astype("int32"), fluid.CPUPlace())
ret_bbox_targets = fluid.LoDTensor()
ret_bbox_targets.set_lod([lod])
ret_bbox_targets.set(
bbox_targets.astype("float32"), fluid.CPUPlace())
ret_bbox_inside_weights = fluid.LoDTensor()
ret_bbox_inside_weights.set_lod([lod])
ret_bbox_inside_weights.set(
bbox_inside_weights.astype("float32"), fluid.CPUPlace())
ret_bbox_outside_weights = fluid.LoDTensor()
ret_bbox_outside_weights.set_lod([lod])
ret_bbox_outside_weights.set(
bbox_outside_weights.astype("float32"), fluid.CPUPlace())
return ret_rois, ret_labels_int32, ret_bbox_targets, ret_bbox_inside_weights, ret_bbox_outside_weights
rois = create_tmp_var(
fluid.default_main_program(),
name=None,
dtype='float32',
shape=[-1, 4], )
bbox_inside_weights = create_tmp_var(
fluid.default_main_program(),
name=None,
dtype='float32',
shape=[-1, 8 if self.is_cls_agnostic else self.class_nums * 4], )
bbox_outside_weights = create_tmp_var(
fluid.default_main_program(),
name=None,
dtype='float32',
shape=[-1, 8 if self.is_cls_agnostic else self.class_nums * 4], )
bbox_targets = create_tmp_var(
fluid.default_main_program(),
name=None,
dtype='float32',
shape=[-1, 8 if self.is_cls_agnostic else self.class_nums * 4], )
labels_int32 = create_tmp_var(
fluid.default_main_program(),
name=None,
dtype='int32',
shape=[-1, 1], )
outs = [
rois, labels_int32, bbox_targets, bbox_inside_weights,
bbox_outside_weights
]
fluid.layers.py_func(
func=generate_func,
x=[rpn_rois, gt_classes, is_crowd, gt_boxes, im_info],
out=outs)
return outs
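# Sampling note (Libra R-CNN, Pang et al., 2019): _sample_neg above
# replaces uniform negative sampling with IoU-balanced sampling. The IoU
# range below bg_thresh_hi is split into num_bins equal intervals
# (sample_via_interval) and roughly num_expected / num_bins negatives are
# drawn from each, so informative hard negatives with moderate IoU are not
# swamped by the far more numerous near-zero-IoU background boxes.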
@register
class RoIAlign(object):
__op__ = fluid.layers.roi_align
__append_doc__ = True
def __init__(self, resolution=7, spatial_scale=1. / 16, sampling_ratio=0):
super(RoIAlign, self).__init__()
if isinstance(resolution, Integral):
resolution = [resolution, resolution]
self.pooled_height = resolution[0]
self.pooled_width = resolution[1]
self.spatial_scale = spatial_scale
self.sampling_ratio = sampling_ratio
@register
class RoIPool(object):
__op__ = fluid.layers.roi_pool
__append_doc__ = True
def __init__(self, resolution=7, spatial_scale=1. / 16):
super(RoIPool, self).__init__()
if isinstance(resolution, Integral):
resolution = [resolution, resolution]
self.pooled_height = resolution[0]
self.pooled_width = resolution[1]
self.spatial_scale = spatial_scale
@register
class MultiBoxHead(object):
__op__ = fluid.layers.multi_box_head
__append_doc__ = True
def __init__(self,
min_ratio=20,
max_ratio=90,
base_size=300,
min_sizes=[60.0, 105.0, 150.0, 195.0, 240.0, 285.0],
max_sizes=[[], 150.0, 195.0, 240.0, 285.0, 300.0],
aspect_ratios=[[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.],
[2., 3.]],
steps=None,
offset=0.5,
flip=True,
min_max_aspect_ratios_order=False,
kernel_size=1,
pad=0):
super(MultiBoxHead, self).__init__()
self.min_ratio = min_ratio
self.max_ratio = max_ratio
self.base_size = base_size
self.min_sizes = min_sizes
self.max_sizes = max_sizes
self.aspect_ratios = aspect_ratios
self.steps = steps
self.offset = offset
self.flip = flip
self.min_max_aspect_ratios_order = min_max_aspect_ratios_order
self.kernel_size = kernel_size
self.pad = pad
@register
@serializable
class SSDLiteMultiBoxHead(object):
def __init__(self,
min_ratio=20,
max_ratio=90,
base_size=300,
min_sizes=None,
max_sizes=None,
aspect_ratios=[[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.],
[2., 3.]],
steps=None,
offset=0.5,
flip=True,
clip=False,
pad=0,
conv_decay=0.0):
super(SSDLiteMultiBoxHead, self).__init__()
self.min_ratio = min_ratio
self.max_ratio = max_ratio
self.base_size = base_size
self.min_sizes = min_sizes
self.max_sizes = max_sizes
self.aspect_ratios = aspect_ratios
self.steps = steps
self.offset = offset
self.flip = flip
self.pad = pad
self.clip = clip
self.conv_decay = conv_decay
def _separable_conv(self, input, num_filters, name):
dwconv_param_attr = ParamAttr(
name=name + 'dw_weights', regularizer=L2Decay(self.conv_decay))
num_filter1 = input.shape[1]
depthwise_conv = fluid.layers.conv2d(
input=input,
num_filters=num_filter1,
filter_size=3,
stride=1,
padding="SAME",
groups=int(num_filter1),
act=None,
use_cudnn=False,
param_attr=dwconv_param_attr,
bias_attr=False)
bn_name = name + '_bn'
bn_param_attr = ParamAttr(
name=bn_name + "_scale", regularizer=L2Decay(0.0))
bn_bias_attr = ParamAttr(
name=bn_name + "_offset", regularizer=L2Decay(0.0))
bn = fluid.layers.batch_norm(
input=depthwise_conv,
param_attr=bn_param_attr,
bias_attr=bn_bias_attr,
moving_mean_name=bn_name + '_mean',
moving_variance_name=bn_name + '_variance')
bn = fluid.layers.relu6(bn)
pwconv_param_attr = ParamAttr(
name=name + 'pw_weights', regularizer=L2Decay(self.conv_decay))
pointwise_conv = fluid.layers.conv2d(
input=bn,
num_filters=num_filters,
filter_size=1,
stride=1,
act=None,
use_cudnn=True,
param_attr=pwconv_param_attr,
bias_attr=False)
return pointwise_conv
def __call__(self, inputs, image, num_classes):
def _permute_and_reshape(input, last_dim):
trans = fluid.layers.transpose(input, perm=[0, 2, 3, 1])
compile_shape = [0, -1, last_dim]
return fluid.layers.reshape(trans, shape=compile_shape)
def _is_list_or_tuple_(data):
return (isinstance(data, list) or isinstance(data, tuple))
if self.min_sizes is None and self.max_sizes is None:
num_layer = len(inputs)
self.min_sizes = []
self.max_sizes = []
step = int(
math.floor(((self.max_ratio - self.min_ratio)) / (num_layer - 2
)))
for ratio in six.moves.range(self.min_ratio, self.max_ratio + 1,
step):
self.min_sizes.append(self.base_size * ratio / 100.)
self.max_sizes.append(self.base_size * (ratio + step) / 100.)
self.min_sizes = [self.base_size * .10] + self.min_sizes
self.max_sizes = [self.base_size * .20] + self.max_sizes
locs, confs = [], []
boxes, mvars = [], []
for i, input in enumerate(inputs):
min_size = self.min_sizes[i]
max_size = self.max_sizes[i]
if not _is_list_or_tuple_(min_size):
min_size = [min_size]
if not _is_list_or_tuple_(max_size):
max_size = [max_size]
step = [
self.steps[i] if self.steps else 0.0, self.steps[i]
if self.steps else 0.0
]
box, var = fluid.layers.prior_box(
input,
image,
min_sizes=min_size,
max_sizes=max_size,
steps=step,
aspect_ratios=self.aspect_ratios[i],
variance=[0.1, 0.1, 0.2, 0.2],
clip=self.clip,
flip=self.flip,
offset=0.5)
num_boxes = box.shape[2]
box = fluid.layers.reshape(box, shape=[-1, 4])
var = fluid.layers.reshape(var, shape=[-1, 4])
num_loc_output = num_boxes * 4
num_conf_output = num_boxes * num_classes
# get loc
mbox_loc = self._separable_conv(input, num_loc_output,
"loc_{}".format(i + 1))
loc = _permute_and_reshape(mbox_loc, 4)
# get conf
mbox_conf = self._separable_conv(input, num_conf_output,
"conf_{}".format(i + 1))
conf = _permute_and_reshape(mbox_conf, num_classes)
locs.append(loc)
confs.append(conf)
boxes.append(box)
mvars.append(var)
ssd_mbox_loc = fluid.layers.concat(locs, axis=1)
ssd_mbox_conf = fluid.layers.concat(confs, axis=1)
prior_boxes = fluid.layers.concat(boxes)
box_vars = fluid.layers.concat(mvars)
prior_boxes.stop_gradient = True
box_vars.stop_gradient = True
return ssd_mbox_loc, ssd_mbox_conf, prior_boxes, box_vars
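# Worked example of the default size schedule above: with base_size=300,
# min_ratio=20, max_ratio=90 and six input feature maps,
# step = floor((90 - 20) / (6 - 2)) = 17 gives ratios 20, 37, 54, 71, 88, so
#
#   min_sizes = [30.0, 60.0, 111.0, 162.0, 213.0, 264.0]
#   max_sizes = [60.0, 111.0, 162.0, 213.0, 264.0, 315.0]
#
# where the leading 30.0 / 60.0 are the base_size * .10 / .20 entries
# prepended for the first feature map.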
@register
@serializable
class SSDOutputDecoder(object):
__op__ = fluid.layers.detection_output
__append_doc__ = True
def __init__(self,
nms_threshold=0.45,
nms_top_k=400,
keep_top_k=200,
score_threshold=0.01,
nms_eta=1.0,
background_label=0,
return_index=False):
super(SSDOutputDecoder, self).__init__()
self.nms_threshold = nms_threshold
self.background_label = background_label
self.nms_top_k = nms_top_k
self.keep_top_k = keep_top_k
self.score_threshold = score_threshold
self.nms_eta = nms_eta
self.return_index = return_index
@register
@serializable
class RetinaTargetAssign(object):
__op__ = fluid.layers.retinanet_target_assign
__append_doc__ = True
def __init__(self, positive_overlap=0.5, negative_overlap=0.4):
super(RetinaTargetAssign, self).__init__()
self.positive_overlap = positive_overlap
self.negative_overlap = negative_overlap
@register
@serializable
class RetinaOutputDecoder(object):
__op__ = fluid.layers.retinanet_detection_output
__append_doc__ = True
def __init__(self,
score_thresh=0.05,
nms_thresh=0.3,
pre_nms_top_n=1000,
detections_per_im=100,
nms_eta=1.0):
super(RetinaOutputDecoder, self).__init__()
self.score_threshold = score_thresh
self.nms_threshold = nms_thresh
self.nms_top_k = pre_nms_top_n
self.keep_top_k = detections_per_im
self.nms_eta = nms_eta
@register
@serializable
class MaskMatrixNMS(object):
"""
Matrix NMS for multi-class masks.
Args:
update_threshold (float): Updated threshold of categroy score in second time.
pre_nms_top_n (int): Number of total instance to be kept per image before NMS
post_nms_top_n (int): Number of total instance to be kept per image after NMS.
kernel (str): 'linear' or 'gaussian'.
sigma (float): std in gaussian method.
Input:
seg_preds (Variable): shape (n, h, w), segmentation feature maps
seg_masks (Variable): shape (n, h, w), segmentation feature maps
cate_labels (Variable): shape (n), mask labels in descending order
cate_scores (Variable): shape (n), mask scores in descending order
sum_masks (Variable): a float tensor of the sum of seg_masks
Returns:
Variable: cate_scores, tensors of shape (n)
"""
def __init__(self,
update_threshold=0.05,
pre_nms_top_n=500,
post_nms_top_n=100,
kernel='gaussian',
sigma=2.0):
super(MaskMatrixNMS, self).__init__()
self.update_threshold = update_threshold
self.pre_nms_top_n = pre_nms_top_n
self.post_nms_top_n = post_nms_top_n
self.kernel = kernel
self.sigma = sigma
def _sort_score(self, scores, top_num):
self.case_scores = scores
def fn_1():
return fluid.layers.topk(self.case_scores, top_num)
def fn_2():
return fluid.layers.argsort(self.case_scores, descending=True)
sort_inds = fluid.layers.case(
pred_fn_pairs=[(fluid.layers.shape(scores)[0] > top_num, fn_1)],
default=fn_2)
return sort_inds
def __call__(self,
seg_preds,
seg_masks,
cate_labels,
cate_scores,
sum_masks=None):
# sort and keep top nms_pre
sort_inds = self._sort_score(cate_scores, self.pre_nms_top_n)
seg_masks = fluid.layers.gather(seg_masks, index=sort_inds[1])
seg_preds = fluid.layers.gather(seg_preds, index=sort_inds[1])
sum_masks = fluid.layers.gather(sum_masks, index=sort_inds[1])
cate_scores = sort_inds[0]
cate_labels = fluid.layers.gather(cate_labels, index=sort_inds[1])
seg_masks = paddle.flatten(seg_masks, start_axis=1, stop_axis=-1)
# inter.
inter_matrix = paddle.mm(seg_masks,
fluid.layers.transpose(seg_masks, [1, 0]))
n_samples = fluid.layers.shape(cate_labels)
# union.
sum_masks_x = fluid.layers.reshape(
fluid.layers.expand(
sum_masks, expand_times=[n_samples]),
shape=[n_samples, n_samples])
# iou.
iou_matrix = paddle.divide(inter_matrix,
paddle.subtract(
paddle.add(sum_masks_x,
fluid.layers.transpose(
sum_masks_x, [1, 0])),
inter_matrix))
iou_matrix = paddle.triu(iou_matrix, diagonal=1)
# label_specific matrix.
cate_labels_x = fluid.layers.reshape(
fluid.layers.expand(
cate_labels, expand_times=[n_samples]),
shape=[n_samples, n_samples])
label_matrix = fluid.layers.cast(
paddle.equal(cate_labels_x,
fluid.layers.transpose(cate_labels_x, [1, 0])),
'float32')
label_matrix = paddle.triu(label_matrix, diagonal=1)
# IoU compensation
compensate_iou = paddle.max(paddle.multiply(iou_matrix, label_matrix),
axis=0)
compensate_iou = fluid.layers.reshape(
fluid.layers.expand(
compensate_iou, expand_times=[n_samples]),
shape=[n_samples, n_samples])
compensate_iou = fluid.layers.transpose(compensate_iou, [1, 0])
# IoU decay
decay_iou = paddle.multiply(iou_matrix, label_matrix)
# matrix nms
if self.kernel == 'gaussian':
decay_matrix = fluid.layers.exp(-1 * self.sigma * (decay_iou**2))
compensate_matrix = fluid.layers.exp(-1 * self.sigma *
(compensate_iou**2))
decay_coefficient = paddle.min(
paddle.divide(decay_matrix, compensate_matrix), axis=0)
elif self.kernel == 'linear':
decay_matrix = (1 - decay_iou) / (1 - compensate_iou)
decay_coefficient = paddle.min(decay_matrix, axis=0)
else:
raise NotImplementedError
# update the score.
cate_scores = paddle.multiply(cate_scores, decay_coefficient)
keep = fluid.layers.where(cate_scores >= self.update_threshold)
keep = fluid.layers.squeeze(keep, axes=[1])
# prevent an empty selection: append the last index as fake data
keep = fluid.layers.concat([
keep, fluid.layers.cast(
fluid.layers.shape(cate_scores)[0] - 1, 'int64')
])
seg_preds = fluid.layers.gather(seg_preds, index=keep)
cate_scores = fluid.layers.gather(cate_scores, index=keep)
cate_labels = fluid.layers.gather(cate_labels, index=keep)
# sort and keep top_k
sort_inds = self._sort_score(cate_scores, self.post_nms_top_n)
seg_preds = fluid.layers.gather(seg_preds, index=sort_inds[1])
cate_scores = sort_inds[0]
cate_labels = fluid.layers.gather(cate_labels, index=sort_inds[1])
return seg_preds, cate_scores, cate_labels
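# Decay rule implemented above (Matrix NMS from SOLOv2, Wang et al., 2020):
# for each candidate j the score is updated in one matrix pass as
#
#   s_j *= min_i f(iou_ij) / f(iou_i)
#
# over all higher-scoring masks i of the same class, where f(x) =
# exp(-sigma * x**2) for the 'gaussian' kernel and f(x) = 1 - x for
# 'linear'; iou_i (compensate_iou) is the largest IoU that mask i itself
# suffers and approximates the probability that i was suppressed.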