forked from PulseFocusPlatform/PulseFocusPlatform
1711 lines
64 KiB
Python
1711 lines
64 KiB
Python
|
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
|
||
|
#
|
||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
# you may not use this file except in compliance with the License.
|
||
|
# You may obtain a copy of the License at
|
||
|
#
|
||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||
|
#
|
||
|
# Unless required by applicable law or agreed to in writing, software
|
||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
# See the License for the specific language governing permissions and
|
||
|
# limitations under the License.
|
||
|
|
||
|
import numpy as np
|
||
|
from numbers import Integral
|
||
|
import math
|
||
|
import six
|
||
|
|
||
|
import paddle
|
||
|
from paddle import fluid
|
||
|
from paddle.fluid.layer_helper import LayerHelper
|
||
|
from paddle.fluid.initializer import NumpyArrayInitializer
|
||
|
from paddle.fluid.param_attr import ParamAttr
|
||
|
from paddle.fluid.regularizer import L2Decay
|
||
|
from ppdet.core.workspace import register, serializable
|
||
|
from ppdet.utils.bbox_utils import bbox_overlaps, box_to_delta
|
||
|
|
||
|
# Public names exported by a star-import of this module.
# `MultiClassDiouNMS` is defined and registered in this module like the other
# NMS classes, so it is exported alongside them.
__all__ = [
    'AnchorGenerator',
    'AnchorGrid',
    'DropBlock',
    'RPNTargetAssign',
    'GenerateProposals',
    'MultiClassNMS',
    'BBoxAssigner',
    'MaskAssigner',
    'RoIAlign',
    'RoIPool',
    'MultiBoxHead',
    'SSDLiteMultiBoxHead',
    'SSDOutputDecoder',
    'RetinaTargetAssign',
    'RetinaOutputDecoder',
    'ConvNorm',
    'DeformConvNorm',
    'MultiClassSoftNMS',
    'MatrixNMS',
    'LibraBBoxAssigner',
    'DeformConv',
    'MultiClassDiouNMS',
]
|
||
|
|
||
|
|
||
|
def _conv_offset(input, filter_size, stride, padding, act=None, name=None):
    """Build the conv2d layer whose output ``DeformConv`` splits into offsets and mask.

    Args:
        input: input variable handed to ``fluid.layers.conv2d``.
        filter_size (int): kernel size; the layer outputs
            ``filter_size * filter_size * 3`` channels.
        stride: stride forwarded to the convolution.
        padding: padding forwarded to the convolution.
        act: optional activation forwarded to the convolution.
        name (str): prefix for the layer and for its ``.w_0`` / ``.b_0``
            parameters. It is concatenated with string suffixes, so the
            ``None`` default cannot actually be used.

    Returns:
        The output variable of the convolution.
    """
    # Weight and bias both start at zero.
    weight_attr = ParamAttr(
        initializer=fluid.initializer.Constant(0), name=name + ".w_0")
    # The bias uses a learning-rate multiplier of 2 and an L2 decay of 0.
    bias_attr = ParamAttr(
        initializer=fluid.initializer.Constant(0),
        learning_rate=2.,
        regularizer=L2Decay(0.),
        name=name + ".b_0")
    return fluid.layers.conv2d(
        input,
        num_filters=filter_size * filter_size * 3,
        filter_size=filter_size,
        stride=stride,
        padding=padding,
        param_attr=weight_attr,
        bias_attr=bias_attr,
        act=act,
        name=name)
|
||
|
|
||
|
|
||
|
def DeformConv(input,
               num_filters,
               filter_size,
               stride=1,
               groups=1,
               dilation=1,
               lr_scale=1,
               initializer=None,
               bias_attr=False,
               name=None):
    """Deformable convolution with a learned offset/mask branch.

    A ``_conv_offset`` layer predicts ``3 * filter_size**2`` channels from
    ``input``. They are split along dim 1 into ``2 * filter_size**2`` offset
    channels and ``filter_size**2`` mask channels; the mask goes through a
    sigmoid and both are fed to ``fluid.layers.deformable_conv``.

    Args:
        input: input variable.
        num_filters (int): number of output channels.
        filter_size (int): kernel size (used in integer arithmetic).
        stride: stride of both the offset conv and the deformable conv.
        groups: ``groups`` of the deformable conv.
        dilation: dilation of the deformable conv; its padding is
            ``(filter_size - 1) // 2 * dilation``.
        lr_scale: learning-rate multiplier of the weights; the bias, when
            enabled, uses ``lr_scale * 2``.
        initializer: initializer of the deformable conv weights.
        bias_attr: truthy to add a zero-initialised bias named
            ``name + "_bias"``; otherwise the conv has no bias.
        name (str): prefix used to build parameter and layer names; it is
            concatenated with string suffixes, so it must not be ``None``.

    Returns:
        The output variable of the deformable convolution.
    """
    bias_para = False
    if bias_attr:
        bias_para = ParamAttr(
            name=name + "_bias",
            initializer=fluid.initializer.Constant(0),
            regularizer=L2Decay(0.),
            learning_rate=lr_scale * 2)

    # The offset branch pads without the dilation factor; the deformable
    # conv below multiplies the same base padding by `dilation`.
    base_pad = (filter_size - 1) // 2
    offset_mask = _conv_offset(
        input=input,
        filter_size=filter_size,
        stride=stride,
        padding=base_pad,
        act=None,
        name=name + "_conv_offset")

    kernel_area = filter_size**2
    offset, mask = fluid.layers.split(
        input=offset_mask,
        num_or_sections=[kernel_area * 2, kernel_area],
        dim=1)
    mask = fluid.layers.sigmoid(mask)

    weight_attr = ParamAttr(
        name=name + "_weights",
        initializer=initializer,
        learning_rate=lr_scale)
    return fluid.layers.deformable_conv(
        input=input,
        offset=offset,
        mask=mask,
        num_filters=num_filters,
        filter_size=filter_size,
        stride=stride,
        padding=base_pad * dilation,
        dilation=dilation,
        groups=groups,
        deformable_groups=1,
        im2col_step=1,
        param_attr=weight_attr,
        bias_attr=bias_para,
        name=name + ".conv2d.output.1")
|
||
|
|
||
|
|
||
|
def DeformConvNorm(input,
                   num_filters,
                   filter_size,
                   stride=1,
                   groups=1,
                   norm_decay=0.,
                   norm_type='affine_channel',
                   norm_groups=32,
                   dilation=1,
                   lr_scale=1,
                   freeze_norm=False,
                   act=None,
                   norm_name=None,
                   initializer=None,
                   bias_attr=False,
                   name=None):
    """Deformable convolution followed by a normalization layer.

    Args:
        input: input variable.
        num_filters, filter_size, stride, groups, dilation, lr_scale,
            initializer, bias_attr, name: forwarded to ``DeformConv``.
        norm_decay: L2 decay coefficient of the norm scale and offset.
        norm_type (str): one of ``'bn'``, ``'sync_bn'``, ``'gn'`` or
            ``'affine_channel'``.
        norm_groups: ``groups`` of the group norm (``'gn'`` only).
        freeze_norm: when truthy the norm parameters get a learning rate of
            0 and ``stop_gradient = True``; batch norm additionally uses
            global statistics.
        act: activation applied by the normalization layer.
        norm_name (str): prefix of the norm parameter names; concatenated
            with string suffixes, so it must not be ``None``.

    Returns:
        The output variable of the normalization layer.
    """
    assert norm_type in ['bn', 'sync_bn', 'affine_channel', 'gn']
    conv = DeformConv(
        input,
        num_filters,
        filter_size,
        stride=stride,
        groups=groups,
        dilation=dilation,
        lr_scale=lr_scale,
        initializer=initializer,
        bias_attr=bias_attr,
        name=name)

    # A frozen norm keeps its parameters by zeroing their learning rate.
    norm_lr = 0. if freeze_norm else 1.
    scale_attr = ParamAttr(
        name=norm_name + '_scale',
        learning_rate=norm_lr * lr_scale,
        regularizer=L2Decay(norm_decay))
    offset_attr = ParamAttr(
        name=norm_name + '_offset',
        learning_rate=norm_lr * lr_scale,
        regularizer=L2Decay(norm_decay))

    if norm_type == 'affine_channel':
        channels = conv.shape[1]
        scale = fluid.layers.create_parameter(
            shape=[channels],
            dtype=conv.dtype,
            attr=scale_attr,
            default_initializer=fluid.initializer.Constant(1.))
        bias = fluid.layers.create_parameter(
            shape=[channels],
            dtype=conv.dtype,
            attr=offset_attr,
            default_initializer=fluid.initializer.Constant(0.))
        out = fluid.layers.affine_channel(
            x=conv, scale=scale, bias=bias, act=act)
    elif norm_type == 'gn':
        out = fluid.layers.group_norm(
            input=conv,
            act=act,
            name=norm_name + '.output.1',
            groups=norm_groups,
            param_attr=scale_attr,
            bias_attr=offset_attr)
        # Look the created parameters up by name so they can be frozen below.
        scale = fluid.framework._get_var(scale_attr.name)
        bias = fluid.framework._get_var(offset_attr.name)
    elif norm_type in ('bn', 'sync_bn'):
        out = fluid.layers.batch_norm(
            input=conv,
            act=act,
            name=norm_name + '.output.1',
            param_attr=scale_attr,
            bias_attr=offset_attr,
            moving_mean_name=norm_name + '_mean',
            moving_variance_name=norm_name + '_variance',
            use_global_stats=bool(freeze_norm))
        scale = fluid.framework._get_var(scale_attr.name)
        bias = fluid.framework._get_var(offset_attr.name)

    if freeze_norm:
        scale.stop_gradient = True
        bias.stop_gradient = True
    return out
|
||
|
|
||
|
|
||
|
def ConvNorm(input,
             num_filters,
             filter_size,
             stride=1,
             groups=1,
             norm_decay=0.,
             norm_type='affine_channel',
             norm_groups=32,
             dilation=1,
             lr_scale=1,
             freeze_norm=False,
             act=None,
             norm_name=None,
             initializer=None,
             bias_attr=False,
             name=None):
    """Convolution followed by a normalization layer.

    Args:
        input: input variable.
        num_filters (int): number of output channels of the convolution.
        filter_size (int): kernel size; padding is
            ``((filter_size - 1) // 2) * dilation``.
        stride, groups, dilation: forwarded to ``fluid.layers.conv2d``.
        norm_decay: L2 decay coefficient of the norm scale and offset.
        norm_type (str): one of ``'bn'``, ``'sync_bn'``, ``'gn'`` or
            ``'affine_channel'``.
        norm_groups: ``groups`` of the group norm (``'gn'`` only).
        lr_scale: learning-rate multiplier of the conv weights and norm
            parameters; the conv bias, when enabled, uses ``lr_scale * 2``.
        freeze_norm: when truthy the norm parameters get a learning rate of
            0 and ``stop_gradient = True``; batch norm additionally uses
            global statistics.
        act: activation applied by the normalization layer (the convolution
            itself has none).
        norm_name (str): prefix of the norm parameter names; concatenated
            with string suffixes, so it must not be ``None``.
        initializer: initializer of the conv weights.
        bias_attr: truthy to add a zero-initialised conv bias named
            ``name + "_bias"``.
        name (str): prefix of the conv parameter/layer names; must not be
            ``None`` for the same reason as ``norm_name``.

    Returns:
        The output variable of the normalization layer.

    Raises:
        ValueError: if ``norm_type`` is not one of the supported values.
    """
    # Reject unknown norm types before any layer is built. Previously such a
    # value matched no branch below and only failed later with an
    # unbound-variable error, after the convolution had been created.
    if norm_type not in ('bn', 'sync_bn', 'gn', 'affine_channel'):
        raise ValueError(
            "norm_type must be one of 'bn', 'sync_bn', 'gn' or "
            "'affine_channel', got {!r}".format(norm_type))

    if bias_attr:
        bias_para = ParamAttr(
            name=name + "_bias",
            initializer=fluid.initializer.Constant(value=0),
            learning_rate=lr_scale * 2)
    else:
        bias_para = False
    conv = fluid.layers.conv2d(
        input=input,
        num_filters=num_filters,
        filter_size=filter_size,
        stride=stride,
        padding=((filter_size - 1) // 2) * dilation,
        dilation=dilation,
        groups=groups,
        act=None,
        param_attr=ParamAttr(
            name=name + "_weights",
            initializer=initializer,
            learning_rate=lr_scale),
        bias_attr=bias_para,
        name=name + '.conv2d.output.1')

    # A frozen norm keeps its parameters by zeroing their learning rate.
    norm_lr = 0. if freeze_norm else 1.
    pattr = ParamAttr(
        name=norm_name + '_scale',
        learning_rate=norm_lr * lr_scale,
        regularizer=L2Decay(norm_decay))
    battr = ParamAttr(
        name=norm_name + '_offset',
        learning_rate=norm_lr * lr_scale,
        regularizer=L2Decay(norm_decay))

    if norm_type in ['bn', 'sync_bn']:
        global_stats = True if freeze_norm else False
        out = fluid.layers.batch_norm(
            input=conv,
            act=act,
            name=norm_name + '.output.1',
            param_attr=pattr,
            bias_attr=battr,
            moving_mean_name=norm_name + '_mean',
            moving_variance_name=norm_name + '_variance',
            use_global_stats=global_stats)
        # Look the created parameters up by name so they can be frozen below.
        scale = fluid.framework._get_var(pattr.name)
        bias = fluid.framework._get_var(battr.name)
    elif norm_type == 'gn':
        out = fluid.layers.group_norm(
            input=conv,
            act=act,
            name=norm_name + '.output.1',
            groups=norm_groups,
            param_attr=pattr,
            bias_attr=battr)
        scale = fluid.framework._get_var(pattr.name)
        bias = fluid.framework._get_var(battr.name)
    else:  # 'affine_channel' (validated above)
        scale = fluid.layers.create_parameter(
            shape=[conv.shape[1]],
            dtype=conv.dtype,
            attr=pattr,
            default_initializer=fluid.initializer.Constant(1.))
        bias = fluid.layers.create_parameter(
            shape=[conv.shape[1]],
            dtype=conv.dtype,
            attr=battr,
            default_initializer=fluid.initializer.Constant(0.))
        out = fluid.layers.affine_channel(
            x=conv, scale=scale, bias=bias, act=act)
    if freeze_norm:
        scale.stop_gradient = True
        bias.stop_gradient = True
    return out
|
||
|
|
||
|
|
||
|
def DropBlock(input, block_size, keep_prob, is_test):
    """Apply DropBlock-style masking to ``input``.

    Args:
        input: feature-map variable. Entry 3 of its shape is used as the
            feature size (assumes an NCHW layout where that is the width --
            TODO confirm against callers).
        block_size (int): side of the dropped blocks; also the max-pool
            window, padded by ``block_size // 2``.
        keep_prob (float): the seed probability is scaled by
            ``1 - keep_prob``.
        is_test: when truthy ``input`` is returned untouched.

    Returns:
        ``input * mask * numel(input) / sum(mask)``, or ``input`` itself when
        ``is_test`` is truthy.
    """
    if is_test:
        return input

    # --- gamma: per-element probability of seeding a dropped block ---------
    # gamma = feat**2 * (1 - keep_prob) / (block**2 * (feat - block + 1)**2)
    shape_for_gamma = fluid.layers.shape(input)
    feat_size = fluid.layers.slice(shape_for_gamma, [0], [3], [4])
    feat_size = fluid.layers.cast(feat_size, dtype="float32")
    feat_size = fluid.layers.reshape(feat_size, [1, 1, 1, 1])
    feat_area = fluid.layers.pow(feat_size, factor=2)

    block_size_t = fluid.layers.fill_constant(
        shape=[1, 1, 1, 1], value=block_size, dtype='float32')
    block_area = fluid.layers.pow(block_size_t, factor=2)

    valid_size = feat_size - block_size_t + 1
    valid_area = fluid.layers.pow(valid_size, factor=2)

    numerator = feat_area * (1 - keep_prob)
    denominator = block_area * valid_area
    gamma = numerator / denominator

    # --- sample block seeds -------------------------------------------------
    input_shape = fluid.layers.shape(input)
    seed_prob = fluid.layers.expand_as(gamma, input)

    shape_i64 = fluid.layers.cast(input_shape, dtype="int64")
    noise = fluid.layers.uniform_random(
        shape_i64, dtype='float32', min=0.0, max=1.0)
    seeds = fluid.layers.less_than(noise, seed_prob)
    seeds.stop_gradient = True
    seeds = fluid.layers.cast(seeds, dtype="float32")

    # --- grow every seed into a block with a stride-1 max pool -------------
    dropped = fluid.layers.pool2d(
        seeds,
        pool_size=block_size,
        pool_type='max',
        pool_stride=1,
        pool_padding=block_size // 2)
    mask = 1.0 - dropped

    # --- rescale by (total elements / kept elements) ------------------------
    total = fluid.layers.reduce_prod(input_shape)
    total_f = fluid.layers.cast(total, dtype="float32")
    total_f.stop_gradient = True

    kept = fluid.layers.reduce_sum(mask)
    kept_f = fluid.layers.cast(kept, dtype="float32")
    kept_f.stop_gradient = True

    masked = fluid.layers.elementwise_mul(input, mask)
    return masked * total_f / kept_f
|
||
|
|
||
|
|
||
|
@register
@serializable
class AnchorGenerator(object):
    # Configuration holder tied to `fluid.layers.anchor_generator` through
    # `__op__`. How `register` uses `__op__` / `__append_doc__` is defined
    # outside this file.
    # NOTE(review): presumably the instance attributes are forwarded to the op
    # as keyword arguments, so their names are kept exactly -- confirm in
    # ppdet.core.workspace.
    __op__ = fluid.layers.anchor_generator
    __append_doc__ = True

    def __init__(self,
                 stride=[16.0, 16.0],
                 anchor_sizes=[32, 64, 128, 256, 512],
                 aspect_ratios=[0.5, 1., 2.],
                 variance=[1., 1., 1., 1.]):
        super(AnchorGenerator, self).__init__()
        # The list defaults are stored as-is (not copied), so they are shared
        # between instances; do not mutate them in place.
        self.stride = stride
        self.anchor_sizes = anchor_sizes
        self.aspect_ratios = aspect_ratios
        self.variance = variance
|
||
|
|
||
|
|
||
|
@register
@serializable
class AnchorGrid(object):
    """Generate anchor grid

    Args:
        image_size (int or list): input image size, may be a single integer or
            list of [h, w]. Default: 512
        min_level (int): min level of the feature pyramid. Default: 3
        max_level (int): max level of the feature pyramid. Default: 7
        anchor_base_scale: base anchor scale. Default: 4
        num_scales: number of anchor scales. Default: 3
        aspect_ratios: aspect ratios. default: [[1, 1], [1.4, 0.7], [0.7, 1.4]]
    """

    def __init__(self,
                 image_size=512,
                 min_level=3,
                 max_level=7,
                 anchor_base_scale=4,
                 num_scales=3,
                 aspect_ratios=[[1, 1], [1.4, 0.7], [0.7, 1.4]]):
        super(AnchorGrid, self).__init__()
        # A single integer means a square image.
        if isinstance(image_size, Integral):
            self.image_size = [image_size, image_size]
        else:
            self.image_size = image_size
        for dim in self.image_size:
            assert dim % 2 ** max_level == 0, \
                "image size should be multiple of the max level stride"
        self.min_level = min_level
        self.max_level = max_level
        self.anchor_base_scale = anchor_base_scale
        self.num_scales = num_scales
        self.aspect_ratios = aspect_ratios

    @property
    def base_cell(self):
        """Unit anchor cell, built on first access and cached on the instance."""
        if not hasattr(self, '_base_cell'):
            self._base_cell = self.make_cell()
        return self._base_cell

    def make_cell(self):
        """Return the ``(num_scales * num_ratios, 4)`` unit anchors.

        Each row is ``[-w/2, -h/2, w/2, h/2]`` for one (scale, ratio) pair,
        where the scales are ``2**(i / num_scales)``.
        """
        # float() forces true division on Python 2 as well; with integer
        # division every exponent would be 0 and all scales would equal 1.
        scales = np.array([
            2**(float(i) / self.num_scales) for i in range(self.num_scales)
        ])
        ratios = np.array(self.aspect_ratios)
        ws = np.outer(scales, ratios[:, 0]).reshape(-1, 1)
        hs = np.outer(scales, ratios[:, 1]).reshape(-1, 1)
        anchors = np.hstack((-0.5 * ws, -0.5 * hs, 0.5 * ws, 0.5 * hs))
        return anchors

    def make_grid(self, stride):
        """Return all anchors of one pyramid level as an ``(N, 4)`` array.

        The unit cell is scaled by ``stride * anchor_base_scale`` and shifted
        to every centre ``stride // 2 + k * stride`` that lies inside the
        image.
        """
        cell = self.base_cell * stride * self.anchor_base_scale
        # image_size is [h, w]: x runs over the width, y over the height.
        x_steps = np.arange(stride // 2, self.image_size[1], stride)
        y_steps = np.arange(stride // 2, self.image_size[0], stride)
        offset_x, offset_y = np.meshgrid(x_steps, y_steps)
        offset_x = offset_x.flatten()
        offset_y = offset_y.flatten()
        offsets = np.stack((offset_x, offset_y, offset_x, offset_y), axis=-1)
        # Broadcast (positions, 1, 4) against (anchors_per_cell, 4).
        offsets = offsets[:, np.newaxis, :]
        return (cell + offsets).reshape(-1, 4)

    def generate(self):
        """Return the anchor arrays of every level from min_level to max_level."""
        return [
            self.make_grid(2**level)
            for level in range(self.min_level, self.max_level + 1)
        ]

    def __call__(self):
        """Return one parameter variable per level holding that level's anchors.

        The variables are created on the first call, named ``anchors_<idx>``
        with ``idx`` counted from 0 at ``min_level``, and cached for later
        calls.
        """
        if not hasattr(self, '_anchor_vars'):
            anchor_vars = []
            helper = LayerHelper('anchor_grid')
            levels = range(self.min_level, self.max_level + 1)
            for idx, level in enumerate(levels):
                stride = 2**level
                anchors = self.make_grid(stride)
                var = helper.create_parameter(
                    attr=ParamAttr(name='anchors_{}'.format(idx)),
                    shape=anchors.shape,
                    dtype='float32',
                    stop_gradient=True,
                    default_initializer=NumpyArrayInitializer(anchors))
                anchor_vars.append(var)
                var.persistable = True
            self._anchor_vars = anchor_vars

        return self._anchor_vars
|
||
|
|
||
|
|
||
|
@register
@serializable
class RPNTargetAssign(object):
    # Configuration holder tied to `fluid.layers.rpn_target_assign` through
    # `__op__`; the binding logic lives in `register`, outside this file.
    # NOTE(review): attribute names presumably have to match the op's keyword
    # arguments -- confirm in ppdet.core.workspace.
    __op__ = fluid.layers.rpn_target_assign
    __append_doc__ = True

    def __init__(self,
                 rpn_batch_size_per_im=256,
                 rpn_straddle_thresh=0.,
                 rpn_fg_fraction=0.5,
                 rpn_positive_overlap=0.7,
                 rpn_negative_overlap=0.3,
                 use_random=True):
        super(RPNTargetAssign, self).__init__()
        # Sampling budget.
        self.rpn_batch_size_per_im = rpn_batch_size_per_im
        self.rpn_fg_fraction = rpn_fg_fraction
        self.use_random = use_random
        # Thresholds.
        self.rpn_straddle_thresh = rpn_straddle_thresh
        self.rpn_positive_overlap = rpn_positive_overlap
        self.rpn_negative_overlap = rpn_negative_overlap
|
||
|
|
||
|
|
||
|
@register
@serializable
class GenerateProposals(object):
    # Configuration holder tied to `fluid.layers.generate_proposals` through
    # `__op__`; the binding logic lives in `register`, outside this file.
    # NOTE(review): attribute names presumably have to match the op's keyword
    # arguments -- confirm in ppdet.core.workspace.
    __op__ = fluid.layers.generate_proposals
    __append_doc__ = True

    def __init__(self,
                 pre_nms_top_n=6000,
                 post_nms_top_n=1000,
                 nms_thresh=.5,
                 min_size=.1,
                 eta=1.):
        super(GenerateProposals, self).__init__()
        # Top-n limits.
        self.pre_nms_top_n = pre_nms_top_n
        self.post_nms_top_n = post_nms_top_n
        # NMS settings.
        self.nms_thresh = nms_thresh
        self.eta = eta
        self.min_size = min_size
|
||
|
|
||
|
|
||
|
@register
class MaskAssigner(object):
    # Configuration holder tied to `fluid.layers.generate_mask_labels`
    # through `__op__`. `num_classes` is listed in `__shared__`; what
    # `register` does with these class attributes is defined outside this file.
    __op__ = fluid.layers.generate_mask_labels
    __append_doc__ = True
    __shared__ = ['num_classes']

    def __init__(self, num_classes=81, resolution=14):
        super(MaskAssigner, self).__init__()
        self.resolution = resolution
        self.num_classes = num_classes
|
||
|
|
||
|
|
||
|
@register
@serializable
class MultiClassNMS(object):
    # Configuration holder tied to `fluid.layers.multiclass_nms` through
    # `__op__`; the binding logic lives in `register`, outside this file.
    # NOTE(review): attribute names presumably have to match the op's keyword
    # arguments -- confirm in ppdet.core.workspace.
    __op__ = fluid.layers.multiclass_nms
    __append_doc__ = True

    def __init__(self,
                 score_threshold=.05,
                 nms_top_k=-1,
                 keep_top_k=100,
                 nms_threshold=.5,
                 normalized=False,
                 nms_eta=1.0,
                 background_label=0):
        super(MultiClassNMS, self).__init__()
        # Thresholds.
        self.score_threshold = score_threshold
        self.nms_threshold = nms_threshold
        self.nms_eta = nms_eta
        # Top-k limits.
        self.nms_top_k = nms_top_k
        self.keep_top_k = keep_top_k
        # Box/label conventions.
        self.normalized = normalized
        self.background_label = background_label
|
||
|
|
||
|
|
||
|
@register
@serializable
class MatrixNMS(object):
    # Configuration holder for the matrix NMS op. Unlike the sibling wrappers,
    # `__op__` is a dotted-path string rather than the op object itself.
    # NOTE(review): presumably so this module still imports when the installed
    # paddle lacks `matrix_nms` -- confirm against `register`.
    __op__ = 'paddle.fluid.layers.matrix_nms'
    __append_doc__ = True

    def __init__(self,
                 score_threshold=.05,
                 post_threshold=.05,
                 nms_top_k=-1,
                 keep_top_k=100,
                 use_gaussian=False,
                 gaussian_sigma=2.,
                 normalized=False,
                 background_label=0):
        super(MatrixNMS, self).__init__()
        # Thresholds.
        self.score_threshold = score_threshold
        self.post_threshold = post_threshold
        # Top-k limits.
        self.nms_top_k = nms_top_k
        self.keep_top_k = keep_top_k
        # Gaussian settings.
        self.use_gaussian = use_gaussian
        self.gaussian_sigma = gaussian_sigma
        # Box/label conventions.
        self.normalized = normalized
        self.background_label = background_label
|
||
|
|
||
|
|
||
|
@register
@serializable
class MultiClassSoftNMS(object):
    # Per-class Soft-NMS evaluated with NumPy inside a `py_func` op.
    def __init__(
            self,
            score_threshold=0.01,
            keep_top_k=300,
            softnms_sigma=0.5,
            normalized=False,
            background_label=0, ):
        super(MultiClassSoftNMS, self).__init__()
        self.score_threshold = score_threshold
        self.softnms_sigma = softnms_sigma
        self.keep_top_k = keep_top_k
        self.normalized = normalized
        self.background_label = background_label

    def __call__(self, bboxes, scores):
        """Append a ``py_func`` op running Soft-NMS and return its output.

        Args:
            bboxes: box variable. At run time it is either a LoD tensor whose
                first LoD level delimits the images, or (without LoD) a 3-D
                batch.
            scores: score variable laid out like ``bboxes``, with the classes
                on the last axis.

        Returns:
            The variable ``softnms_pred_result`` (float32, shape ``[-1, 6]``,
            ``lod_level=1``). Rows are
            ``[class_id, score, x1, y1, x2, y2]``.
        """

        def create_tmp_var(program, name, dtype, shape, lod_level):
            return program.current_block().create_var(
                name=name, dtype=dtype, shape=shape, lod_level=lod_level)

        def _soft_nms_for_cls(dets, sigma, thres):
            """Soft-NMS for one class; ``dets`` rows are [score, x1, y1, x2, y2]."""
            # Pixel-coordinate boxes get the +1 width/height convention.
            eta = 0 if self.normalized else 1
            picked = []
            while len(dets) > 0:
                top = np.argmax(dets[:, 0])
                picked.append(dets[top].copy())
                _, tx1, ty1, tx2, ty2 = dets[top]

                cur_scores = dets[:, 0]
                # force remove bbox at maxpos (writes through to `dets`)
                cur_scores[top] = -1

                x1, y1 = dets[:, 1], dets[:, 2]
                x2, y2 = dets[:, 3], dets[:, 4]
                areas = (x2 - x1 + eta) * (y2 - y1 + eta)

                # Intersection of the selected box with every box.
                inter_w = np.maximum(
                    0.0, np.minimum(tx2, x2) - np.maximum(tx1, x1) + eta)
                inter_h = np.maximum(
                    0.0, np.minimum(ty2, y2) - np.maximum(ty1, y1) + eta)
                inter = inter_w * inter_h
                iou = inter / (areas + areas[top] - inter)

                # Gaussian decay of the scores by overlap.
                decayed = cur_scores * np.exp(-(iou * iou) / sigma)
                survivors = np.where(decayed >= thres)
                dets[:, 0] = decayed
                dets = dets[survivors]
            return np.array(picked).reshape(-1, 5)

        def _soft_nms(bboxes, scores):
            """Soft-NMS over all classes of one image."""
            class_nums = scores.shape[-1]
            thres = self.score_threshold
            sigma = self.softnms_sigma
            keep_top_k = self.keep_top_k

            # Class 0 is skipped when it is the background label.
            first_cls = 1 if self.background_label == 0 else 0
            boxes_per_cls = []
            ids_per_cls = []
            for j in range(first_cls, class_nums):
                inds = np.where(scores[:, j] >= thres)[0]
                scores_j = scores[inds, j]
                # Boxes may be per-class (3-D) or shared by all classes (2-D).
                if len(bboxes.shape) > 2:
                    rois_j = bboxes[inds, j, :]
                else:
                    rois_j = bboxes[inds, :]
                dets_j = np.hstack((scores_j[:, np.newaxis], rois_j)).astype(
                    np.float32, copy=False)
                # Sort by descending score.
                dets_j = dets_j[np.argsort(-dets_j[:, 0])]

                kept_j = _soft_nms_for_cls(dets_j, sigma=sigma, thres=thres)
                boxes_per_cls.append(kept_j)
                ids_per_cls.append(
                    np.array([j] * kept_j.shape[0]).reshape(-1, 1))

            cls_boxes = np.vstack(boxes_per_cls)
            cls_ids = np.vstack(ids_per_cls)
            pred_result = np.hstack([cls_ids, cls_boxes])

            # Limit to max_per_image detections **over all classes**
            image_scores = cls_boxes[:, 0]
            if len(image_scores) > keep_top_k:
                image_thresh = np.sort(image_scores)[-keep_top_k]
                keep = np.where(cls_boxes[:, 0] >= image_thresh)[0]
                pred_result = pred_result[keep, :]

            return pred_result

        def _batch_softnms(bboxes, scores):
            """``py_func`` body: run ``_soft_nms`` per image, return a LoD tensor."""
            batch_offsets = bboxes.lod()
            bboxes = np.array(bboxes)
            scores = np.array(scores)

            if len(batch_offsets) > 0:
                # LoD input: consecutive offsets delimit each image's rows.
                offsets = batch_offsets[0]
                per_image = [(bboxes[s:e], scores[s:e])
                             for s, e in zip(offsets[:-1], offsets[1:])]
            else:
                assert len(bboxes.shape) == 3
                assert len(scores.shape) == 3
                per_image = [(bboxes[i], scores[i])
                             for i in range(bboxes.shape[0])]

            out_offsets = [0]
            pred_res = []
            for boxes_i, scores_i in per_image:
                pred = _soft_nms(boxes_i, scores_i)
                out_offsets.append(pred.shape[0] + out_offsets[-1])
                pred_res.append(pred)

            res = fluid.LoDTensor()
            res.set_lod([out_offsets])
            # Placeholder when there was no image to process at all.
            if len(pred_res) == 0:
                pred_res = np.array([[1]], dtype=np.float32)
            res.set(np.vstack(pred_res).astype(np.float32), fluid.CPUPlace())
            return res

        pred_result = create_tmp_var(
            fluid.default_main_program(),
            name='softnms_pred_result',
            dtype='float32',
            shape=[-1, 6],
            lod_level=1)
        fluid.layers.py_func(
            func=_batch_softnms, x=[bboxes, scores], out=pred_result)
        return pred_result
|
||
|
|
||
|
|
||
|
@register
@serializable
class MultiClassDiouNMS(object):
    # Per-class NMS using an IoU-minus-distance-term criterion, evaluated
    # with NumPy inside a `py_func` op.
    def __init__(
            self,
            score_threshold=0.05,
            keep_top_k=100,
            nms_threshold=0.5,
            normalized=False,
            background_label=0, ):
        super(MultiClassDiouNMS, self).__init__()
        self.score_threshold = score_threshold
        self.nms_threshold = nms_threshold
        self.keep_top_k = keep_top_k
        self.normalized = normalized
        self.background_label = background_label

    def __call__(self, bboxes, scores):
        """Append a ``py_func`` op running DIoU-NMS and return its output.

        Args:
            bboxes: box variable; indexed as ``bboxes[n, class, :]`` at run
                time, so it must be 3-D.
            scores: score variable; indexed as ``scores[n, class]``.

        Returns:
            The variable ``diou_nms_pred_result`` (float32, shape
            ``[-1, 6]``). Rows are ``[class_id, score, x1, y1, x2, y2]``; a
            single ``[[1]]`` placeholder is produced when nothing is kept.
        """

        def create_tmp_var(program, name, dtype, shape, lod_level):
            return program.current_block().create_var(
                name=name, dtype=dtype, shape=shape, lod_level=lod_level)

        def _calc_diou_term(dets1, dets2):
            """Squared centre distance over squared enclosing-box diagonal."""
            eps = 1.e-10

            x1, y1, x2, y2 = dets1[0], dets1[1], dets1[2], dets1[3]
            x1g, y1g, x2g, y2g = dets2[0], dets2[1], dets2[2], dets2[3]

            # Box centres (taken before x2/y2 are clamped below).
            cx = (x1 + x2) / 2
            cy = (y1 + y2) / 2
            cxg = (x1g + x2g) / 2
            cyg = (y1g + y2g) / 2

            # Guard against degenerate boxes with x2 < x1 or y2 < y1.
            x2 = np.maximum(x1, x2)
            y2 = np.maximum(y1, y2)

            # Smallest box enclosing both boxes.
            xc1 = np.minimum(x1, x1g)
            yc1 = np.minimum(y1, y1g)
            xc2 = np.maximum(x2, x2g)
            yc2 = np.maximum(y2, y2g)

            # DIOU term
            dist_intersection = (cx - cxg)**2 + (cy - cyg)**2
            dist_union = (xc2 - xc1)**2 + (yc2 - yc1)**2
            diou_term = (dist_intersection + eps) / (dist_union + eps)
            return diou_term

        def _diou_nms_for_cls(dets, thres):
            """NMS for one class; ``dets`` rows are [score, x1, y1, x2, y2].

            ``dets`` must already be sorted by descending score.
            """
            x1 = dets[:, 1]
            y1 = dets[:, 2]
            x2 = dets[:, 3]
            y2 = dets[:, 4]
            # Pixel-coordinate boxes get the +1 width/height convention.
            eta = 0 if self.normalized else 1
            areas = (x2 - x1 + eta) * (y2 - y1 + eta)
            order = np.arange(dets.shape[0])

            keep = []
            while order.size > 0:
                i = order[0]
                keep.append(i)
                rest = order[1:]
                xx1 = np.maximum(x1[i], x1[rest])
                yy1 = np.maximum(y1[i], y1[rest])
                xx2 = np.minimum(x2[i], x2[rest])
                yy2 = np.minimum(y2[i], y2[rest])

                w = np.maximum(0.0, xx2 - xx1 + eta)
                h = np.maximum(0.0, yy2 - yy1 + eta)
                inter = w * h
                ovr = inter / (areas[i] + areas[rest] - inter)

                diou_term = _calc_diou_term(
                    [x1[i], y1[i], x2[i], y2[i]],
                    [x1[rest], y1[rest], x2[rest], y2[rest]])

                # Survivors: IoU minus the distance term within the threshold.
                inds = np.where(ovr - diou_term <= thres)[0]

                # `inds` index into `rest`, hence the +1 shift into `order`.
                order = order[inds + 1]

            dets_final = dets[keep]
            return dets_final

        def _diou_nms(bboxes, scores):
            """``py_func`` body: NMS over all classes, returned as a LoD tensor."""
            bboxes = np.array(bboxes)
            scores = np.array(scores)
            class_nums = scores.shape[-1]

            score_threshold = self.score_threshold
            nms_threshold = self.nms_threshold
            keep_top_k = self.keep_top_k

            cls_boxes = [[] for _ in range(class_nums)]
            cls_ids = [[] for _ in range(class_nums)]

            # Class 0 is skipped when it is the background label.
            start_idx = 1 if self.background_label == 0 else 0
            for j in range(start_idx, class_nums):
                inds = np.where(scores[:, j] >= score_threshold)[0]
                scores_j = scores[inds, j]
                rois_j = bboxes[inds, j, :]
                dets_j = np.hstack((scores_j[:, np.newaxis], rois_j)).astype(
                    np.float32, copy=False)
                # Sort by descending score.
                cls_rank = np.argsort(-dets_j[:, 0])
                dets_j = dets_j[cls_rank]

                cls_boxes[j] = _diou_nms_for_cls(dets_j, thres=nms_threshold)
                cls_ids[j] = np.array([j] * cls_boxes[j].shape[0]).reshape(-1,
                                                                         1)

            cls_boxes = np.vstack(cls_boxes[start_idx:])
            cls_ids = np.vstack(cls_ids[start_idx:])
            pred_result = np.hstack([cls_ids, cls_boxes]).astype(np.float32)

            # Limit to max_per_image detections **over all classes**
            image_scores = cls_boxes[:, 0]
            if len(image_scores) > keep_top_k:
                image_thresh = np.sort(image_scores)[-keep_top_k]
                keep = np.where(cls_boxes[:, 0] >= image_thresh)[0]
                pred_result = pred_result[keep, :]

            res = fluid.LoDTensor()
            res.set_lod([[0, pred_result.shape[0]]])
            # Placeholder row when no detection survived.
            if pred_result.shape[0] == 0:
                pred_result = np.array([[1]], dtype=np.float32)
            res.set(pred_result, fluid.CPUPlace())

            return res

        pred_result = create_tmp_var(
            fluid.default_main_program(),
            name='diou_nms_pred_result',
            dtype='float32',
            shape=[-1, 6],
            lod_level=0)
        fluid.layers.py_func(
            func=_diou_nms, x=[bboxes, scores], out=pred_result)
        return pred_result
|
||
|
|
||
|
|
||
|
@register
class BBoxAssigner(object):
    # Configuration holder tied to `fluid.layers.generate_proposal_labels`
    # through `__op__`. `num_classes` is listed in `__shared__`; what
    # `register` does with these class attributes is defined outside this file.
    __op__ = fluid.layers.generate_proposal_labels
    __append_doc__ = True
    __shared__ = ['num_classes']

    def __init__(self,
                 batch_size_per_im=512,
                 fg_fraction=.25,
                 fg_thresh=.5,
                 bg_thresh_hi=.5,
                 bg_thresh_lo=0.,
                 bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
                 num_classes=81,
                 shuffle_before_sample=True):
        super(BBoxAssigner, self).__init__()
        # Sampling budget.
        self.batch_size_per_im = batch_size_per_im
        self.fg_fraction = fg_fraction
        # Foreground / background thresholds.
        self.fg_thresh = fg_thresh
        self.bg_thresh_hi = bg_thresh_hi
        self.bg_thresh_lo = bg_thresh_lo
        # The list default is stored as-is (not copied).
        self.bbox_reg_weights = bbox_reg_weights
        # These two arguments are stored under different attribute names.
        # NOTE(review): presumably the names the op expects -- confirm.
        self.class_nums = num_classes
        self.use_random = shuffle_before_sample
|
||
|
|
||
|
|
||
|
@register
|
||
|
class LibraBBoxAssigner(object):
|
||
|
__shared__ = ['num_classes']
|
||
|
|
||
|
def __init__(self,
|
||
|
batch_size_per_im=512,
|
||
|
fg_fraction=.25,
|
||
|
fg_thresh=.5,
|
||
|
bg_thresh_hi=.5,
|
||
|
bg_thresh_lo=0.,
|
||
|
bbox_reg_weights=[0.1, 0.1, 0.2, 0.2],
|
||
|
num_classes=81,
|
||
|
shuffle_before_sample=True,
|
||
|
is_cls_agnostic=False,
|
||
|
num_bins=3):
|
||
|
super(LibraBBoxAssigner, self).__init__()
|
||
|
self.batch_size_per_im = batch_size_per_im
|
||
|
self.fg_fraction = fg_fraction
|
||
|
self.fg_thresh = fg_thresh
|
||
|
self.bg_thresh_hi = bg_thresh_hi
|
||
|
self.bg_thresh_lo = bg_thresh_lo
|
||
|
self.bbox_reg_weights = bbox_reg_weights
|
||
|
self.class_nums = num_classes
|
||
|
self.use_random = shuffle_before_sample
|
||
|
self.is_cls_agnostic = is_cls_agnostic
|
||
|
self.num_bins = num_bins
|
||
|
|
||
|
def __call__(
|
||
|
self,
|
||
|
rpn_rois,
|
||
|
gt_classes,
|
||
|
is_crowd,
|
||
|
gt_boxes,
|
||
|
im_info, ):
|
||
|
return self.generate_proposal_label_libra(
|
||
|
rpn_rois=rpn_rois,
|
||
|
gt_classes=gt_classes,
|
||
|
is_crowd=is_crowd,
|
||
|
gt_boxes=gt_boxes,
|
||
|
im_info=im_info,
|
||
|
batch_size_per_im=self.batch_size_per_im,
|
||
|
fg_fraction=self.fg_fraction,
|
||
|
fg_thresh=self.fg_thresh,
|
||
|
bg_thresh_hi=self.bg_thresh_hi,
|
||
|
bg_thresh_lo=self.bg_thresh_lo,
|
||
|
bbox_reg_weights=self.bbox_reg_weights,
|
||
|
class_nums=self.class_nums,
|
||
|
use_random=self.use_random,
|
||
|
is_cls_agnostic=self.is_cls_agnostic,
|
||
|
is_cascade_rcnn=False)
|
||
|
|
||
|
def generate_proposal_label_libra(
        self, rpn_rois, gt_classes, is_crowd, gt_boxes, im_info,
        batch_size_per_im, fg_fraction, fg_thresh, bg_thresh_hi,
        bg_thresh_lo, bbox_reg_weights, class_nums, use_random,
        is_cls_agnostic, is_cascade_rcnn):
    """
    Append a py_func op that samples RoIs and builds their training targets.

    For every image (delimited by the LoD of `rpn_rois` and `gt_classes`),
    proposals are matched to ground-truth boxes by IoU. Foreground RoIs are
    sampled per distinct label and background RoIs are sampled evenly over
    `self.num_bins` IoU intervals when `use_random` is set.

    Args:
        rpn_rois (Variable): LoD tensor of proposals, one sequence per image.
        gt_classes (Variable): LoD tensor of ground-truth class ids.
        is_crowd (Variable): crowd flags; rows selected by its non-zero
            entries get overlap -1.
        gt_boxes (Variable): ground-truth boxes, 4 values per row.
        im_info (Variable): per-image info; element 2 of each row is used
            as the image scale.
        batch_size_per_im (int): number of RoIs kept per image.
        fg_fraction (float): share of `batch_size_per_im` given to foreground.
        fg_thresh (float): minimum overlap for a foreground RoI.
        bg_thresh_hi (float): exclusive upper overlap bound for background.
        bg_thresh_lo (float): inclusive lower overlap bound for background.
        bbox_reg_weights (list): weights forwarded to `box_to_delta`.
        class_nums (int): number of columns of the per-class overlap table.
        use_random (bool): enable the balanced samplers; otherwise the
            leading candidates are taken.
        is_cls_agnostic (bool): use 2 regression slots instead of
            `class_nums`.
        is_cascade_rcnn (bool): drop the leading `len(gt_boxes)` proposals,
            filter degenerate boxes and skip sub-sampling.

    Returns:
        list: `[rois, labels_int32, bbox_targets, bbox_inside_weights,
        bbox_outside_weights]` output variables of the py_func op.
    """
    num_bins = self.num_bins

    def create_tmp_var(program, name, dtype, shape, lod_level=None):
        # Output placeholder living in the program's current block.
        return program.current_block().create_var(
            name=name, dtype=dtype, shape=shape, lod_level=lod_level)

    def _sample_pos(max_overlaps, max_classes, pos_inds, num_expected):
        # Sample foregrounds so each distinct label contributes evenly.
        if len(pos_inds) <= num_expected:
            return pos_inds

        unique_gt_inds = np.unique(max_classes[pos_inds])
        num_gts = len(unique_gt_inds)
        num_per_gt = int(round(num_expected / float(num_gts)) + 1)

        sampled_inds = []
        for i in unique_gt_inds:
            inds = np.nonzero(max_classes == i)[0]
            inds = list(set(inds) & set(pos_inds))
            if len(inds) > num_per_gt:
                inds = np.random.choice(inds, size=num_per_gt, replace=False)
            sampled_inds.extend(list(inds))  # combine as a new sampler

        if len(sampled_inds) < num_expected:
            # Top up with positives that were not picked yet.
            num_extra = num_expected - len(sampled_inds)
            extra_inds = np.array(list(set(pos_inds) - set(sampled_inds)))
            assert len(sampled_inds)+len(extra_inds) == len(pos_inds), \
                "sum of sampled_inds({}) and extra_inds({}) length must be equal with pos_inds({})!".format(
                    len(sampled_inds), len(extra_inds), len(pos_inds))
            if len(extra_inds) > num_extra:
                extra_inds = np.random.choice(
                    extra_inds, size=num_extra, replace=False)
            sampled_inds.extend(extra_inds.tolist())
        elif len(sampled_inds) > num_expected:
            sampled_inds = np.random.choice(
                sampled_inds, size=num_expected, replace=False)
        return sampled_inds

    def sample_via_interval(max_overlaps, full_set, num_expected, floor_thr,
                            num_bins, bg_thresh_hi):
        # Split [floor_thr, max IoU) into num_bins equal intervals and draw
        # the same number of candidates from each one.
        max_iou = max_overlaps.max()
        iou_interval = (max_iou - floor_thr) / num_bins
        per_num_expected = int(num_expected / num_bins)

        sampled_inds = []
        for i in range(num_bins):
            start_iou = floor_thr + i * iou_interval
            end_iou = floor_thr + (i + 1) * iou_interval

            tmp_set = set(
                np.where(
                    np.logical_and(max_overlaps >= start_iou,
                                   max_overlaps < end_iou))[0])
            tmp_inds = list(tmp_set & full_set)

            if len(tmp_inds) > per_num_expected:
                tmp_sampled_set = np.random.choice(
                    tmp_inds, size=per_num_expected, replace=False)
            else:
                # np.int was removed from NumPy; use an explicit int dtype.
                tmp_sampled_set = np.array(tmp_inds, dtype=np.int64)
            sampled_inds.append(tmp_sampled_set)

        sampled_inds = np.concatenate(sampled_inds)
        if len(sampled_inds) < num_expected:
            num_extra = num_expected - len(sampled_inds)
            extra_inds = np.array(list(full_set - set(sampled_inds)))
            assert len(sampled_inds)+len(extra_inds) == len(full_set), \
                "sum of sampled_inds({}) and extra_inds({}) length must be equal with full_set({})!".format(
                    len(sampled_inds), len(extra_inds), len(full_set))

            if len(extra_inds) > num_extra:
                extra_inds = np.random.choice(
                    extra_inds, num_extra, replace=False)
            sampled_inds = np.concatenate([sampled_inds, extra_inds])

        return sampled_inds

    def _sample_neg(max_overlaps,
                    max_classes,
                    neg_inds,
                    num_expected,
                    floor_thr=-1,
                    floor_fraction=0,
                    num_bins=3,
                    bg_thresh_hi=0.5):
        # IoU-balanced background sampling.
        if len(neg_inds) <= num_expected:
            return neg_inds

        # balance sampling for negative samples
        neg_set = set(neg_inds)
        if floor_thr > 0:
            floor_set = set(
                np.where(
                    np.logical_and(max_overlaps >= 0,
                                   max_overlaps < floor_thr))[0])
            iou_sampling_set = set(np.where(max_overlaps >= floor_thr)[0])
        elif floor_thr == 0:
            floor_set = set(np.where(max_overlaps == 0)[0])
            iou_sampling_set = set(np.where(max_overlaps > floor_thr)[0])
        else:
            floor_set = set()
            iou_sampling_set = set(np.where(max_overlaps > floor_thr)[0])
            floor_thr = 0

        floor_neg_inds = list(floor_set & neg_set)
        iou_sampling_neg_inds = list(iou_sampling_set & neg_set)

        num_expected_iou_sampling = int(num_expected * (1 - floor_fraction))
        if len(iou_sampling_neg_inds) > num_expected_iou_sampling:
            if num_bins >= 2:
                iou_sampled_inds = sample_via_interval(
                    max_overlaps,
                    set(iou_sampling_neg_inds), num_expected_iou_sampling,
                    floor_thr, num_bins, bg_thresh_hi)
            else:
                iou_sampled_inds = np.random.choice(
                    iou_sampling_neg_inds,
                    size=num_expected_iou_sampling,
                    replace=False)
        else:
            iou_sampled_inds = np.array(
                iou_sampling_neg_inds, dtype=np.int64)

        num_expected_floor = num_expected - len(iou_sampled_inds)
        if len(floor_neg_inds) > num_expected_floor:
            sampled_floor_inds = np.random.choice(
                floor_neg_inds, size=num_expected_floor, replace=False)
        else:
            sampled_floor_inds = np.array(floor_neg_inds, dtype=np.int64)

        sampled_inds = np.concatenate((sampled_floor_inds, iou_sampled_inds))
        if len(sampled_inds) < num_expected:
            num_extra = num_expected - len(sampled_inds)
            extra_inds = np.array(list(neg_set - set(sampled_inds)))
            if len(extra_inds) > num_extra:
                extra_inds = np.random.choice(
                    extra_inds, size=num_extra, replace=False)
            sampled_inds = np.concatenate((sampled_inds, extra_inds))
        return sampled_inds

    def _sample_rois(rpn_rois, gt_classes, is_crowd, gt_boxes, im_info,
                     batch_size_per_im, fg_fraction, fg_thresh,
                     bg_thresh_hi, bg_thresh_lo, bbox_reg_weights,
                     class_nums, use_random, is_cls_agnostic,
                     is_cascade_rcnn):
        # Sample the RoIs of one image and compute their targets.
        rois_per_image = int(batch_size_per_im)
        fg_rois_per_im = int(np.round(fg_fraction * rois_per_image))

        # Roidb
        im_scale = im_info[2]
        inv_im_scale = 1. / im_scale
        rpn_rois = rpn_rois * inv_im_scale
        if is_cascade_rcnn:
            rpn_rois = rpn_rois[gt_boxes.shape[0]:, :]
        boxes = np.vstack([gt_boxes, rpn_rois])
        gt_overlaps = np.zeros((boxes.shape[0], class_nums))
        box_to_gt_ind_map = np.zeros((boxes.shape[0]), dtype=np.int32)
        if len(gt_boxes) > 0:
            proposal_to_gt_overlaps = bbox_overlaps(boxes, gt_boxes)

            overlaps_argmax = proposal_to_gt_overlaps.argmax(axis=1)
            overlaps_max = proposal_to_gt_overlaps.max(axis=1)
            # Boxes which with non-zero overlap with gt boxes
            overlapped_boxes_ind = np.where(overlaps_max > 0)[0]

            # Flatten so (N,) and (N, 1) class arrays index the same way.
            overlapped_boxes_gt_classes = gt_classes[overlaps_argmax[
                overlapped_boxes_ind]].reshape(-1)

            gt_overlaps[overlapped_boxes_ind,
                        overlapped_boxes_gt_classes] = overlaps_max[
                            overlapped_boxes_ind]
            box_to_gt_ind_map[overlapped_boxes_ind] = overlaps_argmax[
                overlapped_boxes_ind]

        crowd_ind = np.where(is_crowd)[0]
        gt_overlaps[crowd_ind] = -1

        max_overlaps = gt_overlaps.max(axis=1)
        max_classes = gt_overlaps.argmax(axis=1)

        # Cascade RCNN Decode Filter
        if is_cascade_rcnn:
            ws = boxes[:, 2] - boxes[:, 0] + 1
            hs = boxes[:, 3] - boxes[:, 1] + 1
            keep = np.where((ws > 0) & (hs > 0))[0]
            boxes = boxes[keep]
            max_overlaps = max_overlaps[keep]
            # fg/bg indices below refer to the filtered boxes, so every
            # per-box lookup table has to be filtered the same way.
            max_classes = max_classes[keep]
            box_to_gt_ind_map = box_to_gt_ind_map[keep]
            fg_inds = np.where(max_overlaps >= fg_thresh)[0]
            bg_inds = np.where((max_overlaps < bg_thresh_hi) & (
                max_overlaps >= bg_thresh_lo))[0]
            fg_rois_per_this_image = fg_inds.shape[0]
            bg_rois_per_this_image = bg_inds.shape[0]
        else:
            # Foreground
            fg_inds = np.where(max_overlaps >= fg_thresh)[0]
            fg_rois_per_this_image = np.minimum(fg_rois_per_im,
                                                fg_inds.shape[0])
            # Sample foreground if there are too many
            if fg_inds.shape[0] > fg_rois_per_this_image and use_random:
                fg_inds = _sample_pos(max_overlaps, max_classes, fg_inds,
                                      fg_rois_per_this_image)
            fg_inds = fg_inds[:fg_rois_per_this_image]

            # Background
            bg_inds = np.where((max_overlaps < bg_thresh_hi) & (
                max_overlaps >= bg_thresh_lo))[0]
            bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
            bg_rois_per_this_image = np.minimum(bg_rois_per_this_image,
                                                bg_inds.shape[0])
            assert bg_rois_per_this_image >= 0, "bg_rois_per_this_image must be >= 0 but got {}".format(
                bg_rois_per_this_image)

            # Sample background if there are too many
            if bg_inds.shape[0] > bg_rois_per_this_image and use_random:
                # libra neg sample
                bg_inds = _sample_neg(
                    max_overlaps,
                    max_classes,
                    bg_inds,
                    bg_rois_per_this_image,
                    num_bins=num_bins,
                    bg_thresh_hi=bg_thresh_hi)
            bg_inds = bg_inds[:bg_rois_per_this_image]

        keep_inds = np.append(fg_inds, bg_inds)
        sampled_labels = max_classes[keep_inds]
        # Everything after the foreground slice is background (label 0).
        sampled_labels[fg_rois_per_this_image:] = 0
        sampled_boxes = boxes[keep_inds]
        sampled_gts = gt_boxes[box_to_gt_ind_map[keep_inds]]
        # Background rows get the first gt box as a placeholder target.
        sampled_gts[fg_rois_per_this_image:, :] = gt_boxes[0]
        bbox_label_targets = _compute_targets(
            sampled_boxes, sampled_gts, sampled_labels, bbox_reg_weights)
        bbox_targets, bbox_inside_weights = _expand_bbox_targets(
            bbox_label_targets, class_nums, is_cls_agnostic)
        bbox_outside_weights = np.array(
            bbox_inside_weights > 0, dtype=bbox_inside_weights.dtype)
        # Scale rois
        sampled_rois = sampled_boxes * im_scale

        # Faster RCNN blobs
        frcn_blobs = dict(
            rois=sampled_rois,
            labels_int32=sampled_labels,
            bbox_targets=bbox_targets,
            bbox_inside_weights=bbox_inside_weights,
            bbox_outside_weights=bbox_outside_weights)
        return frcn_blobs

    def _compute_targets(roi_boxes, gt_boxes, labels, bbox_reg_weights):
        # Returns rows of [label, 4 regression deltas] as float32.
        assert roi_boxes.shape[0] == gt_boxes.shape[0]
        assert roi_boxes.shape[1] == 4
        assert gt_boxes.shape[1] == 4

        bbox_reg_weights = np.asarray(bbox_reg_weights)
        targets = box_to_delta(
            ex_boxes=roi_boxes, gt_boxes=gt_boxes, weights=bbox_reg_weights)

        return np.hstack([labels[:, np.newaxis], targets]).astype(
            np.float32, copy=False)

    def _expand_bbox_targets(bbox_targets_input, class_nums,
                             is_cls_agnostic):
        # Scatter each foreground row's deltas into its class slot.
        class_labels = bbox_targets_input[:, 0]
        fg_inds = np.where(class_labels > 0)[0]
        num_slots = 2 if is_cls_agnostic else class_nums
        bbox_targets = np.zeros((class_labels.shape[0], 4 * num_slots))
        bbox_inside_weights = np.zeros(bbox_targets.shape)
        for ind in fg_inds:
            class_label = 1 if is_cls_agnostic else int(class_labels[ind])
            start_ind = class_label * 4
            end_ind = class_label * 4 + 4
            bbox_targets[ind, start_ind:end_ind] = bbox_targets_input[ind, 1:]
            bbox_inside_weights[ind, start_ind:end_ind] = (1.0, 1.0, 1.0,
                                                           1.0)
        return bbox_targets, bbox_inside_weights

    def generate_func(
            rpn_rois,
            gt_classes,
            is_crowd,
            gt_boxes,
            im_info, ):
        # Body of the py_func op: runs the numpy sampler image by image.
        rpn_rois_lod = rpn_rois.lod()[0]
        gt_classes_lod = gt_classes.lod()[0]

        # convert
        rpn_rois = np.array(rpn_rois)
        gt_classes = np.array(gt_classes)
        is_crowd = np.array(is_crowd)
        gt_boxes = np.array(gt_boxes)
        im_info = np.array(im_info)

        rois = []
        labels_int32 = []
        bbox_targets = []
        bbox_inside_weights = []
        bbox_outside_weights = []
        lod = [0]

        for idx in range(len(rpn_rois_lod) - 1):
            rois_si = rpn_rois_lod[idx]
            rois_ei = rpn_rois_lod[idx + 1]

            gt_si = gt_classes_lod[idx]
            gt_ei = gt_classes_lod[idx + 1]
            frcn_blobs = _sample_rois(
                rpn_rois[rois_si:rois_ei], gt_classes[gt_si:gt_ei],
                is_crowd[gt_si:gt_ei], gt_boxes[gt_si:gt_ei], im_info[idx],
                batch_size_per_im, fg_fraction, fg_thresh, bg_thresh_hi,
                bg_thresh_lo, bbox_reg_weights, class_nums, use_random,
                is_cls_agnostic, is_cascade_rcnn)
            lod.append(frcn_blobs['rois'].shape[0] + lod[-1])
            rois.append(frcn_blobs['rois'])
            labels_int32.append(frcn_blobs['labels_int32'].reshape(-1, 1))
            bbox_targets.append(frcn_blobs['bbox_targets'])
            bbox_inside_weights.append(frcn_blobs['bbox_inside_weights'])
            bbox_outside_weights.append(frcn_blobs['bbox_outside_weights'])

        rois = np.vstack(rois)
        labels_int32 = np.vstack(labels_int32)
        bbox_targets = np.vstack(bbox_targets)
        bbox_inside_weights = np.vstack(bbox_inside_weights)
        bbox_outside_weights = np.vstack(bbox_outside_weights)

        # create lod-tensor for return
        # notice that the func create_lod_tensor does not work well here
        def _as_lod_tensor(array, dtype):
            tensor = fluid.LoDTensor()
            tensor.set_lod([lod])
            tensor.set(array.astype(dtype), fluid.CPUPlace())
            return tensor

        ret_rois = _as_lod_tensor(rois, "float32")
        ret_labels_int32 = _as_lod_tensor(labels_int32, "int32")
        ret_bbox_targets = _as_lod_tensor(bbox_targets, "float32")
        ret_bbox_inside_weights = _as_lod_tensor(bbox_inside_weights,
                                                 "float32")
        ret_bbox_outside_weights = _as_lod_tensor(bbox_outside_weights,
                                                  "float32")

        return ret_rois, ret_labels_int32, ret_bbox_targets, ret_bbox_inside_weights, ret_bbox_outside_weights

    rois = create_tmp_var(
        fluid.default_main_program(),
        name=None,
        dtype='float32',
        shape=[-1, 4], )
    bbox_inside_weights = create_tmp_var(
        fluid.default_main_program(),
        name=None,
        dtype='float32',
        shape=[-1, 8 if self.is_cls_agnostic else self.class_nums * 4], )
    bbox_outside_weights = create_tmp_var(
        fluid.default_main_program(),
        name=None,
        dtype='float32',
        shape=[-1, 8 if self.is_cls_agnostic else self.class_nums * 4], )
    bbox_targets = create_tmp_var(
        fluid.default_main_program(),
        name=None,
        dtype='float32',
        shape=[-1, 8 if self.is_cls_agnostic else self.class_nums * 4], )
    labels_int32 = create_tmp_var(
        fluid.default_main_program(),
        name=None,
        dtype='int32',
        shape=[-1, 1], )

    outs = [
        rois, labels_int32, bbox_targets, bbox_inside_weights,
        bbox_outside_weights
    ]

    fluid.layers.py_func(
        func=generate_func,
        x=[rpn_rois, gt_classes, is_crowd, gt_boxes, im_info],
        out=outs)
    return outs
|
||
|
|
||
|
|
||
|
@register
class RoIAlign(object):
    # Configuration holder bound to `fluid.layers.roi_align` through `__op__`.
    # NOTE(review): the registry presumably forwards the instance attributes
    # to the op as keyword arguments - confirm in ppdet.core.workspace.
    __op__ = fluid.layers.roi_align
    __append_doc__ = True

    def __init__(self, resolution=7, spatial_scale=1. / 16, sampling_ratio=0):
        super(RoIAlign, self).__init__()
        # A single integer stands for a square (height, width) pair.
        if isinstance(resolution, Integral):
            pooled_size = [resolution, resolution]
        else:
            pooled_size = resolution
        self.pooled_height = pooled_size[0]
        self.pooled_width = pooled_size[1]
        self.spatial_scale = spatial_scale
        self.sampling_ratio = sampling_ratio
|
||
|
|
||
|
|
||
|
@register
class RoIPool(object):
    # Configuration holder bound to `fluid.layers.roi_pool` through `__op__`.
    __op__ = fluid.layers.roi_pool
    __append_doc__ = True

    def __init__(self, resolution=7, spatial_scale=1. / 16):
        super(RoIPool, self).__init__()
        # A single integer stands for a square (height, width) pair.
        if isinstance(resolution, Integral):
            pooled_size = [resolution, resolution]
        else:
            pooled_size = resolution
        self.pooled_height = pooled_size[0]
        self.pooled_width = pooled_size[1]
        self.spatial_scale = spatial_scale
|
||
|
|
||
|
|
||
|
@register
class MultiBoxHead(object):
    # Configuration holder bound to `fluid.layers.multi_box_head` through
    # `__op__`; the constructor only records its arguments on the instance.
    __op__ = fluid.layers.multi_box_head
    __append_doc__ = True

    def __init__(self,
                 min_ratio=20,
                 max_ratio=90,
                 base_size=300,
                 min_sizes=[60.0, 105.0, 150.0, 195.0, 240.0, 285.0],
                 max_sizes=[[], 150.0, 195.0, 240.0, 285.0, 300.0],
                 aspect_ratios=[[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.],
                                [2., 3.]],
                 steps=None,
                 offset=0.5,
                 flip=True,
                 min_max_aspect_ratios_order=False,
                 kernel_size=1,
                 pad=0):
        super(MultiBoxHead, self).__init__()

        # Prior-box scale settings.
        self.min_ratio = min_ratio
        self.max_ratio = max_ratio
        self.base_size = base_size
        self.min_sizes = min_sizes
        self.max_sizes = max_sizes

        # Prior-box layout settings.
        self.aspect_ratios = aspect_ratios
        self.steps = steps
        self.offset = offset
        self.flip = flip
        self.min_max_aspect_ratios_order = min_max_aspect_ratios_order

        # Prediction convolution settings.
        self.kernel_size = kernel_size
        self.pad = pad
|
||
|
|
||
|
|
||
|
@register
@serializable
class SSDLiteMultiBoxHead(object):
    """
    SSDLite prediction head built from depthwise-separable convolutions.

    Args:
        min_ratio (int): smallest size ratio (percent of `base_size`), used
            only when `min_sizes` and `max_sizes` are both None.
        max_ratio (int): largest size ratio (percent of `base_size`), used
            only when `min_sizes` and `max_sizes` are both None.
        base_size (int): reference size the ratios are applied to.
        min_sizes (list|None): per-input minimum prior box sizes.
        max_sizes (list|None): per-input maximum prior box sizes.
        aspect_ratios (list): per-input aspect ratios of the prior boxes.
        steps (list|None): per-input prior box step; 0.0 is used when unset.
        offset (float): prior box center offset passed to `prior_box`.
        flip (bool): passed to `prior_box`.
        clip (bool): passed to `prior_box`.
        pad (int): stored on the instance; not read by this class.
        conv_decay (float): L2 decay of the depthwise / pointwise weights.
    """

    def __init__(self,
                 min_ratio=20,
                 max_ratio=90,
                 base_size=300,
                 min_sizes=None,
                 max_sizes=None,
                 aspect_ratios=[[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.],
                                [2., 3.]],
                 steps=None,
                 offset=0.5,
                 flip=True,
                 clip=False,
                 pad=0,
                 conv_decay=0.0):
        super(SSDLiteMultiBoxHead, self).__init__()
        self.min_ratio = min_ratio
        self.max_ratio = max_ratio
        self.base_size = base_size
        self.min_sizes = min_sizes
        self.max_sizes = max_sizes
        self.aspect_ratios = aspect_ratios
        self.steps = steps
        self.offset = offset
        self.flip = flip
        self.pad = pad
        self.clip = clip
        self.conv_decay = conv_decay

    def _separable_conv(self, input, num_filters, name):
        """
        Depthwise 3x3 conv -> batch norm -> relu6 -> pointwise 1x1 conv.

        Args:
            input (Variable): feature map; its second dimension is used as
                the depthwise channel / group count.
            num_filters (int): output channels of the pointwise conv.
            name (str): prefix of all parameter names.

        Returns:
            Variable: output of the pointwise convolution.
        """
        dwconv_param_attr = ParamAttr(
            name=name + 'dw_weights', regularizer=L2Decay(self.conv_decay))
        num_filter1 = input.shape[1]
        depthwise_conv = fluid.layers.conv2d(
            input=input,
            num_filters=num_filter1,
            filter_size=3,
            stride=1,
            padding="SAME",
            groups=int(num_filter1),
            act=None,
            use_cudnn=False,
            param_attr=dwconv_param_attr,
            bias_attr=False)
        bn_name = name + '_bn'
        bn_param_attr = ParamAttr(
            name=bn_name + "_scale", regularizer=L2Decay(0.0))
        bn_bias_attr = ParamAttr(
            name=bn_name + "_offset", regularizer=L2Decay(0.0))
        bn = fluid.layers.batch_norm(
            input=depthwise_conv,
            param_attr=bn_param_attr,
            bias_attr=bn_bias_attr,
            moving_mean_name=bn_name + '_mean',
            moving_variance_name=bn_name + '_variance')
        bn = fluid.layers.relu6(bn)
        pwconv_param_attr = ParamAttr(
            name=name + 'pw_weights', regularizer=L2Decay(self.conv_decay))
        pointwise_conv = fluid.layers.conv2d(
            input=bn,
            num_filters=num_filters,
            filter_size=1,
            stride=1,
            act=None,
            use_cudnn=True,
            param_attr=pwconv_param_attr,
            bias_attr=False)
        return pointwise_conv

    def __call__(self, inputs, image, num_classes):
        """
        Build location / confidence predictions and prior boxes.

        Args:
            inputs (list): feature maps, one per detection layer.
            image (Variable): input image passed to `prior_box`.
            num_classes (int): number of confidence outputs per prior box.

        Returns:
            tuple: `(ssd_mbox_loc, ssd_mbox_conf, prior_boxes, box_vars)`.
        """

        def _permute_and_reshape(input, last_dim):
            # NCHW -> NHWC, then fold the spatial/box axes into one.
            trans = fluid.layers.transpose(input, perm=[0, 2, 3, 1])
            compile_shape = [0, -1, last_dim]
            return fluid.layers.reshape(trans, shape=compile_shape)

        def _is_list_or_tuple_(data):
            return isinstance(data, (list, tuple))

        if self.min_sizes is None and self.max_sizes is None:
            # Derive the size schedule from the ratio range; the result is
            # kept on the instance, exactly as before.
            num_layer = len(inputs)
            self.min_sizes = []
            self.max_sizes = []
            step = int(
                math.floor(
                    ((self.max_ratio - self.min_ratio)) / (num_layer - 2)))
            for ratio in six.moves.range(self.min_ratio, self.max_ratio + 1,
                                         step):
                self.min_sizes.append(self.base_size * ratio / 100.)
                self.max_sizes.append(self.base_size * (ratio + step) / 100.)
            self.min_sizes = [self.base_size * .10] + self.min_sizes
            self.max_sizes = [self.base_size * .20] + self.max_sizes

        locs, confs = [], []
        boxes, mvars = [], []

        for i, feat in enumerate(inputs):
            min_size = self.min_sizes[i]
            max_size = self.max_sizes[i]
            if not _is_list_or_tuple_(min_size):
                min_size = [min_size]
            if not _is_list_or_tuple_(max_size):
                max_size = [max_size]
            step_size = self.steps[i] if self.steps else 0.0
            step = [step_size, step_size]
            box, var = fluid.layers.prior_box(
                feat,
                image,
                min_sizes=min_size,
                max_sizes=max_size,
                steps=step,
                aspect_ratios=self.aspect_ratios[i],
                variance=[0.1, 0.1, 0.2, 0.2],
                clip=self.clip,
                flip=self.flip,
                # Honour the configured offset (was hard-coded to 0.5, which
                # is also the constructor default).
                offset=self.offset)

            num_boxes = box.shape[2]
            box = fluid.layers.reshape(box, shape=[-1, 4])
            var = fluid.layers.reshape(var, shape=[-1, 4])
            num_loc_output = num_boxes * 4
            num_conf_output = num_boxes * num_classes
            # get loc
            mbox_loc = self._separable_conv(feat, num_loc_output,
                                            "loc_{}".format(i + 1))
            loc = _permute_and_reshape(mbox_loc, 4)
            # get conf
            mbox_conf = self._separable_conv(feat, num_conf_output,
                                             "conf_{}".format(i + 1))
            conf = _permute_and_reshape(mbox_conf, num_classes)

            locs.append(loc)
            confs.append(conf)
            boxes.append(box)
            mvars.append(var)

        ssd_mbox_loc = fluid.layers.concat(locs, axis=1)
        ssd_mbox_conf = fluid.layers.concat(confs, axis=1)
        prior_boxes = fluid.layers.concat(boxes)
        box_vars = fluid.layers.concat(mvars)

        # Priors are constants of the graph; no gradient flows through them.
        prior_boxes.stop_gradient = True
        box_vars.stop_gradient = True
        return ssd_mbox_loc, ssd_mbox_conf, prior_boxes, box_vars
|
||
|
|
||
|
|
||
|
@register
@serializable
class SSDOutputDecoder(object):
    # Configuration holder bound to `fluid.layers.detection_output` through
    # `__op__`; the constructor only records its arguments on the instance.
    __op__ = fluid.layers.detection_output
    __append_doc__ = True

    def __init__(self,
                 nms_threshold=0.45,
                 nms_top_k=400,
                 keep_top_k=200,
                 score_threshold=0.01,
                 nms_eta=1.0,
                 background_label=0,
                 return_index=False):
        super(SSDOutputDecoder, self).__init__()
        # Assignment order is kept as-is because the class is serializable.
        self.nms_threshold = nms_threshold
        self.background_label = background_label

        self.nms_top_k = nms_top_k
        self.keep_top_k = keep_top_k

        self.score_threshold = score_threshold
        self.nms_eta = nms_eta
        self.return_index = return_index
|
||
|
|
||
|
|
||
|
@register
@serializable
class RetinaTargetAssign(object):
    # Configuration holder bound to `fluid.layers.retinanet_target_assign`
    # through `__op__`.
    __op__ = fluid.layers.retinanet_target_assign
    __append_doc__ = True

    def __init__(self, positive_overlap=0.5, negative_overlap=0.4):
        super(RetinaTargetAssign, self).__init__()
        # Overlap thresholds, stored under the constructor argument names.
        self.positive_overlap = positive_overlap
        self.negative_overlap = negative_overlap
|
||
|
|
||
|
|
||
|
@register
@serializable
class RetinaOutputDecoder(object):
    # Configuration holder bound to
    # `fluid.layers.retinanet_detection_output` through `__op__`.
    __op__ = fluid.layers.retinanet_detection_output
    __append_doc__ = True

    def __init__(self,
                 score_thresh=0.05,
                 nms_thresh=0.3,
                 pre_nms_top_n=1000,
                 detections_per_im=100,
                 nms_eta=1.0):
        super(RetinaOutputDecoder, self).__init__()
        # Constructor argument names differ from the attribute names they
        # are stored under:
        #   score_thresh      -> score_threshold
        #   nms_thresh        -> nms_threshold
        #   pre_nms_top_n     -> nms_top_k
        #   detections_per_im -> keep_top_k
        self.score_threshold = score_thresh
        self.nms_threshold = nms_thresh
        self.nms_top_k = pre_nms_top_n
        self.keep_top_k = detections_per_im
        self.nms_eta = nms_eta
|
||
|
|
||
|
|
||
|
@register
@serializable
class MaskMatrixNMS(object):
    """
    Matrix NMS for multi-class masks.

    Args:
        update_threshold (float): Threshold applied to the category scores
            after they have been decayed.
        pre_nms_top_n (int): Number of instances kept per image before NMS.
        post_nms_top_n (int): Number of instances kept per image after NMS.
        kernel (str): 'linear' or 'gaussian'.
        sigma (float): std in gaussian method.
    Input:
        seg_preds (Variable): shape (n, h, w), segmentation feature maps
        seg_masks (Variable): shape (n, h, w), segmentation feature maps
        cate_labels (Variable): shape (n), mask labels in descending order
        cate_scores (Variable): shape (n), mask scores in descending order
        sum_masks (Variable): a float tensor of the sum of seg_masks
    Returns:
        tuple: (seg_preds, cate_scores, cate_labels) of the kept instances.
    """

    def __init__(self,
                 update_threshold=0.05,
                 pre_nms_top_n=500,
                 post_nms_top_n=100,
                 kernel='gaussian',
                 sigma=2.0):
        super(MaskMatrixNMS, self).__init__()
        self.update_threshold = update_threshold
        self.pre_nms_top_n = pre_nms_top_n
        self.post_nms_top_n = post_nms_top_n
        self.kernel = kernel
        self.sigma = sigma

    def _sort_score(self, scores, top_num):
        # The scores are stored on the instance; both branch builders read
        # them from there.
        self.case_scores = scores

        def _top_k_branch():
            return fluid.layers.topk(self.case_scores, top_num)

        def _full_sort_branch():
            return fluid.layers.argsort(self.case_scores, descending=True)

        # topk when there are more than `top_num` scores, otherwise a full
        # descending sort.
        has_more_than_top_num = fluid.layers.shape(scores)[0] > top_num
        return fluid.layers.case(
            pred_fn_pairs=[(has_more_than_top_num, _top_k_branch)],
            default=_full_sort_branch)

    def __call__(self,
                 seg_preds,
                 seg_masks,
                 cate_labels,
                 cate_scores,
                 sum_masks=None):
        # sort and keep top nms_pre
        sort_inds = self._sort_score(cate_scores, self.pre_nms_top_n)
        order = sort_inds[1]

        seg_masks = fluid.layers.gather(seg_masks, index=order)
        seg_preds = fluid.layers.gather(seg_preds, index=order)
        sum_masks = fluid.layers.gather(sum_masks, index=order)
        cate_scores = sort_inds[0]
        cate_labels = fluid.layers.gather(cate_labels, index=order)

        seg_masks = paddle.flatten(seg_masks, start_axis=1, stop_axis=-1)
        # inter.
        seg_masks_t = fluid.layers.transpose(seg_masks, [1, 0])
        inter_matrix = paddle.mm(seg_masks, seg_masks_t)
        n_samples = fluid.layers.shape(cate_labels)
        square_shape = [n_samples, n_samples]
        # union.
        sum_masks_x = fluid.layers.reshape(
            fluid.layers.expand(
                sum_masks, expand_times=[n_samples]),
            shape=square_shape)
        # iou.
        sum_masks_x_t = fluid.layers.transpose(sum_masks_x, [1, 0])
        pair_area = paddle.add(sum_masks_x, sum_masks_x_t)
        union_matrix = paddle.subtract(pair_area, inter_matrix)
        iou_matrix = paddle.divide(inter_matrix, union_matrix)
        iou_matrix = paddle.triu(iou_matrix, diagonal=1)
        # label_specific matrix.
        cate_labels_x = fluid.layers.reshape(
            fluid.layers.expand(
                cate_labels, expand_times=[n_samples]),
            shape=square_shape)
        cate_labels_x_t = fluid.layers.transpose(cate_labels_x, [1, 0])
        same_label = paddle.equal(cate_labels_x, cate_labels_x_t)
        label_matrix = fluid.layers.cast(same_label, 'float32')
        label_matrix = paddle.triu(label_matrix, diagonal=1)

        # IoU compensation
        compensate_iou = paddle.max(paddle.multiply(iou_matrix, label_matrix),
                                    axis=0)
        compensate_iou = fluid.layers.reshape(
            fluid.layers.expand(
                compensate_iou, expand_times=[n_samples]),
            shape=square_shape)
        compensate_iou = fluid.layers.transpose(compensate_iou, [1, 0])

        # IoU decay
        decay_iou = paddle.multiply(iou_matrix, label_matrix)

        # matrix nms
        if self.kernel == 'gaussian':
            decay_matrix = fluid.layers.exp(-1 * self.sigma * (decay_iou**2))
            compensate_matrix = fluid.layers.exp(-1 * self.sigma *
                                                 (compensate_iou**2))
            decay_ratio = paddle.divide(decay_matrix, compensate_matrix)
            decay_coefficient = paddle.min(decay_ratio, axis=0)
        elif self.kernel == 'linear':
            decay_matrix = (1 - decay_iou) / (1 - compensate_iou)
            decay_coefficient = paddle.min(decay_matrix, axis=0)
        else:
            raise NotImplementedError

        # update the score.
        cate_scores = paddle.multiply(cate_scores, decay_coefficient)

        keep = fluid.layers.where(cate_scores >= self.update_threshold)
        keep = fluid.layers.squeeze(keep, axes=[1])
        # Prevent empty and increase fake data
        last_index = fluid.layers.cast(
            fluid.layers.shape(cate_scores)[0] - 1, 'int64')
        keep = fluid.layers.concat([keep, last_index])

        seg_preds = fluid.layers.gather(seg_preds, index=keep)
        cate_scores = fluid.layers.gather(cate_scores, index=keep)
        cate_labels = fluid.layers.gather(cate_labels, index=keep)

        # sort and keep top_k
        sort_inds = self._sort_score(cate_scores, self.post_nms_top_n)
        order = sort_inds[1]

        seg_preds = fluid.layers.gather(seg_preds, index=order)
        cate_scores = sort_inds[0]
        cate_labels = fluid.layers.gather(cate_labels, index=order)
        return seg_preds, cate_scores, cate_labels
|