forked from PulseFocusPlatform/PulseFocusPlatform
264 lines
11 KiB
Python
264 lines
11 KiB
Python
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
from __future__ import absolute_import
|
|
from __future__ import division
|
|
from __future__ import print_function
|
|
import paddle.fluid as fluid
|
|
from paddle.fluid.param_attr import ParamAttr
|
|
from paddle.fluid.initializer import Normal
|
|
from paddle.fluid.regularizer import L2Decay
|
|
|
|
from ppdet.modeling.ops import MultiClassNMS
|
|
from ppdet.modeling.losses import SmoothL1Loss
|
|
from ppdet.core.workspace import register
|
|
|
|
__all__ = ['HTCBBoxHead']
|
|
|
|
|
|
@register
|
|
class HTCBBoxHead(object):
|
|
"""
|
|
HTC bbox head
|
|
|
|
Args:
|
|
head (object): the head module instance
|
|
nms (object): `MultiClassNMS` instance
|
|
num_classes: number of output classes
|
|
"""
|
|
__inject__ = ['head', 'nms', 'bbox_loss']
|
|
__shared__ = ['num_classes']
|
|
|
|
def __init__(self,
|
|
head,
|
|
nms=MultiClassNMS().__dict__,
|
|
bbox_loss=SmoothL1Loss().__dict__,
|
|
num_classes=81,
|
|
lr_ratio=2.0):
|
|
super(HTCBBoxHead, self).__init__()
|
|
self.head = head
|
|
self.nms = nms
|
|
self.bbox_loss = bbox_loss
|
|
self.num_classes = num_classes
|
|
self.lr_ratio = lr_ratio
|
|
|
|
if isinstance(nms, dict):
|
|
self.nms = MultiClassNMS(**nms)
|
|
if isinstance(bbox_loss, dict):
|
|
self.bbox_loss = SmoothL1Loss(**bbox_loss)
|
|
|
|
def get_output(self,
|
|
roi_feat,
|
|
cls_agnostic_bbox_reg=2,
|
|
wb_scalar=1.0,
|
|
name=''):
|
|
"""
|
|
Get bbox head output.
|
|
|
|
Args:
|
|
roi_feat (Variable): RoI feature from RoIExtractor.
|
|
cls_agnostic_bbox_reg(Int): BBox regressor are class agnostic.
|
|
wb_scalar(Float): Weights and Bias's learning rate.
|
|
name(String): Layer's name
|
|
|
|
Returns:
|
|
cls_score(Variable): cls score.
|
|
bbox_pred(Variable): bbox regression.
|
|
"""
|
|
head_feat = self.head(roi_feat, wb_scalar, name)
|
|
cls_score = fluid.layers.fc(input=head_feat,
|
|
size=self.num_classes,
|
|
act=None,
|
|
name='cls_score' + name,
|
|
param_attr=ParamAttr(
|
|
name='cls_score%s_w' % name,
|
|
initializer=Normal(
|
|
loc=0.0, scale=0.01),
|
|
learning_rate=wb_scalar),
|
|
bias_attr=ParamAttr(
|
|
name='cls_score%s_b' % name,
|
|
learning_rate=wb_scalar * self.lr_ratio,
|
|
regularizer=L2Decay(0.)))
|
|
bbox_pred = fluid.layers.fc(input=head_feat,
|
|
size=4 * cls_agnostic_bbox_reg,
|
|
act=None,
|
|
name='bbox_pred' + name,
|
|
param_attr=ParamAttr(
|
|
name='bbox_pred%s_w' % name,
|
|
initializer=Normal(
|
|
loc=0.0, scale=0.001),
|
|
learning_rate=wb_scalar),
|
|
bias_attr=ParamAttr(
|
|
name='bbox_pred%s_b' % name,
|
|
learning_rate=wb_scalar * self.lr_ratio,
|
|
regularizer=L2Decay(0.)))
|
|
return cls_score, bbox_pred
|
|
|
|
def get_loss(self, rcnn_pred_list, rcnn_target_list, rcnn_loss_weight_list):
|
|
"""
|
|
Get bbox_head loss.
|
|
|
|
Args:
|
|
rcnn_pred_list(List): Cascade RCNN's head's output including
|
|
bbox_pred and cls_score
|
|
rcnn_target_list(List): Cascade rcnn's bbox and label target
|
|
rcnn_loss_weight_list(List): The weight of location and class loss
|
|
|
|
Return:
|
|
loss_cls(Variable): bbox_head loss.
|
|
loss_bbox(Variable): bbox_head loss.
|
|
"""
|
|
loss_dict = {}
|
|
for i, (rcnn_pred, rcnn_target
|
|
) in enumerate(zip(rcnn_pred_list, rcnn_target_list)):
|
|
labels_int64 = fluid.layers.cast(x=rcnn_target[1], dtype='int64')
|
|
labels_int64.stop_gradient = True
|
|
|
|
loss_cls = fluid.layers.softmax_with_cross_entropy(
|
|
logits=rcnn_pred[0],
|
|
label=labels_int64,
|
|
numeric_stable_mode=True, )
|
|
loss_cls = fluid.layers.reduce_mean(
|
|
loss_cls, name='loss_cls_' + str(i)) * rcnn_loss_weight_list[i]
|
|
|
|
loss_bbox = self.bbox_loss(
|
|
x=rcnn_pred[1],
|
|
y=rcnn_target[2],
|
|
inside_weight=rcnn_target[3],
|
|
outside_weight=rcnn_target[4])
|
|
loss_bbox = fluid.layers.reduce_mean(
|
|
loss_bbox,
|
|
name='loss_bbox_' + str(i)) * rcnn_loss_weight_list[i]
|
|
|
|
loss_dict['loss_cls_%d' % i] = loss_cls
|
|
loss_dict['loss_loc_%d' % i] = loss_bbox
|
|
|
|
return loss_dict
|
|
|
|
def get_prediction(self,
|
|
im_info,
|
|
im_shape,
|
|
roi_feat_list,
|
|
rcnn_pred_list,
|
|
proposal_list,
|
|
cascade_bbox_reg_weights,
|
|
cls_agnostic_bbox_reg=2,
|
|
return_box_score=False):
|
|
"""
|
|
Get prediction bounding box in test stage.
|
|
:
|
|
Args:
|
|
im_info (Variable): A 2-D LoDTensor with shape [B, 3]. B is the
|
|
number of input images, each element consists
|
|
of im_height, im_width, im_scale.
|
|
im_shape (Variable): Actual shape of original image with shape
|
|
[B, 3]. B is the number of images, each element consists of
|
|
original_height, original_width, 1
|
|
rois_feat_list (List): RoI feature from RoIExtractor.
|
|
rcnn_pred_list (Variable): Cascade rcnn's head's output
|
|
including bbox_pred and cls_score
|
|
proposal_list (List): RPN proposal boxes.
|
|
cascade_bbox_reg_weights (List): BBox decode var.
|
|
cls_agnostic_bbox_reg(Int): BBox regressor are class agnostic
|
|
|
|
Returns:
|
|
pred_result(Variable): Prediction result with shape [N, 6]. Each
|
|
row has 6 values: [label, confidence, xmin, ymin, xmax, ymax].
|
|
N is the total number of prediction.
|
|
"""
|
|
repeat_num = 3
|
|
# cls score
|
|
boxes_cls_prob_l = []
|
|
for i in range(repeat_num):
|
|
cls_score = rcnn_pred_list[i][0]
|
|
cls_prob = fluid.layers.softmax(cls_score, use_cudnn=False)
|
|
boxes_cls_prob_l.append(cls_prob)
|
|
|
|
boxes_cls_prob_mean = fluid.layers.sum(boxes_cls_prob_l) / float(
|
|
len(boxes_cls_prob_l))
|
|
|
|
# bbox pred
|
|
im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3])
|
|
bbox_pred_l = []
|
|
for i in range(repeat_num):
|
|
if i < 2:
|
|
continue
|
|
bbox_reg_w = cascade_bbox_reg_weights[i]
|
|
proposals_boxes = proposal_list[i]
|
|
im_scale_lod = fluid.layers.sequence_expand(im_scale,
|
|
proposals_boxes)
|
|
proposals_boxes = proposals_boxes / im_scale_lod
|
|
bbox_pred = rcnn_pred_list[i][1]
|
|
bbox_pred_new = fluid.layers.reshape(bbox_pred,
|
|
(-1, cls_agnostic_bbox_reg, 4))
|
|
bbox_pred_l.append(bbox_pred_new)
|
|
|
|
bbox_pred_new = bbox_pred_l[-1]
|
|
if cls_agnostic_bbox_reg == 2:
|
|
# only use fg box delta to decode box
|
|
bbox_pred_new = fluid.layers.slice(
|
|
bbox_pred_new, axes=[1], starts=[1], ends=[2])
|
|
bbox_pred_new = fluid.layers.expand(bbox_pred_new,
|
|
[1, self.num_classes, 1])
|
|
decoded_box = fluid.layers.box_coder(
|
|
prior_box=proposals_boxes,
|
|
prior_box_var=bbox_reg_w,
|
|
target_box=bbox_pred_new,
|
|
code_type='decode_center_size',
|
|
box_normalized=False,
|
|
axis=1)
|
|
|
|
box_out = fluid.layers.box_clip(input=decoded_box, im_info=im_shape)
|
|
if return_box_score:
|
|
return {'bbox': box_out, 'score': boxes_cls_prob_mean}
|
|
pred_result = self.nms(bboxes=box_out, scores=boxes_cls_prob_mean)
|
|
return {"bbox": pred_result}
|
|
|
|
def get_prediction_cls_aware(self,
|
|
im_info,
|
|
im_shape,
|
|
cascade_cls_prob,
|
|
cascade_decoded_box,
|
|
cascade_bbox_reg_weights,
|
|
return_box_score=False):
|
|
'''
|
|
get_prediction_cls_aware: predict bbox for each class
|
|
'''
|
|
cascade_num_stage = 3
|
|
cascade_eval_weight = [0.2, 0.3, 0.5]
|
|
# merge 3 stages results
|
|
sum_cascade_cls_prob = sum([
|
|
prob * cascade_eval_weight[idx]
|
|
for idx, prob in enumerate(cascade_cls_prob)
|
|
])
|
|
sum_cascade_decoded_box = sum([
|
|
bbox * cascade_eval_weight[idx]
|
|
for idx, bbox in enumerate(cascade_decoded_box)
|
|
])
|
|
self.im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3])
|
|
im_scale_lod = fluid.layers.sequence_expand(self.im_scale,
|
|
sum_cascade_decoded_box)
|
|
|
|
sum_cascade_decoded_box = sum_cascade_decoded_box / im_scale_lod
|
|
|
|
decoded_bbox = sum_cascade_decoded_box
|
|
decoded_bbox = fluid.layers.reshape(
|
|
decoded_bbox, shape=(-1, self.num_classes, 4))
|
|
|
|
box_out = fluid.layers.box_clip(input=decoded_bbox, im_info=im_shape)
|
|
if return_box_score:
|
|
return {'bbox': box_out, 'score': sum_cascade_cls_prob}
|
|
pred_result = self.nms(bboxes=box_out, scores=sum_cascade_cls_prob)
|
|
return {"bbox": pred_result}
|