# PulseFocusPlatform/static/ppdet/modeling/roi_heads/htc_bbox_head.py
#
# 264 lines
# 11 KiB
# Python

# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle.fluid as fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.initializer import Normal
from paddle.fluid.regularizer import L2Decay
from ppdet.modeling.ops import MultiClassNMS
from ppdet.modeling.losses import SmoothL1Loss
from ppdet.core.workspace import register
__all__ = ['HTCBBoxHead']
@register
class HTCBBoxHead(object):
    """
    HTC bbox head.

    Args:
        head (object): the head module instance. It is called as
            `head(roi_feat, wb_scalar, name)` and its output feeds the
            classification and box-regression fc layers.
        nms (object): `MultiClassNMS` instance, or a dict of keyword
            arguments used to build one.
        bbox_loss (object): `SmoothL1Loss` instance, or a dict of keyword
            arguments used to build one.
        num_classes (int): number of output classes.
        lr_ratio (float): factor applied on top of `wb_scalar` for the
            learning rate of the fc biases.
    """
    __inject__ = ['head', 'nms', 'bbox_loss']
    __shared__ = ['num_classes']

    def __init__(self,
                 head,
                 nms=MultiClassNMS().__dict__,
                 bbox_loss=SmoothL1Loss().__dict__,
                 num_classes=81,
                 lr_ratio=2.0):
        super(HTCBBoxHead, self).__init__()
        self.head = head
        self.num_classes = num_classes
        self.lr_ratio = lr_ratio
        # `nms` / `bbox_loss` may arrive as plain kwargs dicts (this is what
        # the defaults are); in that case build the real objects from them.
        self.nms = MultiClassNMS(**nms) if isinstance(nms, dict) else nms
        self.bbox_loss = (SmoothL1Loss(**bbox_loss)
                          if isinstance(bbox_loss, dict) else bbox_loss)

    def _predictor_fc(self, head_feat, size, prefix, name, wb_scalar,
                      init_scale):
        """Build one prediction fc layer named `prefix + name`.

        Weights are initialised from Normal(0, init_scale) and trained with
        learning rate `wb_scalar`; the bias uses `wb_scalar * self.lr_ratio`
        and an L2Decay(0.) regularizer.
        """
        return fluid.layers.fc(
            input=head_feat,
            size=size,
            act=None,
            name=prefix + name,
            param_attr=ParamAttr(
                name='%s%s_w' % (prefix, name),
                initializer=Normal(
                    loc=0.0, scale=init_scale),
                learning_rate=wb_scalar),
            bias_attr=ParamAttr(
                name='%s%s_b' % (prefix, name),
                learning_rate=wb_scalar * self.lr_ratio,
                regularizer=L2Decay(0.)))

    def get_output(self,
                   roi_feat,
                   cls_agnostic_bbox_reg=2,
                   wb_scalar=1.0,
                   name=''):
        """
        Get bbox head output.

        Args:
            roi_feat (Variable): RoI feature from RoIExtractor.
            cls_agnostic_bbox_reg (int): number of box-delta groups to
                regress; the bbox fc layer has `4 * cls_agnostic_bbox_reg`
                outputs.
            wb_scalar (float): learning-rate factor for the fc weights
                (biases use `wb_scalar * lr_ratio`). Also forwarded to
                `head`.
            name (str): suffix appended to the layer and parameter names.

        Returns:
            cls_score (Variable): cls score.
            bbox_pred (Variable): bbox regression.
        """
        head_feat = self.head(roi_feat, wb_scalar, name)
        cls_score = self._predictor_fc(
            head_feat,
            size=self.num_classes,
            prefix='cls_score',
            name=name,
            wb_scalar=wb_scalar,
            init_scale=0.01)
        bbox_pred = self._predictor_fc(
            head_feat,
            size=4 * cls_agnostic_bbox_reg,
            prefix='bbox_pred',
            name=name,
            wb_scalar=wb_scalar,
            init_scale=0.001)
        return cls_score, bbox_pred

    def get_loss(self, rcnn_pred_list, rcnn_target_list, rcnn_loss_weight_list):
        """
        Get bbox_head loss.

        Args:
            rcnn_pred_list (List): per-stage head outputs; each entry is
                indexed as `[0]` for cls_score and `[1]` for bbox_pred.
            rcnn_target_list (List): per-stage targets; each entry is
                indexed as `[1]` labels, `[2]` bbox targets, `[3]` inside
                weights and `[4]` outside weights.
            rcnn_loss_weight_list (List): per-stage weight applied to both
                the classification and the location loss.

        Returns:
            dict: for every stage index `i`, `'loss_cls_<i>'` holds the
            weighted mean classification loss and `'loss_loc_<i>'` the
            weighted mean box loss.
        """
        loss_dict = {}
        stage_pairs = zip(rcnn_pred_list, rcnn_target_list)
        for i, (rcnn_pred, rcnn_target) in enumerate(stage_pairs):
            stage_weight = rcnn_loss_weight_list[i]

            # Labels are targets only; no gradient should flow through them.
            labels_int64 = fluid.layers.cast(x=rcnn_target[1], dtype='int64')
            labels_int64.stop_gradient = True

            loss_cls = fluid.layers.softmax_with_cross_entropy(
                logits=rcnn_pred[0],
                label=labels_int64,
                numeric_stable_mode=True)
            loss_cls = fluid.layers.reduce_mean(
                loss_cls, name='loss_cls_' + str(i)) * stage_weight

            loss_bbox = self.bbox_loss(
                x=rcnn_pred[1],
                y=rcnn_target[2],
                inside_weight=rcnn_target[3],
                outside_weight=rcnn_target[4])
            loss_bbox = fluid.layers.reduce_mean(
                loss_bbox, name='loss_bbox_' + str(i)) * stage_weight

            loss_dict['loss_cls_%d' % i] = loss_cls
            loss_dict['loss_loc_%d' % i] = loss_bbox
        return loss_dict

    def get_prediction(self,
                       im_info,
                       im_shape,
                       roi_feat_list,
                       rcnn_pred_list,
                       proposal_list,
                       cascade_bbox_reg_weights,
                       cls_agnostic_bbox_reg=2,
                       return_box_score=False):
        """
        Get prediction bounding box in test stage.

        Class probabilities are averaged over the three cascade stages,
        while boxes are decoded from the last stage only.

        Args:
            im_info (Variable): A 2-D LoDTensor with shape [B, 3]. B is the
                number of input images, each element consists
                of im_height, im_width, im_scale.
            im_shape (Variable): Actual shape of original image with shape
                [B, 3]. B is the number of images, each element consists of
                original_height, original_width, 1
            roi_feat_list (List): RoI feature from RoIExtractor. Not read
                by this method.
            rcnn_pred_list (List): Cascade rcnn's head's output; each entry
                is indexed as `[0]` for cls_score and `[1]` for bbox_pred.
            proposal_list (List): RPN proposal boxes.
            cascade_bbox_reg_weights (List): BBox decode var.
            cls_agnostic_bbox_reg (int): number of box-delta groups in
                bbox_pred. When it is 2, only the group at index 1 (the
                foreground delta) is used and it is repeated for every
                class.
            return_box_score (bool): if True, skip NMS and return the
                clipped boxes together with the averaged scores.

        Returns:
            dict: `{'bbox': box_out, 'score': boxes_cls_prob_mean}` when
            `return_box_score` is True; otherwise `{'bbox': pred_result}`
            where pred_result is the NMS output with shape [N, 6]. Each
            row has 6 values: [label, confidence, xmin, ymin, xmax, ymax].
            N is the total number of prediction.
        """
        num_stages = 3
        last_stage = num_stages - 1

        # cls score: average the softmax output of every stage.
        boxes_cls_prob_l = []
        for stage in range(num_stages):
            cls_score = rcnn_pred_list[stage][0]
            cls_prob = fluid.layers.softmax(cls_score, use_cudnn=False)
            boxes_cls_prob_l.append(cls_prob)
        boxes_cls_prob_mean = fluid.layers.sum(boxes_cls_prob_l) / float(
            len(boxes_cls_prob_l))

        # bbox pred: only the last stage contributes to the decoded boxes.
        im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3])
        bbox_reg_w = cascade_bbox_reg_weights[last_stage]
        proposals_boxes = proposal_list[last_stage]
        # Undo the resize scale so proposals are in original-image
        # coordinates before decoding.
        im_scale_lod = fluid.layers.sequence_expand(im_scale, proposals_boxes)
        proposals_boxes = proposals_boxes / im_scale_lod
        bbox_pred = rcnn_pred_list[last_stage][1]
        bbox_pred_new = fluid.layers.reshape(bbox_pred,
                                             (-1, cls_agnostic_bbox_reg, 4))

        if cls_agnostic_bbox_reg == 2:
            # only use fg box delta to decode box
            bbox_pred_new = fluid.layers.slice(
                bbox_pred_new, axes=[1], starts=[1], ends=[2])
            bbox_pred_new = fluid.layers.expand(bbox_pred_new,
                                                [1, self.num_classes, 1])

        decoded_box = fluid.layers.box_coder(
            prior_box=proposals_boxes,
            prior_box_var=bbox_reg_w,
            target_box=bbox_pred_new,
            code_type='decode_center_size',
            box_normalized=False,
            axis=1)
        box_out = fluid.layers.box_clip(input=decoded_box, im_info=im_shape)

        if return_box_score:
            return {'bbox': box_out, 'score': boxes_cls_prob_mean}
        pred_result = self.nms(bboxes=box_out, scores=boxes_cls_prob_mean)
        return {"bbox": pred_result}

    def get_prediction_cls_aware(self,
                                 im_info,
                                 im_shape,
                                 cascade_cls_prob,
                                 cascade_decoded_box,
                                 cascade_bbox_reg_weights,
                                 return_box_score=False):
        """
        get_prediction_cls_aware: predict bbox for each class

        The per-stage class probabilities and decoded boxes are merged with
        the fixed weights [0.2, 0.3, 0.5], then the merged boxes are divided
        by the image scale, reshaped to (-1, num_classes, 4) and clipped.

        Args:
            im_info (Variable): image info; column 2 is read as the image
                scale.
            im_shape (Variable): passed to `box_clip` as `im_info`.
            cascade_cls_prob (List): per-stage class probabilities.
            cascade_decoded_box (List): per-stage decoded boxes.
            cascade_bbox_reg_weights (List): not read by this method.
            return_box_score (bool): if True, skip NMS and return the
                clipped boxes together with the merged scores.

        Returns:
            dict: `{'bbox': box_out, 'score': sum_cascade_cls_prob}` when
            `return_box_score` is True, otherwise `{'bbox': pred_result}`
            with the NMS output.
        """
        cascade_eval_weight = [0.2, 0.3, 0.5]

        # merge 3 stages results
        sum_cascade_cls_prob = sum([
            prob * cascade_eval_weight[idx]
            for idx, prob in enumerate(cascade_cls_prob)
        ])
        sum_cascade_decoded_box = sum([
            bbox * cascade_eval_weight[idx]
            for idx, bbox in enumerate(cascade_decoded_box)
        ])

        # The scale is kept on the instance, as in the original code.
        self.im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3])
        im_scale_lod = fluid.layers.sequence_expand(self.im_scale,
                                                    sum_cascade_decoded_box)
        sum_cascade_decoded_box = sum_cascade_decoded_box / im_scale_lod

        decoded_bbox = fluid.layers.reshape(
            sum_cascade_decoded_box, shape=(-1, self.num_classes, 4))
        box_out = fluid.layers.box_clip(input=decoded_bbox, im_info=im_shape)

        if return_box_score:
            return {'bbox': box_out, 'score': sum_cascade_cls_prob}
        pred_result = self.nms(bboxes=box_out, scores=sum_cascade_cls_prob)
        return {"bbox": pred_result}