# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import base64

import cv2
import numpy as np
from PIL import Image, ImageDraw
import paddle.fluid as fluid


def create_inputs(im, im_info):
    """Generate the input feed for different model types.

    Args:
        im (np.ndarray): preprocessed image array
        im_info (dict): info of the image, with keys 'origin_shape',
            'resize_shape', 'pad_shape' and 'scale'

    Returns:
        inputs (dict): input of the model
    """
    inputs = {}
    inputs['image'] = im
    origin_shape = list(im_info['origin_shape'])
    resize_shape = list(im_info['resize_shape'])
    pad_shape = list(im_info['pad_shape']) if im_info[
        'pad_shape'] is not None else list(im_info['resize_shape'])
    scale_x, scale_y = im_info['scale']
    scale = scale_x
    im_info = np.array([resize_shape + [scale]]).astype('float32')
    inputs['im_info'] = im_info
    return inputs
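

# Illustrative usage (a minimal sketch, not part of the original module):
# the concrete array shapes and values below are hypothetical, they only
# mirror the keys create_inputs() reads.
#
#   im = np.zeros((1, 3, 608, 608), dtype='float32')   # preprocessed image
#   im_info = {
#       'origin_shape': (480, 640),
#       'resize_shape': [608, 608],
#       'pad_shape': None,
#       'scale': (0.95, 0.95),
#   }
#   inputs = create_inputs(im, im_info)
#   # inputs['im_info'] -> array([[608., 608., 0.95]], dtype=float32)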


def visualize_box_mask(im,
                       results,
                       labels=None,
                       mask_resolution=14,
                       threshold=0.5):
    """
    Args:
        im (str/np.ndarray): path of the image, or a BGR np.ndarray read by cv2
        results (dict): 'boxes' is an np.ndarray of shape [N, 6], N: number of boxes,
            each row is [class, score, x_min, y_min, x_max, y_max];
            MaskRCNN results also include 'masks': np.ndarray of
            shape [N, class_num, mask_resolution, mask_resolution]
        labels (list): class labels: ['class1', ..., 'classn']
        mask_resolution (int): each mask has shape [mask_resolution, mask_resolution]
        threshold (float): threshold of score
    Returns:
        im (PIL.Image.Image): visualized image
    """
    if not labels:
        labels = ['background', 'person']
    if isinstance(im, str):
        im = Image.open(im).convert('RGB')
    else:
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
        im = Image.fromarray(im)
    if 'masks' in results and 'boxes' in results:
        im = draw_mask(
            im,
            results['boxes'],
            results['masks'],
            labels,
            resolution=mask_resolution)
    if 'boxes' in results:
        im = draw_box(im, results['boxes'], labels)
    if 'segm' in results:
        im = draw_segm(
            im,
            results['segm'],
            results['label'],
            results['score'],
            labels,
            threshold=threshold)
    if 'landmark' in results:
        im = draw_lmk(im, results['landmark'])
    return im
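

# Illustrative usage (a minimal sketch; the file name and detection values
# are hypothetical):
#
#   boxes = np.array([[0., 0.92, 40., 60., 200., 380.]], dtype='float32')
#   out = visualize_box_mask('demo.jpg', {'boxes': boxes},
#                            labels=['person'], threshold=0.5)
#   out.save('demo_vis.jpg')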


def get_color_map_list(num_classes):
    """
    Args:
        num_classes (int): number of classes
    Returns:
        color_map (list): RGB color list, one [R, G, B] triple per class id
    """
    color_map = num_classes * [0, 0, 0]
    for i in range(0, num_classes):
        j = 0
        lab = i
        while lab:
            color_map[i * 3] |= (((lab >> 0) & 1) << (7 - j))
            color_map[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j))
            color_map[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j))
            j += 1
            lab >>= 3
    color_map = [color_map[i:i + 3] for i in range(0, len(color_map), 3)]
    return color_map
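

# For example, get_color_map_list(4) yields
# [[0, 0, 0], [128, 0, 0], [0, 128, 0], [128, 128, 0]]: a PASCAL-VOC-style
# palette generated from the bits of each class index.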


def expand_boxes(boxes, scale=0.0):
    """
    Args:
        boxes (np.ndarray): shape:[N,4], N: number of boxes,
            matrix element: [x_min, y_min, x_max, y_max]
        scale (float): scale factor applied to each box
    Returns:
        boxes_exp (np.ndarray): expanded boxes
    """
    w_half = (boxes[:, 2] - boxes[:, 0]) * .5
    h_half = (boxes[:, 3] - boxes[:, 1]) * .5
    x_c = (boxes[:, 2] + boxes[:, 0]) * .5
    y_c = (boxes[:, 3] + boxes[:, 1]) * .5
    w_half *= scale
    h_half *= scale
    boxes_exp = np.zeros(boxes.shape)
    boxes_exp[:, 0] = x_c - w_half
    boxes_exp[:, 2] = x_c + w_half
    boxes_exp[:, 1] = y_c - h_half
    boxes_exp[:, 3] = y_c + h_half
    return boxes_exp
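

# Worked example: a box [0, 0, 10, 10] expanded with scale=1.2 keeps its
# center (5, 5) and grows each half-size from 5 to 6:
#
#   expand_boxes(np.array([[0., 0., 10., 10.]]), scale=1.2)
#   # -> array([[-1., -1., 11., 11.]])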


def draw_mask(im, np_boxes, np_masks, labels, resolution=14, threshold=0.5):
    """
    Args:
        im (PIL.Image.Image): PIL image
        np_boxes (np.ndarray): shape:[N,6], N: number of boxes,
            matrix element: [class, score, x_min, y_min, x_max, y_max]
        np_masks (np.ndarray): shape:[N, class_num, resolution, resolution]
        labels (list): class labels: ['class1', ..., 'classn']
        resolution (int): each mask has shape [resolution, resolution]
        threshold (float): threshold of mask
    Returns:
        im (PIL.Image.Image): visualized image
    """
    color_list = get_color_map_list(len(labels))
    scale = (resolution + 2.0) / resolution
    im_w, im_h = im.size
    w_ratio = 0.4
    alpha = 0.7
    im = np.array(im).astype('float32')
    rects = np_boxes[:, 2:]
    expand_rects = expand_boxes(rects, scale)
    expand_rects = expand_rects.astype(np.int32)
    clsid_scores = np_boxes[:, 0:2]
    padded_mask = np.zeros((resolution + 2, resolution + 2), dtype=np.float32)
    clsid2color = {}
    for idx in range(len(np_boxes)):
        clsid, score = clsid_scores[idx].tolist()
        clsid = int(clsid)
        xmin, ymin, xmax, ymax = expand_rects[idx].tolist()
        w = xmax - xmin + 1
        h = ymax - ymin + 1
        w = np.maximum(w, 1)
        h = np.maximum(h, 1)
        # paste the class mask into a padded buffer, resize it to the expanded
        # box size and binarize it
        padded_mask[1:-1, 1:-1] = np_masks[idx, int(clsid), :, :]
        resized_mask = cv2.resize(padded_mask, (w, h))
        resized_mask = np.array(resized_mask > threshold, dtype=np.uint8)
        # clip the expanded box to the image and build a full-size binary mask
        x0 = min(max(xmin, 0), im_w)
        x1 = min(max(xmax + 1, 0), im_w)
        y0 = min(max(ymin, 0), im_h)
        y1 = min(max(ymax + 1, 0), im_h)
        im_mask = np.zeros((im_h, im_w), dtype=np.uint8)
        im_mask[y0:y1, x0:x1] = resized_mask[(y0 - ymin):(y1 - ymin), (
            x0 - xmin):(x1 - xmin)]
        if clsid not in clsid2color:
            clsid2color[clsid] = color_list[clsid]
        color_mask = clsid2color[clsid]
        for c in range(3):
            color_mask[c] = color_mask[c] * (1 - w_ratio) + w_ratio * 255
        # alpha-blend the class color over the masked pixels
        idx = np.nonzero(im_mask)
        color_mask = np.array(color_mask)
        im[idx[0], idx[1], :] *= 1.0 - alpha
        im[idx[0], idx[1], :] += alpha * color_mask
    return Image.fromarray(im.astype('uint8'))


def draw_box(im, np_boxes, labels):
    """
    Args:
        im (PIL.Image.Image): PIL image
        np_boxes (np.ndarray): shape:[N,6], N: number of boxes,
            matrix element: [class, score, x_min, y_min, x_max, y_max]
        labels (list): class labels: ['class1', ..., 'classn']
    Returns:
        im (PIL.Image.Image): visualized image
    """
    draw_thickness = min(im.size) // 320
    draw = ImageDraw.Draw(im)
    clsid2color = {}
    color_list = get_color_map_list(len(labels))

    for dt in np_boxes:
        clsid, bbox, score = int(dt[0]), dt[2:], dt[1]
        xmin, ymin, xmax, ymax = bbox
        w = xmax - xmin
        h = ymax - ymin
        if clsid not in clsid2color:
            clsid2color[clsid] = color_list[clsid]
        color = tuple(clsid2color[clsid])

        # draw bbox
        draw.line(
            [(xmin, ymin), (xmin, ymax), (xmax, ymax), (xmax, ymin),
             (xmin, ymin)],
            width=draw_thickness,
            fill=color)

        # draw label
        text = "{} {:.4f}".format(labels[clsid], score)
        tw, th = draw.textsize(text)
        draw.rectangle(
            [(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill=color)
        draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255))
    return im


def draw_segm(im,
              np_segms,
              np_label,
              np_score,
              labels,
              threshold=0.5,
              alpha=0.7):
    """
    Draw person segmentation on the image: pixels outside the union of all
    person masks above `threshold` are blacked out.
    """
    mask_color_id = 0
    w_ratio = .4
    color_list = get_color_map_list(len(labels))
    im = np.array(im).astype('float32')
    clsid2color = {}
    np_segms = np_segms.astype(np.uint8)
    # keep only person instances (label 0) whose score exceeds the threshold
    index = np.where(np_label == 0)[0]
    index = index[np_score[index] > threshold]
    person_segms = np_segms[index]
    person_mask = np.sum(person_segms, axis=0)
    person_mask[person_mask > 1] = 1
    person_mask = np.expand_dims(person_mask, axis=2)
    person_mask = np.repeat(person_mask, 3, axis=2)
    im = im * person_mask

    return Image.fromarray(im.astype('uint8'))


def load_predictor(model_dir,
                   run_mode='fluid',
                   batch_size=1,
                   use_gpu=False,
                   min_subgraph_size=3):
    """Set up AnalysisConfig and create an AnalysisPredictor.

    Args:
        model_dir (str): root path of __model__ and __params__
        run_mode (str): 'fluid', 'trt_fp32' or 'trt_fp16'
        batch_size (int): max batch size for the TensorRT engine
        use_gpu (bool): whether to use GPU
        min_subgraph_size (int): minimum subgraph size for TensorRT
    Returns:
        predictor (PaddlePredictor): AnalysisPredictor
    Raises:
        ValueError: prediction with TensorRT requires use_gpu == True.
    """
    if not use_gpu and not run_mode == 'fluid':
        raise ValueError(
            "Predict by TensorRT mode: {}, expect use_gpu == True, but use_gpu == {}"
            .format(run_mode, use_gpu))
    if run_mode == 'trt_int8':
        raise ValueError("TensorRT int8 mode is not supported now, "
                         "please use trt_fp32 or trt_fp16 instead.")
    precision_map = {
        'trt_int8': fluid.core.AnalysisConfig.Precision.Int8,
        'trt_fp32': fluid.core.AnalysisConfig.Precision.Float32,
        'trt_fp16': fluid.core.AnalysisConfig.Precision.Half
    }
    config = fluid.core.AnalysisConfig(
        os.path.join(model_dir, '__model__'),
        os.path.join(model_dir, '__params__'))
    if use_gpu:
        # initial GPU memory (MB), device ID
        config.enable_use_gpu(100, 0)
        # optimize graph and fuse ops
        config.switch_ir_optim(True)
    else:
        config.disable_gpu()

    if run_mode in precision_map.keys():
        config.enable_tensorrt_engine(
            workspace_size=1 << 10,
            max_batch_size=batch_size,
            min_subgraph_size=min_subgraph_size,
            precision_mode=precision_map[run_mode],
            use_static=False,
            use_calib_mode=False)

    # disable printing glog info during prediction
    config.disable_glog_info()
    # enable memory optimization
    config.enable_memory_optim()
    # disable feed and fetch OPs, required by zero_copy_run
    config.switch_use_feed_fetch_ops(False)
    predictor = fluid.core.create_paddle_predictor(config)
    return predictor
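

# Illustrative usage (a minimal sketch; the model directory below is
# hypothetical and must contain the exported __model__ and __params__ files):
#
#   predictor = load_predictor('output/pedestrian_model',
#                              run_mode='fluid',
#                              batch_size=1,
#                              use_gpu=False)
#   input_names = predictor.get_input_names()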


def cv2_to_base64(image):
    """Encode a BGR image array as a base64 JPEG string."""
    data = cv2.imencode('.jpg', image)[1]
    return base64.b64encode(data.tobytes()).decode('utf8')


def base64_to_cv2(b64str):
    """Decode a base64 JPEG string back into a BGR image array."""
    data = base64.b64decode(b64str.encode('utf8'))
    data = np.frombuffer(data, np.uint8)
    data = cv2.imdecode(data, cv2.IMREAD_COLOR)
    return data
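

# Round-trip sketch: encode a BGR image to a base64 JPEG string and decode it
# back (the file name is hypothetical; JPEG compression makes the round trip
# lossy, but shape and dtype are preserved):
#
#   img = cv2.imread('demo.jpg')
#   b64 = cv2_to_base64(img)
#   img2 = base64_to_cv2(b64)
#   assert img2.shape == img.shape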


def lmk2out(bboxes, np_lmk, im_info, threshold=0.5, is_bbox_normalized=True):
    """Decode face landmark predictions into image coordinates."""
    image_w, image_h = im_info['origin_shape']
    scale = im_info['scale']
    face_index, landmark, prior_box = np_lmk[:]
    xywh_res = []
    if bboxes is None or bboxes.shape == (1, 1):
        return np.array([])
    prior = np.reshape(prior_box, (-1, 4))
    predict_lmk = np.reshape(landmark, (-1, 10))
    k = 0
    for i in range(bboxes.shape[0]):
        score = bboxes[i][1]
        if score < threshold:
            continue
        theindex = face_index[i][0]
        me_prior = prior[theindex, :]
        lmk_pred = predict_lmk[theindex, :]
        prior_h = me_prior[2] - me_prior[0]
        prior_w = me_prior[3] - me_prior[1]
        prior_h_center = (me_prior[2] + me_prior[0]) / 2
        prior_w_center = (me_prior[3] + me_prior[1]) / 2
        lmk_decode = np.zeros((10))
        # decode the predicted offsets relative to the matched prior box
        for j in [0, 2, 4, 6, 8]:
            lmk_decode[j] = lmk_pred[j] * 0.1 * prior_w + prior_h_center
        for j in [1, 3, 5, 7, 9]:
            lmk_decode[j] = lmk_pred[j] * 0.1 * prior_h + prior_w_center

        if is_bbox_normalized:
            lmk_decode = lmk_decode * np.array([
                image_h, image_w, image_h, image_w, image_h, image_w, image_h,
                image_w, image_h, image_w
            ])
        xywh_res.append(lmk_decode)
    return np.asarray(xywh_res)
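

# Shape sketch (an assumption inferred from how the values are unpacked
# above, not a documented contract):
#   bboxes:  [N, 6] detection output, row = [class, score, x0, y0, x1, y1]
#   np_lmk:  (face_index [N, 1], landmark [M * 10], prior_box [M * 4])
#   returns: [K, 10] decoded landmark coordinates (x1, y1, ..., x5, y5) for
#            the K detections whose score exceeds `threshold`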


def draw_lmk(image, lmk_results):
    """Draw the decoded landmark points as small green dots."""
    draw = ImageDraw.Draw(image)
    for lmk_decode in lmk_results:
        for j in range(5):
            x1 = int(round(lmk_decode[2 * j]))
            y1 = int(round(lmk_decode[2 * j + 1]))
            draw.ellipse(
                (x1 - 2, y1 - 2, x1 + 3, y1 + 3), fill='green', outline='green')
    return image