# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from scipy.optimize import linear_sum_assignment from collections import defaultdict import numpy as np import math from keypoint_preprocess import get_affine_mat_kernel, get_affine_transform class HrHRNetPostProcess(object): ''' HrHRNet postprocess contain: 1) get topk keypoints in the output heatmap 2) sample the tagmap's value corresponding to each of the topk coordinate 3) match different joints to combine to some people with Hungary algorithm 4) adjust the coordinate by +-0.25 to decrease error std 5) salvage missing joints by check positivity of heatmap - tagdiff_norm Args: max_num_people (int): max number of people support in postprocess heat_thresh (float): value of topk below this threshhold will be ignored tag_thresh (float): coord's value sampled in tagmap below this threshold belong to same people for init inputs(list[heatmap]): the output list of modle, [heatmap, heatmap_maxpool, tagmap], heatmap_maxpool used to get topk original_height, original_width (float): the original image size ''' def __init__(self, max_num_people=30, heat_thresh=0.2, tag_thresh=1.): self.max_num_people = max_num_people self.heat_thresh = heat_thresh self.tag_thresh = tag_thresh def lerp(self, j, y, x, heatmap): H, W = heatmap.shape[-2:] left = np.clip(x - 1, 0, W - 1) right = np.clip(x + 1, 0, W - 1) up = np.clip(y - 1, 0, H - 1) down = np.clip(y + 1, 0, H - 1) offset_y = np.where(heatmap[j, down, x] > heatmap[j, up, x], 0.25, -0.25) offset_x = np.where(heatmap[j, y, right] > heatmap[j, y, left], 0.25, -0.25) return offset_y + 0.5, offset_x + 0.5 def __call__(self, heatmap, tagmap, heat_k, inds_k, original_height, original_width): N, J, H, W = heatmap.shape assert N == 1, "only support batch size 1" heatmap = heatmap[0] tagmap = tagmap[0] heats = heat_k[0] inds_np = inds_k[0] y = inds_np // W x = inds_np % W tags = tagmap[np.arange(J)[None, :].repeat(self.max_num_people), y.flatten(), x.flatten()].reshape(J, -1, tagmap.shape[-1]) coords = np.stack((y, x), axis=2) # threshold mask = heats > self.heat_thresh # cluster cluster = defaultdict(lambda: { 'coords': np.zeros((J, 2), dtype=np.float32), 'scores': np.zeros(J, dtype=np.float32), 'tags': [] }) for jid, m in enumerate(mask): num_valid = m.sum() if num_valid == 0: continue valid_inds = np.where(m)[0] valid_tags = tags[jid, m, :] if len(cluster) == 0: # initialize for i in valid_inds: tag = tags[jid, i] key = tag[0] cluster[key]['tags'].append(tag) cluster[key]['scores'][jid] = heats[jid, i] cluster[key]['coords'][jid] = coords[jid, i] continue candidates = list(cluster.keys())[:self.max_num_people] centroids = [ np.mean( cluster[k]['tags'], axis=0) for k in candidates ] num_clusters = len(centroids) # shape is (num_valid, num_clusters, tag_dim) dist = valid_tags[:, None, :] - np.array(centroids)[None, ...] l2_dist = np.linalg.norm(dist, ord=2, axis=2) # modulate dist with heat value, see `use_detection_val` cost = np.round(l2_dist) * 100 - heats[jid, m, None] # pad the cost matrix, otherwise new pose are ignored if num_valid > num_clusters: cost = np.pad(cost, ((0, 0), (0, num_valid - num_clusters)), constant_values=((0, 0), (0, 1e-10))) rows, cols = linear_sum_assignment(cost) for y, x in zip(rows, cols): tag = tags[jid, y] if y < num_valid and x < num_clusters and \ l2_dist[y, x] < self.tag_thresh: key = candidates[x] # merge to cluster else: key = tag[0] # initialize new cluster cluster[key]['tags'].append(tag) cluster[key]['scores'][jid] = heats[jid, y] cluster[key]['coords'][jid] = coords[jid, y] # shape is [k, J, 2] and [k, J] pose_tags = np.array([cluster[k]['tags'] for k in cluster]) pose_coords = np.array([cluster[k]['coords'] for k in cluster]) pose_scores = np.array([cluster[k]['scores'] for k in cluster]) valid = pose_scores > 0 pose_kpts = np.zeros((pose_scores.shape[0], J, 3), dtype=np.float32) if valid.sum() == 0: return pose_kpts, pose_kpts # refine coords valid_coords = pose_coords[valid].astype(np.int32) y = valid_coords[..., 0].flatten() x = valid_coords[..., 1].flatten() _, j = np.nonzero(valid) offsets = self.lerp(j, y, x, heatmap) pose_coords[valid, 0] += offsets[0] pose_coords[valid, 1] += offsets[1] # mean score before salvage mean_score = pose_scores.mean(axis=1) pose_kpts[valid, 2] = pose_scores[valid] # salvage missing joints if True: for pid, coords in enumerate(pose_coords): tag_mean = np.array(pose_tags[pid]).mean(axis=0) norm = np.sum((tagmap - tag_mean)**2, axis=3)**0.5 score = heatmap - np.round(norm) # (J, H, W) flat_score = score.reshape(J, -1) max_inds = np.argmax(flat_score, axis=1) max_scores = np.max(flat_score, axis=1) salvage_joints = (pose_scores[pid] == 0) & (max_scores > 0) if salvage_joints.sum() == 0: continue y = max_inds[salvage_joints] // W x = max_inds[salvage_joints] % W offsets = self.lerp(salvage_joints.nonzero()[0], y, x, heatmap) y = y.astype(np.float32) + offsets[0] x = x.astype(np.float32) + offsets[1] pose_coords[pid][salvage_joints, 0] = y pose_coords[pid][salvage_joints, 1] = x pose_kpts[pid][salvage_joints, 2] = max_scores[salvage_joints] pose_kpts[..., :2] = transpred(pose_coords[..., :2][..., ::-1], original_height, original_width, min(H, W)) return pose_kpts, mean_score def transpred(kpts, h, w, s): trans, _ = get_affine_mat_kernel(h, w, s, inv=True) return warp_affine_joints(kpts[..., :2].copy(), trans) def warp_affine_joints(joints, mat): """Apply affine transformation defined by the transform matrix on the joints. Args: joints (np.ndarray[..., 2]): Origin coordinate of joints. mat (np.ndarray[3, 2]): The affine matrix. Returns: matrix (np.ndarray[..., 2]): Result coordinate of joints. """ joints = np.array(joints) shape = joints.shape joints = joints.reshape(-1, 2) return np.dot(np.concatenate( (joints, joints[:, 0:1] * 0 + 1), axis=1), mat.T).reshape(shape) class HRNetPostProcess(object): def flip_back(self, output_flipped, matched_parts): assert output_flipped.ndim == 4,\ 'output_flipped should be [batch_size, num_joints, height, width]' output_flipped = output_flipped[:, :, :, ::-1] for pair in matched_parts: tmp = output_flipped[:, pair[0], :, :].copy() output_flipped[:, pair[0], :, :] = output_flipped[:, pair[1], :, :] output_flipped[:, pair[1], :, :] = tmp return output_flipped def get_max_preds(self, heatmaps): '''get predictions from score maps Args: heatmaps: numpy.ndarray([batch_size, num_joints, height, width]) Returns: preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords maxvals: numpy.ndarray([batch_size, num_joints, 2]), the maximum confidence of the keypoints ''' assert isinstance(heatmaps, np.ndarray), 'heatmaps should be numpy.ndarray' assert heatmaps.ndim == 4, 'batch_images should be 4-ndim' batch_size = heatmaps.shape[0] num_joints = heatmaps.shape[1] width = heatmaps.shape[3] heatmaps_reshaped = heatmaps.reshape((batch_size, num_joints, -1)) idx = np.argmax(heatmaps_reshaped, 2) maxvals = np.amax(heatmaps_reshaped, 2) maxvals = maxvals.reshape((batch_size, num_joints, 1)) idx = idx.reshape((batch_size, num_joints, 1)) preds = np.tile(idx, (1, 1, 2)).astype(np.float32) preds[:, :, 0] = (preds[:, :, 0]) % width preds[:, :, 1] = np.floor((preds[:, :, 1]) / width) pred_mask = np.tile(np.greater(maxvals, 0.0), (1, 1, 2)) pred_mask = pred_mask.astype(np.float32) preds *= pred_mask return preds, maxvals def get_final_preds(self, heatmaps, center, scale): """the highest heatvalue location with a quarter offset in the direction from the highest response to the second highest response. Args: heatmaps (numpy.ndarray): The predicted heatmaps center (numpy.ndarray): The boxes center scale (numpy.ndarray): The scale factor Returns: preds: numpy.ndarray([batch_size, num_joints, 2]), keypoints coords maxvals: numpy.ndarray([batch_size, num_joints, 1]), the maximum confidence of the keypoints """ coords, maxvals = self.get_max_preds(heatmaps) heatmap_height = heatmaps.shape[2] heatmap_width = heatmaps.shape[3] for n in range(coords.shape[0]): for p in range(coords.shape[1]): hm = heatmaps[n][p] px = int(math.floor(coords[n][p][0] + 0.5)) py = int(math.floor(coords[n][p][1] + 0.5)) if 1 < px < heatmap_width - 1 and 1 < py < heatmap_height - 1: diff = np.array([ hm[py][px + 1] - hm[py][px - 1], hm[py + 1][px] - hm[py - 1][px] ]) coords[n][p] += np.sign(diff) * .25 preds = coords.copy() # Transform back for i in range(coords.shape[0]): preds[i] = transform_preds(coords[i], center[i], scale[i], [heatmap_width, heatmap_height]) return preds, maxvals def __call__(self, output, center, scale): preds, maxvals = self.get_final_preds(output, center, scale) return np.concatenate( (preds, maxvals), axis=-1), np.mean( maxvals, axis=1) def transform_preds(coords, center, scale, output_size): target_coords = np.zeros(coords.shape) trans = get_affine_transform(center, scale * 200, 0, output_size, inv=1) for p in range(coords.shape[0]): target_coords[p, 0:2] = affine_transform(coords[p, 0:2], trans) return target_coords def affine_transform(pt, t): new_pt = np.array([pt[0], pt[1], 1.]).T new_pt = np.dot(t, new_pt) return new_pt[:2]