12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022 |
- # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- # function:
- # operators to process sample,
- # eg: decode/resize/crop image
- from __future__ import absolute_import
- try:
- from collections.abc import Sequence
- except Exception:
- from collections import Sequence
- import cv2
- import numpy as np
- import math
- import copy
- from ...modeling.keypoint_utils import get_affine_mat_kernel, warp_affine_joints, get_affine_transform, affine_transform, get_warp_matrix
- from ppdet.core.workspace import serializable
- from ppdet.utils.logger import setup_logger
- logger = setup_logger(__name__)
- registered_ops = []
- __all__ = [
- 'RandomAffine', 'KeyPointFlip', 'TagGenerate', 'ToHeatmaps',
- 'NormalizePermute', 'EvalAffine', 'RandomFlipHalfBodyTransform',
- 'TopDownAffine', 'ToHeatmapsTopDown', 'ToHeatmapsTopDown_DARK',
- 'ToHeatmapsTopDown_UDP', 'TopDownEvalAffine',
- 'AugmentationbyInformantionDropping', 'SinglePoseAffine', 'NoiseJitter',
- 'FlipPose'
- ]
def register_keypointop(cls):
    """Class decorator for keypoint transform ops.

    Records the op name in the module-level ``registered_ops`` registry
    (previously declared but never populated) and wraps the class with
    ``serializable`` so it can be instantiated from a YAML config. This
    mirrors the ``register_op`` pattern of ppdet's generic transform ops.

    Args:
        cls (type): the transform-op class to register.

    Returns:
        type: the serializable-wrapped class.
    """
    if cls.__name__ not in registered_ops:
        registered_ops.append(cls.__name__)
    return serializable(cls)
- @register_keypointop
class KeyPointFlip(object):
    """Randomly mirror the image horizontally and flip keypoints/masks to match.

    When a flip happens, left/right keypoints are exchanged according to
    ``flip_permutation`` (a right joint becomes a left joint in the mirrored
    image) and the x coordinates are reflected on every heatmap scale.

    Args:
        flip_permutation (list[17]): the left-right exchange order list
            corresponding to [0, 1, 2, ..., 16]
        hmsize (list[2]): output heatmap's shape list of different scale
            outputs of higherhrnet
        flip_prob (float): the probability of flipping the image

    Returns:
        records(dict): contain the image, mask and coords after tranformed
    """

    def __init__(self, flip_permutation, hmsize, flip_prob=0.5):
        super(KeyPointFlip, self).__init__()
        assert isinstance(flip_permutation, Sequence)
        self.flip_permutation = flip_permutation
        self.flip_prob = flip_prob
        self.hmsize = hmsize

    def __call__(self, records):
        img = records['image']
        joints_per_scale = records['joints']
        masks = records['mask']
        if np.random.random() < self.flip_prob:
            img = img[:, ::-1]
            for scale_idx, size in enumerate(self.hmsize):
                if scale_idx < len(masks):
                    masks[scale_idx] = masks[scale_idx][:, ::-1]
                kpts = joints_per_scale[scale_idx]
                # Exchange left/right joints; supports both [P, J, 3] and
                # [J, 3] layouts.
                if kpts.ndim == 3:
                    kpts = kpts[:, self.flip_permutation]
                else:
                    kpts = kpts[self.flip_permutation]
                # Mirror x, then invalidate joints falling off the heatmap.
                kpts[..., 0] = size - kpts[..., 0]
                kpts = kpts.astype(np.int64)
                kpts[kpts[..., 0] >= size, 2] = 0
                kpts[kpts[..., 1] >= size, 2] = 0
                kpts[kpts[..., 0] < 0, 2] = 0
                kpts[kpts[..., 1] < 0, 2] = 0
                joints_per_scale[scale_idx] = kpts
        records['image'] = img
        records['joints'] = joints_per_scale
        records['mask'] = masks
        return records
- @register_keypointop
class RandomAffine(object):
    """apply affine transform to image, mask and coords
    to achieve the rotate, scale and shift effect for training image

    Args:
        max_degree (float): max absolute rotation degree, sampled from
            [-max_degree, max_degree]
        scale (list[2]): the scale range [min, max], sampled uniformly
        max_shift (float): max absolute shift ratio; the shift is sampled
            from [-max_shift*roi_size, max_shift*roi_size]
        hmsize (list[2]): output heatmap's shape list of different scale
            outputs of higherhrnet
        trainsize (int): the standard length used to train; the
            'scale_type' edge of [h, w] is resized to trainsize
        scale_type (str): which edge of [h, w] defines the base scale,
            chosen between 'short' and 'long'

    Returns:
        records(dict): contain the image, mask and coords after tranformed
    """

    def __init__(self,
                 max_degree=30,
                 scale=[0.75, 1.5],
                 max_shift=0.2,
                 hmsize=[128, 256],
                 trainsize=512,
                 scale_type='short'):
        super(RandomAffine, self).__init__()
        self.max_degree = max_degree
        self.min_scale = scale[0]
        self.max_scale = scale[1]
        self.max_shift = max_shift
        self.hmsize = hmsize
        self.trainsize = trainsize
        self.scale_type = scale_type

    def _get_affine_matrix(self, center, scale, res, rot=0):
        """Generate the 3x3 matrix mapping the ROI (center, scale) onto an
        output of size `res`, rotated by `rot` degrees around the output
        center."""
        h = scale
        t = np.zeros((3, 3), dtype=np.float32)
        t[0, 0] = float(res[1]) / h
        t[1, 1] = float(res[0]) / h
        t[0, 2] = res[1] * (-float(center[0]) / h + .5)
        t[1, 2] = res[0] * (-float(center[1]) / h + .5)
        t[2, 2] = 1
        if rot != 0:
            rot = -rot  # To match direction of rotation from cropping
            rot_mat = np.zeros((3, 3), dtype=np.float32)
            rot_rad = rot * np.pi / 180
            sn, cs = np.sin(rot_rad), np.cos(rot_rad)
            rot_mat[0, :2] = [cs, -sn]
            rot_mat[1, :2] = [sn, cs]
            rot_mat[2, 2] = 1
            # Need to rotate around center
            t_mat = np.eye(3)
            t_mat[0, 2] = -res[1] / 2
            t_mat[1, 2] = -res[0] / 2
            t_inv = t_mat.copy()
            t_inv[:2, 2] *= -1
            t = np.dot(t_inv, np.dot(rot_mat, np.dot(t_mat, t)))
        return t

    def __call__(self, records):
        image = records['image']
        keypoints = records['joints']
        heatmap_mask = records['mask']
        degree = (np.random.random() * 2 - 1) * self.max_degree
        shape = np.array(image.shape[:2][::-1])
        # Fix: was `center = center = ...` (duplicated assignment).
        center = np.array(shape) / 2
        aug_scale = np.random.random() * (self.max_scale - self.min_scale
                                          ) + self.min_scale
        if self.scale_type == 'long':
            scale = max(shape[0], shape[1]) / 1.0
        elif self.scale_type == 'short':
            scale = min(shape[0], shape[1]) / 1.0
        else:
            raise ValueError('Unknown scale type: {}'.format(self.scale_type))
        roi_size = aug_scale * scale
        dx = int(0)
        dy = int(0)
        if self.max_shift > 0:
            dx = np.random.randint(-self.max_shift * roi_size,
                                   self.max_shift * roi_size)
            dy = np.random.randint(-self.max_shift * roi_size,
                                   self.max_shift * roi_size)
        center += np.array([dx, dy])
        # Joints appear to arrive normalized to [0, 1] and are scaled to
        # pixel coordinates here — TODO confirm with the dataset producer.
        keypoints[..., :2] *= shape
        # Fix: guard against a missing mask; previously `heatmap_mask *= 255`
        # raised a TypeError on None, and `mask` could be referenced before
        # assignment at `mask_lst.append(mask)` below.
        if heatmap_mask is not None:
            heatmap_mask *= 255
        kpts_lst = []
        mask_lst = []
        image_affine_mat = self._get_affine_matrix(
            center, roi_size, (self.trainsize, self.trainsize), degree)[:2]
        image = cv2.warpAffine(
            image,
            image_affine_mat, (self.trainsize, self.trainsize),
            flags=cv2.INTER_LINEAR)
        for hmsize in self.hmsize:
            kpts = copy.deepcopy(keypoints)
            mask_affine_mat = self._get_affine_matrix(
                center, roi_size, (hmsize, hmsize), degree)[:2]
            mask = None
            if heatmap_mask is not None:
                mask = cv2.warpAffine(heatmap_mask, mask_affine_mat,
                                      (hmsize, hmsize))
                mask = ((mask / 255) > 0.5).astype(np.float32)
            kpts[..., 0:2] = warp_affine_joints(kpts[..., 0:2].copy(),
                                                mask_affine_mat)
            # Invalidate joints that land outside this heatmap scale.
            kpts[np.trunc(kpts[..., 0]) >= hmsize, 2] = 0
            kpts[np.trunc(kpts[..., 1]) >= hmsize, 2] = 0
            kpts[np.trunc(kpts[..., 0]) < 0, 2] = 0
            kpts[np.trunc(kpts[..., 1]) < 0, 2] = 0
            kpts_lst.append(kpts)
            mask_lst.append(mask)
        records['image'] = image
        records['joints'] = kpts_lst
        records['mask'] = mask_lst
        return records
- @register_keypointop
class EvalAffine(object):
    """Affine resize used at eval time: scales the short edge of [h, w] to
    the standard evaluation size.

    Args:
        size (int): the standard length used to train; the short edge of
            [h, w] is resized to it
        stride (int): output alignment stride

    Returns:
        records(dict): contain the image and mask after tranformed
    """

    def __init__(self, size, stride=64):
        super(EvalAffine, self).__init__()
        self.size = size
        self.stride = stride

    def __call__(self, records):
        img = records['image']
        seg_mask = records.get('mask')
        h, w, _ = img.shape
        trans, size_resized = get_affine_mat_kernel(h, w, self.size, inv=False)
        records['image'] = cv2.warpAffine(img, trans, size_resized)
        if seg_mask is not None:
            records['mask'] = cv2.warpAffine(seg_mask, trans, size_resized)
        # Ground-truth joints are not needed for evaluation.
        records.pop('joints', None)
        return records
- @register_keypointop
class NormalizePermute(object):
    """Normalize an HWC image by per-channel mean/std and permute it to CHW.

    Args:
        mean (list[3]): per-channel mean to subtract.
        std (list[3]): per-channel std to divide by.
        is_scale (bool): if True, divide pixel values by 255 first.

    Returns:
        records(dict): records with the normalized CHW float32 image.
    """

    def __init__(self,
                 mean=[123.675, 116.28, 103.53],
                 std=[58.395, 57.120, 57.375],
                 is_scale=True):
        super(NormalizePermute, self).__init__()
        self.mean = mean
        self.std = std
        self.is_scale = is_scale

    def __call__(self, records):
        image = records['image'].astype(np.float32)
        if self.is_scale:
            image /= 255.
        # HWC -> CHW
        image = image.transpose((2, 0, 1))
        mean = np.array(self.mean, dtype=np.float32)
        invstd = 1. / np.array(self.std, dtype=np.float32)
        # In-place per-channel normalization; clearer than the previous
        # explicit `__isub__`/`__imul__` dunder calls.
        for channel, m, s in zip(image, mean, invstd):
            channel -= m
            channel *= s
        records['image'] = image
        return records
- @register_keypointop
class TagGenerate(object):
    """Record ground-truth joint coordinates so AE loss can sample tag
    values from the tagmaps.

    Args:
        num_joints (int): the keypoint numbers of dataset to train
        max_people (int): maximum number of people supported by AE loss

    Returns:
        records(dict): contain the gt coords used in tagmap
    """

    def __init__(self, num_joints, max_people=30):
        super(TagGenerate, self).__init__()
        self.max_people = max_people
        self.num_joints = num_joints

    def __call__(self, records):
        kpts = records['joints'][0]
        tagmap = np.zeros(
            (self.max_people, self.num_joints, 4), dtype=np.int64)
        person_idx, joint_idx = np.where(kpts[..., 2] > 0)
        visible = kpts[person_idx, joint_idx]
        # Layout of the last axis: (joint id, y, x, valid flag).
        tagmap[person_idx, joint_idx, 0] = joint_idx
        tagmap[person_idx, joint_idx, 1] = visible[..., 1]  # y
        tagmap[person_idx, joint_idx, 2] = visible[..., 0]  # x
        tagmap[person_idx, joint_idx, 3] = 1
        records['tagmap'] = tagmap
        del records['joints']
        return records
- @register_keypointop
class ToHeatmaps(object):
    """to generate the gaussin heatmaps of keypoint for heatmap loss

    Args:
        num_joints (int): the keypoint numbers of dataset to train
        hmsize (list[2]): output heatmap's shape list of different scale outputs of higherhrnet
        sigma (float): the std of gaussin kernel genereted

    Returns:
        records(dict): contain the heatmaps used to heatmaploss
    """

    def __init__(self, num_joints, hmsize, sigma=None):
        super(ToHeatmaps, self).__init__()
        self.num_joints = num_joints
        self.hmsize = np.array(hmsize)
        if sigma is None:
            sigma = hmsize[0] // 64
        self.sigma = sigma
        # Precompute a single (6*sigma+3)^2 gaussian patch; it is pasted
        # (with border clipping) around every visible joint in __call__.
        # The patch is unnormalized: its center value equals 1.
        r = 6 * sigma + 3
        x = np.arange(0, r, 1, np.float32)
        y = x[:, None]
        x0, y0 = 3 * sigma + 1, 3 * sigma + 1
        self.gaussian = np.exp(-((x - x0)**2 + (y - y0)**2) / (2 * sigma**2))

    def __call__(self, records):
        # records['joints'] and records['mask'] are lists, one entry per
        # heatmap scale in self.hmsize.
        kpts_lst = records['joints']
        mask_lst = records['mask']
        for idx, hmsize in enumerate(self.hmsize):
            mask = mask_lst[idx]
            kpts = kpts_lst[idx]
            heatmaps = np.zeros((self.num_joints, hmsize, hmsize))
            # Only joints with a positive visibility flag get a gaussian.
            inds = np.where(kpts[..., 2] > 0)
            visible = kpts[inds].astype(np.int64)[..., :2]
            # Source (gaussian patch) and destination (heatmap) windows,
            # clipped so the patch can be pasted at the borders:
            #   ul/br: patch bounds in heatmap coords
            #   sul/sbr: window inside the gaussian patch
            #   dul/dbr: window inside the heatmap
            ul = np.round(visible - 3 * self.sigma - 1)
            br = np.round(visible + 3 * self.sigma + 2)
            sul = np.maximum(0, -ul)
            sbr = np.minimum(hmsize, br) - ul
            dul = np.clip(ul, 0, hmsize - 1)
            dbr = np.clip(br, 0, hmsize)
            for i in range(len(visible)):
                # Skip joints whose center lies outside this heatmap scale.
                if visible[i][0] < 0 or visible[i][1] < 0 or visible[i][
                        0] >= hmsize or visible[i][1] >= hmsize:
                    continue
                dx1, dy1 = dul[i]
                dx2, dy2 = dbr[i]
                sx1, sy1 = sul[i]
                sx2, sy2 = sbr[i]
                # Take the element-wise max so overlapping gaussians from
                # different people do not overwrite each other.
                heatmaps[inds[1][i], dy1:dy2, dx1:dx2] = np.maximum(
                    self.gaussian[sy1:sy2, sx1:sx2],
                    heatmaps[inds[1][i], dy1:dy2, dx1:dx2])
            records['heatmap_gt{}x'.format(idx + 1)] = heatmaps
            records['mask_{}x'.format(idx + 1)] = mask
        del records['mask']
        return records
- @register_keypointop
class RandomFlipHalfBodyTransform(object):
    """apply data augment to image and coords
    to achieve the flip, scale, rotate and half body transform effect for training image

    Args:
        trainsize (list):[w, h], Image target size
        upper_body_ids (list): The upper body joint ids
        flip_pairs (list): The left-right joints exchange order list
        pixel_std (int): The pixel std of the scale
        scale (float): The scale factor to transform the image
        rot (int): The rotate factor to transform the image
        num_joints_half_body (int): The joints threshold of the half body transform
        prob_half_body (float): The threshold of the half body transform
        flip (bool): Whether to flip the image
        rot_prob (float): The probability of applying the random rotation

    Returns:
        records(dict): contain the image and coords after tranformed
    """

    def __init__(self,
                 trainsize,
                 upper_body_ids,
                 flip_pairs,
                 pixel_std,
                 scale=0.35,
                 rot=40,
                 num_joints_half_body=8,
                 prob_half_body=0.3,
                 flip=True,
                 rot_prob=0.6):
        super(RandomFlipHalfBodyTransform, self).__init__()
        self.trainsize = trainsize
        self.upper_body_ids = upper_body_ids
        self.flip_pairs = flip_pairs
        self.pixel_std = pixel_std
        self.scale = scale
        self.rot = rot
        self.num_joints_half_body = num_joints_half_body
        self.prob_half_body = prob_half_body
        self.flip = flip
        self.aspect_ratio = trainsize[0] * 1.0 / trainsize[1]
        self.rot_prob = rot_prob

    def halfbody_transform(self, joints, joints_vis):
        """Compute a (center, scale) crop around either the upper- or
        lower-body joints; returns (None, None) when too few joints are
        visible to form a crop."""
        upper_joints = []
        lower_joints = []
        for joint_id in range(joints.shape[0]):
            if joints_vis[joint_id][0] > 0:
                if joint_id in self.upper_body_ids:
                    upper_joints.append(joints[joint_id])
                else:
                    lower_joints.append(joints[joint_id])
        # NOTE(review): randn() draws from a standard normal, so this branch
        # is taken ~69% of the time (P(N(0,1) < 0.5)); confirm rand() was not
        # intended here (the same expression appears in the upstream HRNet
        # code this is derived from).
        if np.random.randn() < 0.5 and len(upper_joints) > 2:
            selected_joints = upper_joints
        else:
            selected_joints = lower_joints if len(
                lower_joints) > 2 else upper_joints
        if len(selected_joints) < 2:
            return None, None
        selected_joints = np.array(selected_joints, dtype=np.float32)
        center = selected_joints.mean(axis=0)[:2]
        left_top = np.amin(selected_joints, axis=0)
        right_bottom = np.amax(selected_joints, axis=0)
        w = right_bottom[0] - left_top[0]
        h = right_bottom[1] - left_top[1]
        # Pad the tight box to the training aspect ratio.
        if w > self.aspect_ratio * h:
            h = w * 1.0 / self.aspect_ratio
        elif w < self.aspect_ratio * h:
            w = h * self.aspect_ratio
        scale = np.array(
            [w * 1.0 / self.pixel_std, h * 1.0 / self.pixel_std],
            dtype=np.float32)
        # Enlarge the half-body crop by 1.5x for extra context.
        scale = scale * 1.5
        return center, scale

    def flip_joints(self, joints, joints_vis, width, matched_parts):
        """Mirror the joint x coordinates and swap each left/right pair."""
        joints[:, 0] = width - joints[:, 0] - 1
        for pair in matched_parts:
            joints[pair[0], :], joints[pair[1], :] = \
                joints[pair[1], :], joints[pair[0], :].copy()
            joints_vis[pair[0], :], joints_vis[pair[1], :] = \
                joints_vis[pair[1], :], joints_vis[pair[0], :].copy()
        # Multiplying by the visibility mask zeroes coordinates of
        # invisible joints.
        return joints * joints_vis, joints_vis

    def __call__(self, records):
        image = records['image']
        joints = records['joints']
        joints_vis = records['joints_vis']
        c = records['center']
        s = records['scale']
        r = 0
        # Half-body crop: only when enough joints are visible and the
        # probability check passes.
        if (np.sum(joints_vis[:, 0]) > self.num_joints_half_body and
                np.random.rand() < self.prob_half_body):
            c_half_body, s_half_body = self.halfbody_transform(joints,
                                                               joints_vis)
            if c_half_body is not None and s_half_body is not None:
                c, s = c_half_body, s_half_body
        sf = self.scale
        rf = self.rot
        # Random scale in [1-sf, 1+sf] and rotation in [-2*rf, 2*rf].
        s = s * np.clip(np.random.randn() * sf + 1, 1 - sf, 1 + sf)
        r = np.clip(np.random.randn() * rf, -rf * 2,
                    rf * 2) if np.random.random() <= self.rot_prob else 0
        if self.flip and np.random.random() <= 0.5:
            image = image[:, ::-1, :]
            joints, joints_vis = self.flip_joints(
                joints, joints_vis, image.shape[1], self.flip_pairs)
            # Mirror the crop center to follow the flipped image.
            c[0] = image.shape[1] - c[0] - 1
        records['image'] = image
        records['joints'] = joints
        records['joints_vis'] = joints_vis
        records['center'] = c
        records['scale'] = s
        records['rotate'] = r
        return records
- @register_keypointop
class AugmentationbyInformantionDropping(object):
    """AID: Augmentation by Informantion Dropping. Please refer
    to https://arxiv.org/abs/2008.07139

    Randomly zeroes out circular patches centered near visible joints.

    Args:
        trainsize (list): [w, h] training input size.
        prob_cutout (float): The probability of the Cutout augmentation.
        offset_factor (float): Offset factor of cutout center.
        num_patch (int): Number of patches to be cutout.

    Returns:
        records (dict): contain the image and coords after tranformed
    """

    def __init__(self,
                 trainsize,
                 prob_cutout=0.0,
                 offset_factor=0.2,
                 num_patch=1):
        self.prob_cutout = prob_cutout
        self.offset_factor = offset_factor
        self.num_patch = num_patch
        self.trainsize = trainsize

    def _cutout(self, img, joints, joints_vis):
        height, width, _ = img.shape
        flat = img.reshape((height * width, -1))
        # Flattened pixel-grid coordinates aligned with `flat`.
        grid_x, grid_y = np.meshgrid(np.arange(0, width), np.arange(0, height))
        grid_x = grid_x.reshape((-1, ))
        grid_y = grid_y.reshape((-1, ))
        for _ in range(self.num_patch):
            # Anchor the patch near a randomly chosen visible joint.
            visible_ids, _ = np.where(joints_vis > 0)
            anchor_joint = np.random.choice(visible_ids)
            center = joints[anchor_joint, 0:2]
            center = center + np.random.randn(
                2) * self.trainsize[0] * self.offset_factor
            radius = np.random.uniform(0.1, 0.2) * self.trainsize[0]
            dist2 = ((center[0] - grid_x) / radius)**2 + (
                (center[1] - grid_y) / radius)**2
            inside = np.where((dist2 <= 1) & (dist2 >= 0))[0]
            flat[inside, :] = 0
        return flat.reshape((height, width, -1))

    def __call__(self, records):
        if np.random.rand() < self.prob_cutout:
            records['image'] = self._cutout(records['image'],
                                            records['joints'],
                                            records['joints_vis'])
        return records
- @register_keypointop
class TopDownAffine(object):
    """Warp the image (and its joints) to the training size using the
    record's center/scale and optional rotation.

    Args:
        trainsize (list): [w, h], the standard size used to train
        use_udp (bool): whether to use Unbiased Data Processing.

    Returns:
        records (dict): contain the image and coords after tranformed
    """

    def __init__(self, trainsize, use_udp=False):
        self.trainsize = trainsize
        self.use_udp = use_udp

    def __call__(self, records):
        image = records['image']
        joints = records['joints']
        joints_vis = records['joints_vis']
        rot = records.get('rotate', 0)
        out_size = (int(self.trainsize[0]), int(self.trainsize[1]))
        if self.use_udp:
            trans = get_warp_matrix(
                rot, records['center'] * 2.0,
                [self.trainsize[0] - 1.0, self.trainsize[1] - 1.0],
                records['scale'] * 200.0)
            image = cv2.warpAffine(
                image, trans, out_size, flags=cv2.INTER_LINEAR)
            joints[:, 0:2] = warp_affine_joints(joints[:, 0:2].copy(), trans)
        else:
            trans = get_affine_transform(records['center'],
                                         records['scale'] * 200, rot,
                                         self.trainsize)
            image = cv2.warpAffine(
                image, trans, out_size, flags=cv2.INTER_LINEAR)
            # Only transform joints that are marked visible.
            for idx in range(joints.shape[0]):
                if joints_vis[idx, 0] > 0.0:
                    joints[idx, 0:2] = affine_transform(joints[idx, 0:2],
                                                        trans)
        records['image'] = image
        records['joints'] = joints
        return records
- @register_keypointop
class SinglePoseAffine(object):
    """Random rotate/scale affine for single-person pose: warps the image
    to the training size and transforms the 2D joints (and, when present,
    rotates the 3D joints consistently).

    Args:
        trainsize (list): [w, h], the standard size used to train
        rotate (list[2]): [probability, max degree] of the random rotation
        scale (list[2]): [probability, ratio] of the random scaling
        use_udp (bool): whether to use Unbiased Data Processing.

    Returns:
        records (dict): contain the image and coords after tranformed
    """

    def __init__(self,
                 trainsize,
                 rotate=[1.0, 30],
                 scale=[1.0, 0.25],
                 use_udp=False):
        self.trainsize = trainsize
        self.use_udp = use_udp
        self.rot_prob = rotate[0]
        self.rot_range = rotate[1]
        self.scale_prob = scale[0]
        self.scale_ratio = scale[1]

    def __call__(self, records):
        image = records['image']
        if 'joints_2d' in records:
            joints = records['joints_2d'] if 'joints_2d' in records else None
            # Default to all-visible when no visibility flags are provided.
            joints_vis = records[
                'joints_vis'] if 'joints_vis' in records else np.ones(
                    (len(joints), 1))
        rot = 0
        s = 1.
        # Sample rotation in [-2*range, 2*range] degrees and scale in
        # [1-ratio, 1+ratio].
        if np.random.random() < self.rot_prob:
            rot = np.clip(np.random.randn() * self.rot_range,
                          -self.rot_range * 2, self.rot_range * 2)
        if np.random.random() < self.scale_prob:
            s = np.clip(np.random.randn() * self.scale_ratio + 1,
                        1 - self.scale_ratio, 1 + self.scale_ratio)
        if self.use_udp:
            trans = get_warp_matrix(
                rot,
                np.array(records['bbox_center']) * 2.0,
                [self.trainsize[0] - 1.0, self.trainsize[1] - 1.0],
                records['bbox_scale'] * 200.0 * s)
            image = cv2.warpAffine(
                image,
                trans, (int(self.trainsize[0]), int(self.trainsize[1])),
                flags=cv2.INTER_LINEAR)
            if 'joints_2d' in records:
                joints[:, 0:2] = warp_affine_joints(joints[:, 0:2].copy(),
                                                    trans)
        else:
            trans = get_affine_transform(
                np.array(records['bbox_center']),
                records['bbox_scale'] * s * 200, rot, self.trainsize)
            image = cv2.warpAffine(
                image,
                trans, (int(self.trainsize[0]), int(self.trainsize[1])),
                flags=cv2.INTER_LINEAR)
            if 'joints_2d' in records:
                # Only transform joints that are marked visible.
                for i in range(len(joints)):
                    if joints_vis[i, 0] > 0.0:
                        joints[i, 0:2] = affine_transform(joints[i, 0:2],
                                                          trans)
        if 'joints_3d' in records:
            pose3d = records['joints_3d']
            if not rot == 0:
                # Rotate the 3D pose in the image plane so it stays
                # consistent with the 2D rotation applied above.
                trans_3djoints = np.eye(3)
                rot_rad = -rot * np.pi / 180
                sn, cs = np.sin(rot_rad), np.cos(rot_rad)
                trans_3djoints[0, :2] = [cs, -sn]
                trans_3djoints[1, :2] = [sn, cs]
                pose3d[:, :3] = np.einsum('ij,kj->ki', trans_3djoints,
                                          pose3d[:, :3])
                records['joints_3d'] = pose3d
        records['image'] = image
        if 'joints_2d' in records:
            records['joints_2d'] = joints
        return records
- @register_keypointop
class NoiseJitter(object):
    """Randomly jitter the color channels of the image.

    Args:
        noise_factor (float): the noise factor ratio used to generate the
            per-channel jitter range

    Returns:
        records (dict): contain the image after tranformed
    """

    def __init__(self, noise_factor=0.4):
        self.noise_factor = noise_factor

    def __call__(self, records):
        # One multiplicative factor per channel; kept on the instance as
        # the original implementation did.
        self.pn = np.random.uniform(1 - self.noise_factor,
                                    1 + self.noise_factor, 3)
        img = records['image']
        for channel in range(3):
            img[:, :, channel] = np.minimum(
                255.0,
                np.maximum(0.0, img[:, :, channel] * self.pn[channel]))
        records['image'] = img
        return records
- @register_keypointop
class FlipPose(object):
    """Randomly flip the image (and 2D/3D joints) horizontally.

    Args:
        flip_prob (float): probability of applying the flip.
        img_res (int): image resolution used to mirror x coordinates.
        num_joints (int): number of joints; must be 14 or 24, which selects
            the left-right exchange permutation.

    Returns:
        records (dict): contain the image and coords after tranformed

    Raises:
        ValueError: if ``num_joints`` is neither 14 nor 24.
    """

    def __init__(self, flip_prob=0.5, img_res=224, num_joints=14):
        # Fix: attribute was misspelled `flip_pob`.
        self.flip_prob = flip_prob
        self.img_res = img_res
        if num_joints == 24:
            self.perm = [
                5, 4, 3, 2, 1, 0, 11, 10, 9, 8, 7, 6, 12, 13, 14, 15, 16, 17,
                18, 19, 21, 20, 23, 22
            ]
        elif num_joints == 14:
            self.perm = [5, 4, 3, 2, 1, 0, 11, 10, 9, 8, 7, 6, 12, 13]
        else:
            # Fix: previously this only printed an error and left
            # `self.perm` undefined, causing a confusing AttributeError at
            # the first flipped sample.
            raise ValueError(
                "error num_joints in flip :{}".format(num_joints))

    def __call__(self, records):
        if np.random.random() < self.flip_prob:
            img = np.fliplr(records['image'])
            if 'joints_2d' in records:
                joints_2d = records['joints_2d'][self.perm]
                joints_2d[:, 0] = self.img_res - joints_2d[:, 0]
                records['joints_2d'] = joints_2d
            if 'joints_3d' in records:
                joints_3d = records['joints_3d'][self.perm]
                # Mirroring in 3D only negates the x axis.
                joints_3d[:, 0] = -joints_3d[:, 0]
                records['joints_3d'] = joints_3d
            records['image'] = img
        return records
- @register_keypointop
class TopDownEvalAffine(object):
    """Eval-time affine: warps the whole image to the training size using
    the image's own center and shape as the crop.

    Args:
        trainsize (list): [w, h], the standard size used to train
        use_udp (bool): whether to use Unbiased Data Processing.

    Returns:
        records (dict): contain the image after tranformed
    """

    def __init__(self, trainsize, use_udp=False):
        self.trainsize = trainsize
        self.use_udp = use_udp

    def __call__(self, records):
        rot = 0
        # im_shape is stored as [h, w]; reverse to [w, h].
        imshape = records['im_shape'][::-1]
        center = imshape / 2.
        scale = imshape
        out_size = (int(self.trainsize[0]), int(self.trainsize[1]))
        if self.use_udp:
            trans = get_warp_matrix(
                rot, center * 2.0,
                [self.trainsize[0] - 1.0, self.trainsize[1] - 1.0], scale)
        else:
            trans = get_affine_transform(center, scale, rot, self.trainsize)
        records['image'] = cv2.warpAffine(
            records['image'], trans, out_size, flags=cv2.INTER_LINEAR)
        return records
- @register_keypointop
class ToHeatmapsTopDown(object):
    """Generate per-joint gaussian heatmaps for the heatmap loss.

    Derived from
    https://github.com/leoxiaobin/deep-high-resolution-net.pytorch
    (Copyright (c) Microsoft, under the MIT License).

    Args:
        hmsize (list): [w, h] output heatmap's size
        sigma (float): the std of gaussin kernel genereted

    Returns:
        records (dict): contain the heatmaps used to heatmaploss
    """

    def __init__(self, hmsize, sigma):
        super(ToHeatmapsTopDown, self).__init__()
        self.hmsize = np.array(hmsize)
        self.sigma = sigma

    def __call__(self, records):
        joints = records['joints']
        joints_vis = records['joints_vis']
        num_joints = joints.shape[0]
        image_size = np.array(
            [records['image'].shape[1], records['image'].shape[0]])
        target_weight = np.ones((num_joints, 1), dtype=np.float32)
        target_weight[:, 0] = joints_vis[:, 0]
        target = np.zeros(
            (num_joints, self.hmsize[1], self.hmsize[0]), dtype=np.float32)
        radius = self.sigma * 3
        feat_stride = image_size / self.hmsize
        for jid in range(num_joints):
            # Joint center on the heatmap grid, rounded to nearest pixel.
            mu_x = int(joints[jid][0] / feat_stride[0] + 0.5)
            mu_y = int(joints[jid][1] / feat_stride[1] + 0.5)
            ul = [int(mu_x - radius), int(mu_y - radius)]
            br = [int(mu_x + radius + 1), int(mu_y + radius + 1)]
            # Zero-weight joints whose gaussian falls entirely outside the
            # heatmap.
            if (ul[0] >= self.hmsize[0] or ul[1] >= self.hmsize[1] or
                    br[0] < 0 or br[1] < 0):
                target_weight[jid] = 0
                continue
            if target_weight[jid] > 0.5:
                size = 2 * radius + 1
                xs = np.arange(0, size, 1, np.float32)
                ys = xs[:, np.newaxis]
                x0 = y0 = size // 2
                # Unnormalized gaussian: the center value equals 1.
                g = np.exp(-((xs - x0)**2 + (ys - y0)**2) /
                           (2 * self.sigma**2))
                # Usable window of the gaussian and of the heatmap, clipped
                # at the borders.
                g_x = max(0, -ul[0]), min(br[0], self.hmsize[0]) - ul[0]
                g_y = max(0, -ul[1]), min(br[1], self.hmsize[1]) - ul[1]
                img_x = max(0, ul[0]), min(br[0], self.hmsize[0])
                img_y = max(0, ul[1]), min(br[1], self.hmsize[1])
                target[jid][img_y[0]:img_y[1], img_x[0]:img_x[1]] = \
                    g[g_y[0]:g_y[1], g_x[0]:g_x[1]]
        records['target'] = target
        records['target_weight'] = target_weight
        del records['joints'], records['joints_vis']
        return records
- @register_keypointop
class ToHeatmapsTopDown_DARK(object):
    """Generate gaussian heatmaps using unrounded (sub-pixel) joint
    centers, as used by DARK pose.

    Args:
        hmsize (list): [w, h] output heatmap's size
        sigma (float): the std of gaussin kernel genereted

    Returns:
        records (dict): contain the heatmaps used to heatmaploss
    """

    def __init__(self, hmsize, sigma):
        super(ToHeatmapsTopDown_DARK, self).__init__()
        self.hmsize = np.array(hmsize)
        self.sigma = sigma

    def __call__(self, records):
        joints = records['joints']
        joints_vis = records['joints_vis']
        num_joints = joints.shape[0]
        image_size = np.array(
            [records['image'].shape[1], records['image'].shape[0]])
        target_weight = np.ones((num_joints, 1), dtype=np.float32)
        target_weight[:, 0] = joints_vis[:, 0]
        target = np.zeros(
            (num_joints, self.hmsize[1], self.hmsize[0]), dtype=np.float32)
        radius = self.sigma * 3
        feat_stride = image_size / self.hmsize
        # Full-map coordinate grids; the gaussian is evaluated over the
        # whole heatmap rather than pasted as a patch.
        xs = np.arange(0, self.hmsize[0], 1, np.float32)
        ys = np.arange(0, self.hmsize[1], 1, np.float32)[:, np.newaxis]
        for jid in range(num_joints):
            # Sub-pixel joint center on the heatmap grid (not rounded).
            mu_x = joints[jid][0] / feat_stride[0]
            mu_y = joints[jid][1] / feat_stride[1]
            ul = [int(mu_x - radius), int(mu_y - radius)]
            br = [int(mu_x + radius + 1), int(mu_y + radius + 1)]
            # Zero-weight joints whose gaussian falls entirely outside.
            if (ul[0] >= self.hmsize[0] or ul[1] >= self.hmsize[1] or
                    br[0] < 0 or br[1] < 0):
                target_weight[jid] = 0
                continue
            if target_weight[jid] > 0.5:
                target[jid] = np.exp(-((xs - mu_x)**2 + (ys - mu_y)**2) /
                                     (2 * self.sigma**2))
        records['target'] = target
        records['target_weight'] = target_weight
        del records['joints'], records['joints_vis']
        return records
- @register_keypointop
class ToHeatmapsTopDown_UDP(object):
    """Generate gaussian heatmaps with UDP (Unbiased Data Processing)
    coordinate mapping.

    This code is based on:
    https://github.com/HuangJunJie2017/UDP-Pose/blob/master/deep-high-resolution-net.pytorch/lib/dataset/JointsDataset.py
    ref: Huang et al. The Devil is in the Details: Delving into Unbiased
    Data Processing for Human Pose Estimation (CVPR 2020).

    Args:
        hmsize (list): [w, h] output heatmap's size
        sigma (float): the std of gaussin kernel genereted

    Returns:
        records (dict): contain the heatmaps used to heatmaploss
    """

    def __init__(self, hmsize, sigma):
        super(ToHeatmapsTopDown_UDP, self).__init__()
        self.hmsize = np.array(hmsize)
        self.sigma = sigma

    def __call__(self, records):
        joints = records['joints']
        joints_vis = records['joints_vis']
        num_joints = joints.shape[0]
        image_size = np.array(
            [records['image'].shape[1], records['image'].shape[0]])
        target_weight = np.ones((num_joints, 1), dtype=np.float32)
        target_weight[:, 0] = joints_vis[:, 0]
        target = np.zeros(
            (num_joints, self.hmsize[1], self.hmsize[0]), dtype=np.float32)
        radius = self.sigma * 3
        size = 2 * radius + 1
        xs = np.arange(0, size, 1, np.float32)
        ys = xs[:, None]
        # UDP maps coordinates via (size - 1) ratios so the grid endpoints
        # stay aligned.
        feat_stride = (image_size - 1.0) / (self.hmsize - 1.0)
        for jid in range(num_joints):
            mu_x = int(joints[jid][0] / feat_stride[0] + 0.5)
            mu_y = int(joints[jid][1] / feat_stride[1] + 0.5)
            ul = [int(mu_x - radius), int(mu_y - radius)]
            br = [int(mu_x + radius + 1), int(mu_y + radius + 1)]
            # Zero-weight joints whose gaussian falls entirely outside.
            if (ul[0] >= self.hmsize[0] or ul[1] >= self.hmsize[1] or
                    br[0] < 0 or br[1] < 0):
                target_weight[jid] = 0
                continue
            # Shift the kernel center by the sub-pixel residual so the
            # gaussian peak sits at the exact (unrounded) coordinates.
            mu_x_ac = joints[jid][0] / feat_stride[0]
            mu_y_ac = joints[jid][1] / feat_stride[1]
            x0 = y0 = size // 2
            x0 += mu_x_ac - mu_x
            y0 += mu_y_ac - mu_y
            g = np.exp(-((xs - x0)**2 + (ys - y0)**2) / (2 * self.sigma**2))
            # Usable window of the gaussian and of the heatmap, clipped at
            # the borders.
            g_x = max(0, -ul[0]), min(br[0], self.hmsize[0]) - ul[0]
            g_y = max(0, -ul[1]), min(br[1], self.hmsize[1]) - ul[1]
            img_x = max(0, ul[0]), min(br[0], self.hmsize[0])
            img_y = max(0, ul[1]), min(br[1], self.hmsize[1])
            if target_weight[jid] > 0.5:
                target[jid][img_y[0]:img_y[1], img_x[0]:img_x[1]] = \
                    g[g_y[0]:g_y[1], g_x[0]:g_x[1]]
        records['target'] = target
        records['target_weight'] = target_weight
        del records['joints'], records['joints_vis']
        return records
|