# Preprocessing operators for detection model inference.
- import numpy as np
- import cv2
- import copy
def decode_image(img_path):
    """Read an image file from disk and decode it into an RGB array.

    Args:
        img_path (str): path of the image file.

    Returns:
        tuple: (HWC RGB np.ndarray,
                dict with 'im_shape' [h, w] and 'scale_factor' [1., 1.]
                as float32 arrays)
    """
    with open(img_path, 'rb') as f:
        raw_bytes = f.read()
    buf = np.frombuffer(raw_bytes, dtype='uint8')
    # cv2.imdecode yields BGR; downstream ops expect RGB.
    image = cv2.imdecode(buf, 1)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    info = {
        "im_shape": np.array(image.shape[:2], dtype=np.float32),
        "scale_factor": np.array([1., 1.], dtype=np.float32),
    }
    return image, info
class Resize(object):
    """Resize an image to `target_size`, optionally keeping aspect ratio.

    Args:
        target_size (int|list): target size of the image; an int is expanded
            to a square [size, size].
        keep_ratio (bool): if True, both axes are scaled by one common factor
            so the image fits within target_size; otherwise each axis is
            stretched independently. Default True.
        interp (int): OpenCV interpolation method. Default cv2.INTER_LINEAR.
    """

    def __init__(self, target_size, keep_ratio=True, interp=cv2.INTER_LINEAR):
        if isinstance(target_size, int):
            target_size = [target_size, target_size]
        self.target_size = target_size
        self.keep_ratio = keep_ratio
        self.interp = interp

    def __call__(self, im, im_info):
        """Resize `im` and record the new shape and per-axis scale factors.

        Args:
            im (np.ndarray): HWC image.
            im_info (dict): info of image; 'im_shape' and 'scale_factor'
                are updated in place.
        Returns:
            im (np.ndarray): processed image (np.ndarray)
            im_info (dict): info of processed image
        """
        assert len(self.target_size) == 2
        assert self.target_size[0] > 0 and self.target_size[1] > 0
        im_scale_y, im_scale_x = self.generate_scale(im)
        im = cv2.resize(
            im,
            None,
            None,
            fx=im_scale_x,
            fy=im_scale_y,
            interpolation=self.interp)
        im_info['im_shape'] = np.array(im.shape[:2]).astype('float32')
        im_info['scale_factor'] = np.array(
            [im_scale_y, im_scale_x]).astype('float32')
        return im, im_info

    def generate_scale(self, im):
        """Compute the per-axis resize ratios for `im`.

        Args:
            im (np.ndarray): HWC image.
        Returns:
            im_scale_y: the resize ratio of Y
            im_scale_x: the resize ratio of X
        """
        origin_shape = im.shape[:2]
        if self.keep_ratio:
            # Scale by the short side, but fall back to the long-side ratio
            # if the scaled long side would exceed the target's long side.
            im_size_min = np.min(origin_shape)
            im_size_max = np.max(origin_shape)
            target_size_min = np.min(self.target_size)
            target_size_max = np.max(self.target_size)
            im_scale = float(target_size_min) / float(im_size_min)
            if np.round(im_scale * im_size_max) > target_size_max:
                im_scale = float(target_size_max) / float(im_size_max)
            im_scale_x = im_scale
            im_scale_y = im_scale
        else:
            resize_h, resize_w = self.target_size
            im_scale_y = resize_h / float(origin_shape[0])
            im_scale_x = resize_w / float(origin_shape[1])
        return im_scale_y, im_scale_x
class NormalizeImage(object):
    """Normalize an image with per-channel mean/std statistics.

    Args:
        mean (list): per-channel mean subtracted from the image
        std (list): per-channel std the image is divided by
        is_scale (bool): whether to scale pixel values by 1/255 first
        norm_type (str): type in ['mean_std', 'none']
    """

    def __init__(self, mean, std, is_scale=True, norm_type='mean_std'):
        self.mean = mean
        self.std = std
        self.is_scale = is_scale
        self.norm_type = norm_type

    def __call__(self, im, im_info):
        """Normalize `im` as float32 and return it with `im_info` untouched.

        Args:
            im (np.ndarray): HWC image.
            im_info (dict): info of image.
        Returns:
            im (np.ndarray): normalized float32 image.
            im_info (dict): unchanged info dict.
        """
        im = im.astype(np.float32, copy=False)
        if self.is_scale:
            im *= 1.0 / 255.0
        if self.norm_type == 'mean_std':
            # Broadcast the per-channel statistics across the HWC image.
            channel_mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
            channel_std = np.array(self.std)[np.newaxis, np.newaxis, :]
            im -= channel_mean
            im /= channel_std
        return im, im_info
class Permute(object):
    """Permute the image from HWC layout to CHW layout.

    Note: earlier docs advertised `to_bgr`/`channel_first` arguments, but the
    constructor takes no arguments — this op always converts HWC to CHW.
    """

    def __init__(self, ):
        super(Permute, self).__init__()

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): HWC image.
            im_info (dict): info of image.
        Returns:
            im (np.ndarray): contiguous CHW image.
            im_info (dict): unchanged info dict.
        """
        # .copy() makes the transposed view contiguous in memory.
        im = im.transpose((2, 0, 1)).copy()
        return im, im_info
class PadStride(object):
    """Zero-pad a CHW image so its spatial dims are multiples of `stride`;
    replaces PadBatch(pad_to_stride) from the original config for FPN models.

    Args:
        stride (int): coarsest stride of the model with FPN; output shape
            satisfies shape % stride == 0. A value <= 0 disables padding.
    """

    def __init__(self, stride=0):
        self.coarsest_stride = stride

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): CHW image.
            im_info (dict): info of image.
        Returns:
            im (np.ndarray): zero-padded CHW float32 image (or the input
                unchanged when padding is disabled).
            im_info (dict): unchanged info dict.
        """
        coarsest_stride = self.coarsest_stride
        if coarsest_stride <= 0:
            # Padding disabled: pass the image through untouched.
            return im, im_info
        im_c, im_h, im_w = im.shape
        # Round each spatial dim up to the next multiple of the stride.
        pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride)
        pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride)
        padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32)
        padding_im[:, :im_h, :im_w] = im
        return padding_im, im_info
class LetterBoxResize(object):
    def __init__(self, target_size):
        """
        Resize image to target size, convert normalized xywh to pixel xyxy
        format ([x_center, y_center, width, height] -> [x0, y0, x1, y1]).
        Args:
            target_size (int|list): image target size.
        """
        super(LetterBoxResize, self).__init__()
        if isinstance(target_size, int):
            target_size = [target_size, target_size]
        self.target_size = target_size

    def letterbox(self, img, height, width, color=(127.5, 127.5, 127.5)):
        """Resize a rectangular image into a padded (height, width) canvas."""
        orig_h, orig_w = img.shape[:2]
        # Common ratio that fits the image inside the target rectangle.
        ratio = min(float(height) / orig_h, float(width) / orig_w)
        resized_w = round(orig_w * ratio)
        resized_h = round(orig_h * ratio)
        padw = (width - resized_w) / 2
        padh = (height - resized_h) / 2
        # The +/-0.1 nudges make rounding split an odd pad across both sides.
        top, bottom = round(padh - 0.1), round(padh + 0.1)
        left, right = round(padw - 0.1), round(padw + 0.1)
        img = cv2.resize(
            img, (resized_w, resized_h), interpolation=cv2.INTER_AREA)
        img = cv2.copyMakeBorder(
            img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
        return img, ratio, padw, padh

    def __call__(self, im, im_info):
        """Letterbox `im` to target_size and update shape/scale in `im_info`.

        Args:
            im (np.ndarray): HWC image.
            im_info (dict): info of image; updated in place.
        Returns:
            im (np.ndarray): letterboxed image.
            im_info (dict): updated info dict.
        """
        assert len(self.target_size) == 2
        assert self.target_size[0] > 0 and self.target_size[1] > 0
        height, width = self.target_size
        orig_h, orig_w = im.shape[:2]
        im, ratio, padw, padh = self.letterbox(im, height=height, width=width)
        # Shape of the resized content before padding.
        scaled_shape = [round(orig_h * ratio), round(orig_w * ratio)]
        im_info['im_shape'] = np.array(scaled_shape, dtype=np.float32)
        im_info['scale_factor'] = np.array([ratio, ratio], dtype=np.float32)
        return im, im_info
class Pad(object):
    def __init__(self, size, fill_value=[114.0, 114.0, 114.0]):
        """
        Pad the image on the bottom/right edges to a specified size.
        Args:
            size (list[int]|int): image target size; an int becomes a square.
            fill_value (list[float]): rgb value of pad area, default (114.0, 114.0, 114.0)
        """
        super(Pad, self).__init__()
        if isinstance(size, int):
            size = [size, size]
        self.size = size
        self.fill_value = fill_value

    def __call__(self, im, im_info):
        target_h, target_w = self.size
        im_h, im_w = im.shape[:2]
        if target_h == im_h and target_w == im_w:
            # Already the target size: only ensure float32 dtype.
            return im.astype(np.float32), im_info
        # Fill a canvas with the pad color, then paste the image top-left.
        canvas = np.full(
            (target_h, target_w, 3), self.fill_value, dtype=np.float32)
        canvas[0:im_h, 0:im_w, :] = im.astype(np.float32)
        return canvas, im_info
def rotate_point(pt, angle_rad):
    """Rotate a 2D point around the origin.

    Args:
        pt (list[float]): 2 dimensional point to be rotated
        angle_rad (float): rotation angle in radians

    Returns:
        list[float]: the rotated point [x', y'].
    """
    assert len(pt) == 2
    sin_a = np.sin(angle_rad)
    cos_a = np.cos(angle_rad)
    # Standard 2D rotation matrix applied to (x, y).
    return [pt[0] * cos_a - pt[1] * sin_a,
            pt[0] * sin_a + pt[1] * cos_a]
- def _get_3rd_point(a, b):
- """To calculate the affine matrix, three pairs of points are required. This
- function is used to get the 3rd point, given 2D points a & b.
- The 3rd point is defined by rotating vector `a - b` by 90 degrees
- anticlockwise, using b as the rotation center.
- Args:
- a (np.ndarray): point(x,y)
- b (np.ndarray): point(x,y)
- Returns:
- np.ndarray: The 3rd point.
- """
- assert len(a) == 2
- assert len(b) == 2
- direction = a - b
- third_pt = b + np.array([-direction[1], direction[0]], dtype=np.float32)
- return third_pt
def get_affine_transform(center,
                         input_size,
                         rot,
                         output_size,
                         shift=(0., 0.),
                         inv=False):
    """Get the affine transform matrix, given the center/input_size/rot/output_size.

    Args:
        center (np.ndarray[2, ]): Center of the bounding box (x, y).
        input_size (np.ndarray[2, ] | list | tuple | float): Size of the input
            region wrt [width, height]; a scalar is expanded to a square.
        rot (float): Rotation angle (degree).
        output_size (np.ndarray[2, ]): Size of the destination heatmaps.
        shift (0-100%): Shift translation ratio wrt the width/height.
            Default (0., 0.).
        inv (bool): Option to inverse the affine transform direction.
            (inv=False: src->dst or inv=True: dst->src)

    Returns:
        np.ndarray: The 2x3 transform matrix.
    """
    assert len(center) == 2
    assert len(output_size) == 2
    assert len(shift) == 2
    # Accept tuples as well as lists/arrays; a bare scalar becomes a square
    # size (the original check missed tuple and would have nested it).
    if not isinstance(input_size, (np.ndarray, list, tuple)):
        input_size = np.array([input_size, input_size], dtype=np.float32)
    scale_tmp = input_size

    shift = np.array(shift)
    src_w = scale_tmp[0]
    dst_w = output_size[0]
    dst_h = output_size[1]

    rot_rad = np.pi * rot / 180
    # Direction vectors define the second point pair; the third point of each
    # triple is derived by a 90-degree rotation (see _get_3rd_point).
    src_dir = rotate_point([0., src_w * -0.5], rot_rad)
    dst_dir = np.array([0., dst_w * -0.5])

    src = np.zeros((3, 2), dtype=np.float32)
    src[0, :] = center + scale_tmp * shift
    src[1, :] = center + src_dir + scale_tmp * shift
    src[2, :] = _get_3rd_point(src[0, :], src[1, :])

    dst = np.zeros((3, 2), dtype=np.float32)
    dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
    dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
    dst[2, :] = _get_3rd_point(dst[0, :], dst[1, :])

    if inv:
        trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
    else:
        trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
    return trans
class WarpAffine(object):
    """Warp affine the image (CenterNet-style input preprocessing).

    Args:
        keep_res (bool): if True, keep the input resolution, padding each
            spatial dim up to the next multiple of (pad + 1) — assumes
            pad + 1 is a power of two (e.g. pad=31 -> multiples of 32);
            otherwise warp to a fixed (input_h, input_w). Default False.
        pad (int): bit mask used for the keep_res rounding. Default 31.
        input_h (int): fixed output height when keep_res is False.
        input_w (int): fixed output width when keep_res is False.
        scale (float): stored but not used by __call__.
        shift (float): stored but not used by __call__.
    """

    def __init__(self,
                 keep_res=False,
                 pad=31,
                 input_h=512,
                 input_w=512,
                 scale=0.4,
                 shift=0.1):
        self.keep_res = keep_res
        self.pad = pad
        self.input_h = input_h
        self.input_w = input_w
        self.scale = scale
        self.shift = shift

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): HWC RGB image.
            im_info (dict): info of image.
        Returns:
            im (np.ndarray): warped BGR image of shape (input_h, input_w, C).
            im_info (dict): unchanged info dict.
        """
        img = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
        h, w = img.shape[:2]
        if self.keep_res:
            # Bitwise-or with the mask then +1 rounds up to the next
            # multiple of (pad + 1) when pad + 1 is a power of two.
            input_h = (h | self.pad) + 1
            input_w = (w | self.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
            c = np.array([w // 2, h // 2], dtype=np.float32)
        else:
            s = max(h, w) * 1.0
            input_h, input_w = self.input_h, self.input_w
            c = np.array([w / 2., h / 2.], dtype=np.float32)
        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        # The original called cv2.resize(img, (w, h)) here — resizing the
        # image to its own dimensions, a no-op — so it was removed.
        inp = cv2.warpAffine(
            img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)
        return inp, im_info
- # keypoint preprocess
def get_warp_matrix(theta, size_input, size_dst, size_target):
    """This code is based on
    https://github.com/open-mmlab/mmpose/blob/master/mmpose/core/post_processing/post_transforms.py
    Calculate the transformation matrix under the constraint of unbiased.
    Paper ref: Huang et al. The Devil is in the Details: Delving into Unbiased
    Data Processing for Human Pose Estimation (CVPR 2020).

    Args:
        theta (float): Rotation angle in degrees.
        size_input (np.ndarray): Size of input image [w, h].
        size_dst (np.ndarray): Size of output image [w, h].
        size_target (np.ndarray): Size of ROI in input plane [w, h].

    Returns:
        matrix (np.ndarray): A 2x3 float32 matrix for transformation.
    """
    angle = np.deg2rad(theta)
    cos_a, sin_a = np.cos(angle), np.sin(angle)
    scale_x = size_dst[0] / size_target[0]
    scale_y = size_dst[1] / size_target[1]
    warp = np.zeros((2, 3), dtype=np.float32)
    # Row 0: scaled rotation plus a translation that centers the ROI.
    warp[0, 0] = cos_a * scale_x
    warp[0, 1] = -sin_a * scale_x
    warp[0, 2] = scale_x * (-0.5 * size_input[0] * cos_a +
                            0.5 * size_input[1] * sin_a +
                            0.5 * size_target[0])
    # Row 1: same structure for the y axis.
    warp[1, 0] = sin_a * scale_y
    warp[1, 1] = cos_a * scale_y
    warp[1, 2] = scale_y * (-0.5 * size_input[0] * sin_a -
                            0.5 * size_input[1] * cos_a +
                            0.5 * size_target[1])
    return warp
class TopDownEvalAffine(object):
    """Apply an affine transform to the image and coords for top-down eval.

    Args:
        trainsize (list): [w, h], the standard size used to train
        use_udp (bool): whether to use Unbiased Data Processing.
        records(dict): the dict contained the image and coords
    Returns:
        records (dict): contain the image and coords after tranformed
    """

    def __init__(self, trainsize, use_udp=False):
        self.trainsize = trainsize
        self.use_udp = use_udp

    def __call__(self, image, im_info):
        rot = 0
        # im_shape is stored as [h, w]; reverse to [w, h] for center/scale.
        imshape = im_info['im_shape'][::-1]
        center = im_info.get('center', imshape / 2.)
        scale = im_info.get('scale', imshape)
        dsize = (int(self.trainsize[0]), int(self.trainsize[1]))
        if self.use_udp:
            trans = get_warp_matrix(
                rot, center * 2.0,
                [self.trainsize[0] - 1.0, self.trainsize[1] - 1.0], scale)
        else:
            trans = get_affine_transform(center, scale, rot, self.trainsize)
        image = cv2.warpAffine(image, trans, dsize, flags=cv2.INTER_LINEAR)
        return image, im_info
class Compose:
    """Build a preprocessing pipeline from op configs and run it on a path."""

    def __init__(self, transforms):
        self.transforms = []
        for op_info in transforms:
            cfg = dict(op_info)
            op_name = cfg.pop('type')
            # NOTE(review): eval resolves the op class by name from this
            # module's namespace — config must come from a trusted source.
            self.transforms.append(eval(op_name)(**cfg))

    def __call__(self, img_path):
        """Decode the image at `img_path`, run every transform in order,
        and return the model input dict (im_info plus an 'image' array)."""
        img, im_info = decode_image(img_path)
        for transform in self.transforms:
            img, im_info = transform(img, im_info)
        inputs = copy.deepcopy(im_info)
        inputs['image'] = np.ascontiguousarray(img.astype('float32'))
        return inputs
# Mapping from contiguous model class ids (0-79) to COCO category ids (1-90).
# The values skip numbers (e.g. 12, 26, 29, 30) — presumably these are the
# category ids unused by the COCO annotations; verify against the dataset's
# category list if exact correspondence matters.
coco_clsid2catid = {
    0: 1,
    1: 2,
    2: 3,
    3: 4,
    4: 5,
    5: 6,
    6: 7,
    7: 8,
    8: 9,
    9: 10,
    10: 11,
    11: 13,
    12: 14,
    13: 15,
    14: 16,
    15: 17,
    16: 18,
    17: 19,
    18: 20,
    19: 21,
    20: 22,
    21: 23,
    22: 24,
    23: 25,
    24: 27,
    25: 28,
    26: 31,
    27: 32,
    28: 33,
    29: 34,
    30: 35,
    31: 36,
    32: 37,
    33: 38,
    34: 39,
    35: 40,
    36: 41,
    37: 42,
    38: 43,
    39: 44,
    40: 46,
    41: 47,
    42: 48,
    43: 49,
    44: 50,
    45: 51,
    46: 52,
    47: 53,
    48: 54,
    49: 55,
    50: 56,
    51: 57,
    52: 58,
    53: 59,
    54: 60,
    55: 61,
    56: 62,
    57: 63,
    58: 64,
    59: 65,
    60: 67,
    61: 70,
    62: 72,
    63: 73,
    64: 74,
    65: 75,
    66: 76,
    67: 77,
    68: 78,
    69: 79,
    70: 80,
    71: 81,
    72: 82,
    73: 84,
    74: 85,
    75: 86,
    76: 87,
    77: 88,
    78: 89,
    79: 90
}
|