preprocess_ops.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490
  1. import numpy as np
  2. import cv2
  3. import copy
  4. def decode_image(im):
  5. im = np.array(im)
  6. img_info = {
  7. "im_shape": np.array(
  8. im.shape[:2], dtype=np.float32),
  9. "scale_factor": np.array(
  10. [1., 1.], dtype=np.float32)
  11. }
  12. return im, img_info
  13. class Resize(object):
  14. """resize image by target_size and max_size
  15. Args:
  16. target_size (int): the target size of image
  17. keep_ratio (bool): whether keep_ratio or not, default true
  18. interp (int): method of resize
  19. """
  20. def __init__(self, target_size, keep_ratio=True, interp=cv2.INTER_LINEAR):
  21. if isinstance(target_size, int):
  22. target_size = [target_size, target_size]
  23. self.target_size = target_size
  24. self.keep_ratio = keep_ratio
  25. self.interp = interp
  26. def __call__(self, im, im_info):
  27. """
  28. Args:
  29. im (np.ndarray): image (np.ndarray)
  30. im_info (dict): info of image
  31. Returns:
  32. im (np.ndarray): processed image (np.ndarray)
  33. im_info (dict): info of processed image
  34. """
  35. assert len(self.target_size) == 2
  36. assert self.target_size[0] > 0 and self.target_size[1] > 0
  37. im_channel = im.shape[2]
  38. im_scale_y, im_scale_x = self.generate_scale(im)
  39. im = cv2.resize(
  40. im,
  41. None,
  42. None,
  43. fx=im_scale_x,
  44. fy=im_scale_y,
  45. interpolation=self.interp)
  46. im_info['im_shape'] = np.array(im.shape[:2]).astype('float32')
  47. im_info['scale_factor'] = np.array(
  48. [im_scale_y, im_scale_x]).astype('float32')
  49. return im, im_info
  50. def generate_scale(self, im):
  51. """
  52. Args:
  53. im (np.ndarray): image (np.ndarray)
  54. Returns:
  55. im_scale_x: the resize ratio of X
  56. im_scale_y: the resize ratio of Y
  57. """
  58. origin_shape = im.shape[:2]
  59. im_c = im.shape[2]
  60. if self.keep_ratio:
  61. im_size_min = np.min(origin_shape)
  62. im_size_max = np.max(origin_shape)
  63. target_size_min = np.min(self.target_size)
  64. target_size_max = np.max(self.target_size)
  65. im_scale = float(target_size_min) / float(im_size_min)
  66. if np.round(im_scale * im_size_max) > target_size_max:
  67. im_scale = float(target_size_max) / float(im_size_max)
  68. im_scale_x = im_scale
  69. im_scale_y = im_scale
  70. else:
  71. resize_h, resize_w = self.target_size
  72. im_scale_y = resize_h / float(origin_shape[0])
  73. im_scale_x = resize_w / float(origin_shape[1])
  74. return im_scale_y, im_scale_x
  75. class NormalizeImage(object):
  76. """normalize image
  77. Args:
  78. mean (list): im - mean
  79. std (list): im / std
  80. is_scale (bool): whether need im / 255
  81. norm_type (str): type in ['mean_std', 'none']
  82. """
  83. def __init__(self, mean, std, is_scale=True, norm_type='mean_std'):
  84. self.mean = mean
  85. self.std = std
  86. self.is_scale = is_scale
  87. self.norm_type = norm_type
  88. def __call__(self, im, im_info):
  89. """
  90. Args:
  91. im (np.ndarray): image (np.ndarray)
  92. im_info (dict): info of image
  93. Returns:
  94. im (np.ndarray): processed image (np.ndarray)
  95. im_info (dict): info of processed image
  96. """
  97. im = im.astype(np.float32, copy=False)
  98. if self.is_scale:
  99. scale = 1.0 / 255.0
  100. im *= scale
  101. if self.norm_type == 'mean_std':
  102. mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
  103. std = np.array(self.std)[np.newaxis, np.newaxis, :]
  104. im -= mean
  105. im /= std
  106. return im, im_info
  107. class Permute(object):
  108. """permute image
  109. Args:
  110. to_bgr (bool): whether convert RGB to BGR
  111. channel_first (bool): whether convert HWC to CHW
  112. """
  113. def __init__(self, ):
  114. super(Permute, self).__init__()
  115. def __call__(self, im, im_info):
  116. """
  117. Args:
  118. im (np.ndarray): image (np.ndarray)
  119. im_info (dict): info of image
  120. Returns:
  121. im (np.ndarray): processed image (np.ndarray)
  122. im_info (dict): info of processed image
  123. """
  124. im = im.transpose((2, 0, 1)).copy()
  125. return im, im_info
  126. class PadStride(object):
  127. """ padding image for model with FPN, instead PadBatch(pad_to_stride) in original config
  128. Args:
  129. stride (bool): model with FPN need image shape % stride == 0
  130. """
  131. def __init__(self, stride=0):
  132. self.coarsest_stride = stride
  133. def __call__(self, im, im_info):
  134. """
  135. Args:
  136. im (np.ndarray): image (np.ndarray)
  137. im_info (dict): info of image
  138. Returns:
  139. im (np.ndarray): processed image (np.ndarray)
  140. im_info (dict): info of processed image
  141. """
  142. coarsest_stride = self.coarsest_stride
  143. if coarsest_stride <= 0:
  144. return im, im_info
  145. im_c, im_h, im_w = im.shape
  146. pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride)
  147. pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride)
  148. padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32)
  149. padding_im[:, :im_h, :im_w] = im
  150. return padding_im, im_info
  151. class LetterBoxResize(object):
  152. def __init__(self, target_size):
  153. """
  154. Resize image to target size, convert normalized xywh to pixel xyxy
  155. format ([x_center, y_center, width, height] -> [x0, y0, x1, y1]).
  156. Args:
  157. target_size (int|list): image target size.
  158. """
  159. super(LetterBoxResize, self).__init__()
  160. if isinstance(target_size, int):
  161. target_size = [target_size, target_size]
  162. self.target_size = target_size
  163. def letterbox(self, img, height, width, color=(127.5, 127.5, 127.5)):
  164. # letterbox: resize a rectangular image to a padded rectangular
  165. shape = img.shape[:2] # [height, width]
  166. ratio_h = float(height) / shape[0]
  167. ratio_w = float(width) / shape[1]
  168. ratio = min(ratio_h, ratio_w)
  169. new_shape = (round(shape[1] * ratio),
  170. round(shape[0] * ratio)) # [width, height]
  171. padw = (width - new_shape[0]) / 2
  172. padh = (height - new_shape[1]) / 2
  173. top, bottom = round(padh - 0.1), round(padh + 0.1)
  174. left, right = round(padw - 0.1), round(padw + 0.1)
  175. img = cv2.resize(
  176. img, new_shape, interpolation=cv2.INTER_AREA) # resized, no border
  177. img = cv2.copyMakeBorder(
  178. img, top, bottom, left, right, cv2.BORDER_CONSTANT,
  179. value=color) # padded rectangular
  180. return img, ratio, padw, padh
  181. def __call__(self, im, im_info):
  182. """
  183. Args:
  184. im (np.ndarray): image (np.ndarray)
  185. im_info (dict): info of image
  186. Returns:
  187. im (np.ndarray): processed image (np.ndarray)
  188. im_info (dict): info of processed image
  189. """
  190. assert len(self.target_size) == 2
  191. assert self.target_size[0] > 0 and self.target_size[1] > 0
  192. height, width = self.target_size
  193. h, w = im.shape[:2]
  194. im, ratio, padw, padh = self.letterbox(im, height=height, width=width)
  195. new_shape = [round(h * ratio), round(w * ratio)]
  196. im_info['im_shape'] = np.array(new_shape, dtype=np.float32)
  197. im_info['scale_factor'] = np.array([ratio, ratio], dtype=np.float32)
  198. return im, im_info
  199. class Pad(object):
  200. def __init__(self, size, fill_value=[114.0, 114.0, 114.0]):
  201. """
  202. Pad image to a specified size.
  203. Args:
  204. size (list[int]): image target size
  205. fill_value (list[float]): rgb value of pad area, default (114.0, 114.0, 114.0)
  206. """
  207. super(Pad, self).__init__()
  208. if isinstance(size, int):
  209. size = [size, size]
  210. self.size = size
  211. self.fill_value = fill_value
  212. def __call__(self, im, im_info):
  213. im_h, im_w = im.shape[:2]
  214. h, w = self.size
  215. if h == im_h and w == im_w:
  216. im = im.astype(np.float32)
  217. return im, im_info
  218. canvas = np.ones((h, w, 3), dtype=np.float32)
  219. canvas *= np.array(self.fill_value, dtype=np.float32)
  220. canvas[0:im_h, 0:im_w, :] = im.astype(np.float32)
  221. im = canvas
  222. return im, im_info
  223. def rotate_point(pt, angle_rad):
  224. """Rotate a point by an angle.
  225. Args:
  226. pt (list[float]): 2 dimensional point to be rotated
  227. angle_rad (float): rotation angle by radian
  228. Returns:
  229. list[float]: Rotated point.
  230. """
  231. assert len(pt) == 2
  232. sn, cs = np.sin(angle_rad), np.cos(angle_rad)
  233. new_x = pt[0] * cs - pt[1] * sn
  234. new_y = pt[0] * sn + pt[1] * cs
  235. rotated_pt = [new_x, new_y]
  236. return rotated_pt
  237. def _get_3rd_point(a, b):
  238. """To calculate the affine matrix, three pairs of points are required. This
  239. function is used to get the 3rd point, given 2D points a & b.
  240. The 3rd point is defined by rotating vector `a - b` by 90 degrees
  241. anticlockwise, using b as the rotation center.
  242. Args:
  243. a (np.ndarray): point(x,y)
  244. b (np.ndarray): point(x,y)
  245. Returns:
  246. np.ndarray: The 3rd point.
  247. """
  248. assert len(a) == 2
  249. assert len(b) == 2
  250. direction = a - b
  251. third_pt = b + np.array([-direction[1], direction[0]], dtype=np.float32)
  252. return third_pt
  253. def get_affine_transform(center,
  254. input_size,
  255. rot,
  256. output_size,
  257. shift=(0., 0.),
  258. inv=False):
  259. """Get the affine transform matrix, given the center/scale/rot/output_size.
  260. Args:
  261. center (np.ndarray[2, ]): Center of the bounding box (x, y).
  262. scale (np.ndarray[2, ]): Scale of the bounding box
  263. wrt [width, height].
  264. rot (float): Rotation angle (degree).
  265. output_size (np.ndarray[2, ]): Size of the destination heatmaps.
  266. shift (0-100%): Shift translation ratio wrt the width/height.
  267. Default (0., 0.).
  268. inv (bool): Option to inverse the affine transform direction.
  269. (inv=False: src->dst or inv=True: dst->src)
  270. Returns:
  271. np.ndarray: The transform matrix.
  272. """
  273. assert len(center) == 2
  274. assert len(output_size) == 2
  275. assert len(shift) == 2
  276. if not isinstance(input_size, (np.ndarray, list)):
  277. input_size = np.array([input_size, input_size], dtype=np.float32)
  278. scale_tmp = input_size
  279. shift = np.array(shift)
  280. src_w = scale_tmp[0]
  281. dst_w = output_size[0]
  282. dst_h = output_size[1]
  283. rot_rad = np.pi * rot / 180
  284. src_dir = rotate_point([0., src_w * -0.5], rot_rad)
  285. dst_dir = np.array([0., dst_w * -0.5])
  286. src = np.zeros((3, 2), dtype=np.float32)
  287. src[0, :] = center + scale_tmp * shift
  288. src[1, :] = center + src_dir + scale_tmp * shift
  289. src[2, :] = _get_3rd_point(src[0, :], src[1, :])
  290. dst = np.zeros((3, 2), dtype=np.float32)
  291. dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
  292. dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
  293. dst[2, :] = _get_3rd_point(dst[0, :], dst[1, :])
  294. if inv:
  295. trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
  296. else:
  297. trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
  298. return trans
  299. class WarpAffine(object):
  300. """Warp affine the image
  301. """
  302. def __init__(self,
  303. keep_res=False,
  304. pad=31,
  305. input_h=512,
  306. input_w=512,
  307. scale=0.4,
  308. shift=0.1):
  309. self.keep_res = keep_res
  310. self.pad = pad
  311. self.input_h = input_h
  312. self.input_w = input_w
  313. self.scale = scale
  314. self.shift = shift
  315. def __call__(self, im, im_info):
  316. """
  317. Args:
  318. im (np.ndarray): image (np.ndarray)
  319. im_info (dict): info of image
  320. Returns:
  321. im (np.ndarray): processed image (np.ndarray)
  322. im_info (dict): info of processed image
  323. """
  324. img = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
  325. h, w = img.shape[:2]
  326. if self.keep_res:
  327. input_h = (h | self.pad) + 1
  328. input_w = (w | self.pad) + 1
  329. s = np.array([input_w, input_h], dtype=np.float32)
  330. c = np.array([w // 2, h // 2], dtype=np.float32)
  331. else:
  332. s = max(h, w) * 1.0
  333. input_h, input_w = self.input_h, self.input_w
  334. c = np.array([w / 2., h / 2.], dtype=np.float32)
  335. trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
  336. img = cv2.resize(img, (w, h))
  337. inp = cv2.warpAffine(
  338. img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)
  339. return inp, im_info
  340. # keypoint preprocess
  341. def get_warp_matrix(theta, size_input, size_dst, size_target):
  342. """This code is based on
  343. https://github.com/open-mmlab/mmpose/blob/master/mmpose/core/post_processing/post_transforms.py
  344. Calculate the transformation matrix under the constraint of unbiased.
  345. Paper ref: Huang et al. The Devil is in the Details: Delving into Unbiased
  346. Data Processing for Human Pose Estimation (CVPR 2020).
  347. Args:
  348. theta (float): Rotation angle in degrees.
  349. size_input (np.ndarray): Size of input image [w, h].
  350. size_dst (np.ndarray): Size of output image [w, h].
  351. size_target (np.ndarray): Size of ROI in input plane [w, h].
  352. Returns:
  353. matrix (np.ndarray): A matrix for transformation.
  354. """
  355. theta = np.deg2rad(theta)
  356. matrix = np.zeros((2, 3), dtype=np.float32)
  357. scale_x = size_dst[0] / size_target[0]
  358. scale_y = size_dst[1] / size_target[1]
  359. matrix[0, 0] = np.cos(theta) * scale_x
  360. matrix[0, 1] = -np.sin(theta) * scale_x
  361. matrix[0, 2] = scale_x * (
  362. -0.5 * size_input[0] * np.cos(theta) + 0.5 * size_input[1] *
  363. np.sin(theta) + 0.5 * size_target[0])
  364. matrix[1, 0] = np.sin(theta) * scale_y
  365. matrix[1, 1] = np.cos(theta) * scale_y
  366. matrix[1, 2] = scale_y * (
  367. -0.5 * size_input[0] * np.sin(theta) - 0.5 * size_input[1] *
  368. np.cos(theta) + 0.5 * size_target[1])
  369. return matrix
  370. class TopDownEvalAffine(object):
  371. """apply affine transform to image and coords
  372. Args:
  373. trainsize (list): [w, h], the standard size used to train
  374. use_udp (bool): whether to use Unbiased Data Processing.
  375. records(dict): the dict contained the image and coords
  376. Returns:
  377. records (dict): contain the image and coords after tranformed
  378. """
  379. def __init__(self, trainsize, use_udp=False):
  380. self.trainsize = trainsize
  381. self.use_udp = use_udp
  382. def __call__(self, image, im_info):
  383. rot = 0
  384. imshape = im_info['im_shape'][::-1]
  385. center = im_info['center'] if 'center' in im_info else imshape / 2.
  386. scale = im_info['scale'] if 'scale' in im_info else imshape
  387. if self.use_udp:
  388. trans = get_warp_matrix(
  389. rot, center * 2.0,
  390. [self.trainsize[0] - 1.0, self.trainsize[1] - 1.0], scale)
  391. image = cv2.warpAffine(
  392. image,
  393. trans, (int(self.trainsize[0]), int(self.trainsize[1])),
  394. flags=cv2.INTER_LINEAR)
  395. else:
  396. trans = get_affine_transform(center, scale, rot, self.trainsize)
  397. image = cv2.warpAffine(
  398. image,
  399. trans, (int(self.trainsize[0]), int(self.trainsize[1])),
  400. flags=cv2.INTER_LINEAR)
  401. return image, im_info
  402. class Compose:
  403. def __init__(self, transforms):
  404. self.transforms = []
  405. for op_info in transforms:
  406. new_op_info = op_info.copy()
  407. op_type = new_op_info.pop('type')
  408. self.transforms.append(eval(op_type)(**new_op_info))
  409. def __call__(self, img):
  410. img, im_info = decode_image(img)
  411. for t in self.transforms:
  412. img, im_info = t(img, im_info)
  413. inputs = copy.deepcopy(im_info)
  414. inputs['image'] = img
  415. return inputs