# preprocess.py
  1. import numpy as np
  2. import cv2
  3. import copy
  4. def decode_image(img_path):
  5. with open(img_path, 'rb') as f:
  6. im_read = f.read()
  7. data = np.frombuffer(im_read, dtype='uint8')
  8. im = cv2.imdecode(data, 1) # BGR mode, but need RGB mode
  9. im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
  10. img_info = {
  11. "im_shape": np.array(
  12. im.shape[:2], dtype=np.float32),
  13. "scale_factor": np.array(
  14. [1., 1.], dtype=np.float32)
  15. }
  16. return im, img_info
  17. class Resize(object):
  18. """resize image by target_size and max_size
  19. Args:
  20. target_size (int): the target size of image
  21. keep_ratio (bool): whether keep_ratio or not, default true
  22. interp (int): method of resize
  23. """
  24. def __init__(self, target_size, keep_ratio=True, interp=cv2.INTER_LINEAR):
  25. if isinstance(target_size, int):
  26. target_size = [target_size, target_size]
  27. self.target_size = target_size
  28. self.keep_ratio = keep_ratio
  29. self.interp = interp
  30. def __call__(self, im, im_info):
  31. """
  32. Args:
  33. im (np.ndarray): image (np.ndarray)
  34. im_info (dict): info of image
  35. Returns:
  36. im (np.ndarray): processed image (np.ndarray)
  37. im_info (dict): info of processed image
  38. """
  39. assert len(self.target_size) == 2
  40. assert self.target_size[0] > 0 and self.target_size[1] > 0
  41. im_channel = im.shape[2]
  42. im_scale_y, im_scale_x = self.generate_scale(im)
  43. im = cv2.resize(
  44. im,
  45. None,
  46. None,
  47. fx=im_scale_x,
  48. fy=im_scale_y,
  49. interpolation=self.interp)
  50. im_info['im_shape'] = np.array(im.shape[:2]).astype('float32')
  51. im_info['scale_factor'] = np.array(
  52. [im_scale_y, im_scale_x]).astype('float32')
  53. return im, im_info
  54. def generate_scale(self, im):
  55. """
  56. Args:
  57. im (np.ndarray): image (np.ndarray)
  58. Returns:
  59. im_scale_x: the resize ratio of X
  60. im_scale_y: the resize ratio of Y
  61. """
  62. origin_shape = im.shape[:2]
  63. im_c = im.shape[2]
  64. if self.keep_ratio:
  65. im_size_min = np.min(origin_shape)
  66. im_size_max = np.max(origin_shape)
  67. target_size_min = np.min(self.target_size)
  68. target_size_max = np.max(self.target_size)
  69. im_scale = float(target_size_min) / float(im_size_min)
  70. if np.round(im_scale * im_size_max) > target_size_max:
  71. im_scale = float(target_size_max) / float(im_size_max)
  72. im_scale_x = im_scale
  73. im_scale_y = im_scale
  74. else:
  75. resize_h, resize_w = self.target_size
  76. im_scale_y = resize_h / float(origin_shape[0])
  77. im_scale_x = resize_w / float(origin_shape[1])
  78. return im_scale_y, im_scale_x
  79. class NormalizeImage(object):
  80. """normalize image
  81. Args:
  82. mean (list): im - mean
  83. std (list): im / std
  84. is_scale (bool): whether need im / 255
  85. norm_type (str): type in ['mean_std', 'none']
  86. """
  87. def __init__(self, mean, std, is_scale=True, norm_type='mean_std'):
  88. self.mean = mean
  89. self.std = std
  90. self.is_scale = is_scale
  91. self.norm_type = norm_type
  92. def __call__(self, im, im_info):
  93. """
  94. Args:
  95. im (np.ndarray): image (np.ndarray)
  96. im_info (dict): info of image
  97. Returns:
  98. im (np.ndarray): processed image (np.ndarray)
  99. im_info (dict): info of processed image
  100. """
  101. im = im.astype(np.float32, copy=False)
  102. if self.is_scale:
  103. scale = 1.0 / 255.0
  104. im *= scale
  105. if self.norm_type == 'mean_std':
  106. mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
  107. std = np.array(self.std)[np.newaxis, np.newaxis, :]
  108. im -= mean
  109. im /= std
  110. return im, im_info
  111. class Permute(object):
  112. """permute image
  113. Args:
  114. to_bgr (bool): whether convert RGB to BGR
  115. channel_first (bool): whether convert HWC to CHW
  116. """
  117. def __init__(self, ):
  118. super(Permute, self).__init__()
  119. def __call__(self, im, im_info):
  120. """
  121. Args:
  122. im (np.ndarray): image (np.ndarray)
  123. im_info (dict): info of image
  124. Returns:
  125. im (np.ndarray): processed image (np.ndarray)
  126. im_info (dict): info of processed image
  127. """
  128. im = im.transpose((2, 0, 1)).copy()
  129. return im, im_info
  130. class PadStride(object):
  131. """ padding image for model with FPN, instead PadBatch(pad_to_stride) in original config
  132. Args:
  133. stride (bool): model with FPN need image shape % stride == 0
  134. """
  135. def __init__(self, stride=0):
  136. self.coarsest_stride = stride
  137. def __call__(self, im, im_info):
  138. """
  139. Args:
  140. im (np.ndarray): image (np.ndarray)
  141. im_info (dict): info of image
  142. Returns:
  143. im (np.ndarray): processed image (np.ndarray)
  144. im_info (dict): info of processed image
  145. """
  146. coarsest_stride = self.coarsest_stride
  147. if coarsest_stride <= 0:
  148. return im, im_info
  149. im_c, im_h, im_w = im.shape
  150. pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride)
  151. pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride)
  152. padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32)
  153. padding_im[:, :im_h, :im_w] = im
  154. return padding_im, im_info
  155. class LetterBoxResize(object):
  156. def __init__(self, target_size):
  157. """
  158. Resize image to target size, convert normalized xywh to pixel xyxy
  159. format ([x_center, y_center, width, height] -> [x0, y0, x1, y1]).
  160. Args:
  161. target_size (int|list): image target size.
  162. """
  163. super(LetterBoxResize, self).__init__()
  164. if isinstance(target_size, int):
  165. target_size = [target_size, target_size]
  166. self.target_size = target_size
  167. def letterbox(self, img, height, width, color=(127.5, 127.5, 127.5)):
  168. # letterbox: resize a rectangular image to a padded rectangular
  169. shape = img.shape[:2] # [height, width]
  170. ratio_h = float(height) / shape[0]
  171. ratio_w = float(width) / shape[1]
  172. ratio = min(ratio_h, ratio_w)
  173. new_shape = (round(shape[1] * ratio),
  174. round(shape[0] * ratio)) # [width, height]
  175. padw = (width - new_shape[0]) / 2
  176. padh = (height - new_shape[1]) / 2
  177. top, bottom = round(padh - 0.1), round(padh + 0.1)
  178. left, right = round(padw - 0.1), round(padw + 0.1)
  179. img = cv2.resize(
  180. img, new_shape, interpolation=cv2.INTER_AREA) # resized, no border
  181. img = cv2.copyMakeBorder(
  182. img, top, bottom, left, right, cv2.BORDER_CONSTANT,
  183. value=color) # padded rectangular
  184. return img, ratio, padw, padh
  185. def __call__(self, im, im_info):
  186. """
  187. Args:
  188. im (np.ndarray): image (np.ndarray)
  189. im_info (dict): info of image
  190. Returns:
  191. im (np.ndarray): processed image (np.ndarray)
  192. im_info (dict): info of processed image
  193. """
  194. assert len(self.target_size) == 2
  195. assert self.target_size[0] > 0 and self.target_size[1] > 0
  196. height, width = self.target_size
  197. h, w = im.shape[:2]
  198. im, ratio, padw, padh = self.letterbox(im, height=height, width=width)
  199. new_shape = [round(h * ratio), round(w * ratio)]
  200. im_info['im_shape'] = np.array(new_shape, dtype=np.float32)
  201. im_info['scale_factor'] = np.array([ratio, ratio], dtype=np.float32)
  202. return im, im_info
  203. class Pad(object):
  204. def __init__(self, size, fill_value=[114.0, 114.0, 114.0]):
  205. """
  206. Pad image to a specified size.
  207. Args:
  208. size (list[int]): image target size
  209. fill_value (list[float]): rgb value of pad area, default (114.0, 114.0, 114.0)
  210. """
  211. super(Pad, self).__init__()
  212. if isinstance(size, int):
  213. size = [size, size]
  214. self.size = size
  215. self.fill_value = fill_value
  216. def __call__(self, im, im_info):
  217. im_h, im_w = im.shape[:2]
  218. h, w = self.size
  219. if h == im_h and w == im_w:
  220. im = im.astype(np.float32)
  221. return im, im_info
  222. canvas = np.ones((h, w, 3), dtype=np.float32)
  223. canvas *= np.array(self.fill_value, dtype=np.float32)
  224. canvas[0:im_h, 0:im_w, :] = im.astype(np.float32)
  225. im = canvas
  226. return im, im_info
  227. def rotate_point(pt, angle_rad):
  228. """Rotate a point by an angle.
  229. Args:
  230. pt (list[float]): 2 dimensional point to be rotated
  231. angle_rad (float): rotation angle by radian
  232. Returns:
  233. list[float]: Rotated point.
  234. """
  235. assert len(pt) == 2
  236. sn, cs = np.sin(angle_rad), np.cos(angle_rad)
  237. new_x = pt[0] * cs - pt[1] * sn
  238. new_y = pt[0] * sn + pt[1] * cs
  239. rotated_pt = [new_x, new_y]
  240. return rotated_pt
  241. def _get_3rd_point(a, b):
  242. """To calculate the affine matrix, three pairs of points are required. This
  243. function is used to get the 3rd point, given 2D points a & b.
  244. The 3rd point is defined by rotating vector `a - b` by 90 degrees
  245. anticlockwise, using b as the rotation center.
  246. Args:
  247. a (np.ndarray): point(x,y)
  248. b (np.ndarray): point(x,y)
  249. Returns:
  250. np.ndarray: The 3rd point.
  251. """
  252. assert len(a) == 2
  253. assert len(b) == 2
  254. direction = a - b
  255. third_pt = b + np.array([-direction[1], direction[0]], dtype=np.float32)
  256. return third_pt
  257. def get_affine_transform(center,
  258. input_size,
  259. rot,
  260. output_size,
  261. shift=(0., 0.),
  262. inv=False):
  263. """Get the affine transform matrix, given the center/scale/rot/output_size.
  264. Args:
  265. center (np.ndarray[2, ]): Center of the bounding box (x, y).
  266. scale (np.ndarray[2, ]): Scale of the bounding box
  267. wrt [width, height].
  268. rot (float): Rotation angle (degree).
  269. output_size (np.ndarray[2, ]): Size of the destination heatmaps.
  270. shift (0-100%): Shift translation ratio wrt the width/height.
  271. Default (0., 0.).
  272. inv (bool): Option to inverse the affine transform direction.
  273. (inv=False: src->dst or inv=True: dst->src)
  274. Returns:
  275. np.ndarray: The transform matrix.
  276. """
  277. assert len(center) == 2
  278. assert len(output_size) == 2
  279. assert len(shift) == 2
  280. if not isinstance(input_size, (np.ndarray, list)):
  281. input_size = np.array([input_size, input_size], dtype=np.float32)
  282. scale_tmp = input_size
  283. shift = np.array(shift)
  284. src_w = scale_tmp[0]
  285. dst_w = output_size[0]
  286. dst_h = output_size[1]
  287. rot_rad = np.pi * rot / 180
  288. src_dir = rotate_point([0., src_w * -0.5], rot_rad)
  289. dst_dir = np.array([0., dst_w * -0.5])
  290. src = np.zeros((3, 2), dtype=np.float32)
  291. src[0, :] = center + scale_tmp * shift
  292. src[1, :] = center + src_dir + scale_tmp * shift
  293. src[2, :] = _get_3rd_point(src[0, :], src[1, :])
  294. dst = np.zeros((3, 2), dtype=np.float32)
  295. dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
  296. dst[1, :] = np.array([dst_w * 0.5, dst_h * 0.5]) + dst_dir
  297. dst[2, :] = _get_3rd_point(dst[0, :], dst[1, :])
  298. if inv:
  299. trans = cv2.getAffineTransform(np.float32(dst), np.float32(src))
  300. else:
  301. trans = cv2.getAffineTransform(np.float32(src), np.float32(dst))
  302. return trans
  303. class WarpAffine(object):
  304. """Warp affine the image
  305. """
  306. def __init__(self,
  307. keep_res=False,
  308. pad=31,
  309. input_h=512,
  310. input_w=512,
  311. scale=0.4,
  312. shift=0.1):
  313. self.keep_res = keep_res
  314. self.pad = pad
  315. self.input_h = input_h
  316. self.input_w = input_w
  317. self.scale = scale
  318. self.shift = shift
  319. def __call__(self, im, im_info):
  320. """
  321. Args:
  322. im (np.ndarray): image (np.ndarray)
  323. im_info (dict): info of image
  324. Returns:
  325. im (np.ndarray): processed image (np.ndarray)
  326. im_info (dict): info of processed image
  327. """
  328. img = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)
  329. h, w = img.shape[:2]
  330. if self.keep_res:
  331. input_h = (h | self.pad) + 1
  332. input_w = (w | self.pad) + 1
  333. s = np.array([input_w, input_h], dtype=np.float32)
  334. c = np.array([w // 2, h // 2], dtype=np.float32)
  335. else:
  336. s = max(h, w) * 1.0
  337. input_h, input_w = self.input_h, self.input_w
  338. c = np.array([w / 2., h / 2.], dtype=np.float32)
  339. trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
  340. img = cv2.resize(img, (w, h))
  341. inp = cv2.warpAffine(
  342. img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)
  343. return inp, im_info
# Keypoint preprocessing
  345. def get_warp_matrix(theta, size_input, size_dst, size_target):
  346. """This code is based on
  347. https://github.com/open-mmlab/mmpose/blob/master/mmpose/core/post_processing/post_transforms.py
  348. Calculate the transformation matrix under the constraint of unbiased.
  349. Paper ref: Huang et al. The Devil is in the Details: Delving into Unbiased
  350. Data Processing for Human Pose Estimation (CVPR 2020).
  351. Args:
  352. theta (float): Rotation angle in degrees.
  353. size_input (np.ndarray): Size of input image [w, h].
  354. size_dst (np.ndarray): Size of output image [w, h].
  355. size_target (np.ndarray): Size of ROI in input plane [w, h].
  356. Returns:
  357. matrix (np.ndarray): A matrix for transformation.
  358. """
  359. theta = np.deg2rad(theta)
  360. matrix = np.zeros((2, 3), dtype=np.float32)
  361. scale_x = size_dst[0] / size_target[0]
  362. scale_y = size_dst[1] / size_target[1]
  363. matrix[0, 0] = np.cos(theta) * scale_x
  364. matrix[0, 1] = -np.sin(theta) * scale_x
  365. matrix[0, 2] = scale_x * (
  366. -0.5 * size_input[0] * np.cos(theta) + 0.5 * size_input[1] *
  367. np.sin(theta) + 0.5 * size_target[0])
  368. matrix[1, 0] = np.sin(theta) * scale_y
  369. matrix[1, 1] = np.cos(theta) * scale_y
  370. matrix[1, 2] = scale_y * (
  371. -0.5 * size_input[0] * np.sin(theta) - 0.5 * size_input[1] *
  372. np.cos(theta) + 0.5 * size_target[1])
  373. return matrix
  374. class TopDownEvalAffine(object):
  375. """apply affine transform to image and coords
  376. Args:
  377. trainsize (list): [w, h], the standard size used to train
  378. use_udp (bool): whether to use Unbiased Data Processing.
  379. records(dict): the dict contained the image and coords
  380. Returns:
  381. records (dict): contain the image and coords after tranformed
  382. """
  383. def __init__(self, trainsize, use_udp=False):
  384. self.trainsize = trainsize
  385. self.use_udp = use_udp
  386. def __call__(self, image, im_info):
  387. rot = 0
  388. imshape = im_info['im_shape'][::-1]
  389. center = im_info['center'] if 'center' in im_info else imshape / 2.
  390. scale = im_info['scale'] if 'scale' in im_info else imshape
  391. if self.use_udp:
  392. trans = get_warp_matrix(
  393. rot, center * 2.0,
  394. [self.trainsize[0] - 1.0, self.trainsize[1] - 1.0], scale)
  395. image = cv2.warpAffine(
  396. image,
  397. trans, (int(self.trainsize[0]), int(self.trainsize[1])),
  398. flags=cv2.INTER_LINEAR)
  399. else:
  400. trans = get_affine_transform(center, scale, rot, self.trainsize)
  401. image = cv2.warpAffine(
  402. image,
  403. trans, (int(self.trainsize[0]), int(self.trainsize[1])),
  404. flags=cv2.INTER_LINEAR)
  405. return image, im_info
  406. class Compose:
  407. def __init__(self, transforms):
  408. self.transforms = []
  409. for op_info in transforms:
  410. new_op_info = op_info.copy()
  411. op_type = new_op_info.pop('type')
  412. self.transforms.append(eval(op_type)(**new_op_info))
  413. def __call__(self, img_path):
  414. img, im_info = decode_image(img_path)
  415. for t in self.transforms:
  416. img, im_info = t(img, im_info)
  417. inputs = copy.deepcopy(im_info)
  418. inputs['image'] = np.ascontiguousarray(img.astype('float32'))
  419. return inputs
  420. coco_clsid2catid = {
  421. 0: 1,
  422. 1: 2,
  423. 2: 3,
  424. 3: 4,
  425. 4: 5,
  426. 5: 6,
  427. 6: 7,
  428. 7: 8,
  429. 8: 9,
  430. 9: 10,
  431. 10: 11,
  432. 11: 13,
  433. 12: 14,
  434. 13: 15,
  435. 14: 16,
  436. 15: 17,
  437. 16: 18,
  438. 17: 19,
  439. 18: 20,
  440. 19: 21,
  441. 20: 22,
  442. 21: 23,
  443. 22: 24,
  444. 23: 25,
  445. 24: 27,
  446. 25: 28,
  447. 26: 31,
  448. 27: 32,
  449. 28: 33,
  450. 29: 34,
  451. 30: 35,
  452. 31: 36,
  453. 32: 37,
  454. 33: 38,
  455. 34: 39,
  456. 35: 40,
  457. 36: 41,
  458. 37: 42,
  459. 38: 43,
  460. 39: 44,
  461. 40: 46,
  462. 41: 47,
  463. 42: 48,
  464. 43: 49,
  465. 44: 50,
  466. 45: 51,
  467. 46: 52,
  468. 47: 53,
  469. 48: 54,
  470. 49: 55,
  471. 50: 56,
  472. 51: 57,
  473. 52: 58,
  474. 53: 59,
  475. 54: 60,
  476. 55: 61,
  477. 56: 62,
  478. 57: 63,
  479. 58: 64,
  480. 59: 65,
  481. 60: 67,
  482. 61: 70,
  483. 62: 72,
  484. 63: 73,
  485. 64: 74,
  486. 65: 75,
  487. 66: 76,
  488. 67: 77,
  489. 68: 78,
  490. 69: 79,
  491. 70: 80,
  492. 71: 81,
  493. 72: 82,
  494. 73: 84,
  495. 74: 85,
  496. 75: 86,
  497. 76: 87,
  498. 77: 88,
  499. 78: 89,
  500. 79: 90
  501. }