# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import cv2
import numpy as np
from keypoint_preprocess import get_affine_transform
from PIL import Image


def decode_image(im_file, im_info):
    """read rgb image
    Args:
        im_file (str|np.ndarray): input can be image path or np.ndarray
        im_info (dict): info of image
    Returns:
        im (np.ndarray): processed image (np.ndarray)
        im_info (dict): info of processed image
    """
    if isinstance(im_file, str):
        with open(im_file, 'rb') as f:
            im_read = f.read()
        data = np.frombuffer(im_read, dtype='uint8')
        im = cv2.imdecode(data, 1)  # BGR mode, but need RGB mode
        im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
    else:
        im = im_file
    im_info['im_shape'] = np.array(im.shape[:2], dtype=np.float32)
    im_info['scale_factor'] = np.array([1., 1.], dtype=np.float32)
    return im, im_info
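
# Usage sketch (illustrative only, not part of the upstream file): decode_image
# accepts either a path or an already-decoded array; 'demo.jpg' below is a
# hypothetical file name.
#
#   info = {'scale_factor': np.array([1., 1.], dtype=np.float32),
#           'im_shape': None}
#   im, info = decode_image('demo.jpg', info)  # read from disk, then BGR -> RGB
#   im, info = decode_image(
#       np.zeros((480, 640, 3), dtype=np.uint8), info)  # ndarray passes through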


class Resize_Mult32(object):
    """resize image so that both sides become multiples of 32
    Args:
        limit_side_len (int): side length used to compute the resize ratio
        limit_type (str): one of 'max', 'min' or 'resize_long'
        interp (int): interpolation method of resize
    """

    def __init__(self, limit_side_len, limit_type, interp=cv2.INTER_LINEAR):
        self.limit_side_len = limit_side_len
        self.limit_type = limit_type
        self.interp = interp

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): image (np.ndarray)
            im_info (dict): info of image
        Returns:
            im (np.ndarray): processed image (np.ndarray)
            im_info (dict): info of processed image
        """
        im_channel = im.shape[2]
        im_scale_y, im_scale_x = self.generate_scale(im)
        im = cv2.resize(
            im,
            None,
            None,
            fx=im_scale_x,
            fy=im_scale_y,
            interpolation=self.interp)
        im_info['im_shape'] = np.array(im.shape[:2]).astype('float32')
        im_info['scale_factor'] = np.array(
            [im_scale_y, im_scale_x]).astype('float32')
        return im, im_info

    def generate_scale(self, img):
        """
        Args:
            img (np.ndarray): image (np.ndarray)
        Returns:
            im_scale_x: the resize ratio of X
            im_scale_y: the resize ratio of Y
        """
        limit_side_len = self.limit_side_len
        h, w, c = img.shape

        # limit the max side
        if self.limit_type == 'max':
            if h > w:
                ratio = float(limit_side_len) / h
            else:
                ratio = float(limit_side_len) / w
        elif self.limit_type == 'min':
            if h < w:
                ratio = float(limit_side_len) / h
            else:
                ratio = float(limit_side_len) / w
        elif self.limit_type == 'resize_long':
            ratio = float(limit_side_len) / max(h, w)
        else:
            raise ValueError('unsupported limit_type: {}'.format(
                self.limit_type))

        resize_h = int(h * ratio)
        resize_w = int(w * ratio)
        resize_h = max(int(round(resize_h / 32) * 32), 32)
        resize_w = max(int(round(resize_w / 32) * 32), 32)

        im_scale_y = resize_h / float(h)
        im_scale_x = resize_w / float(w)
        return im_scale_y, im_scale_x
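
# Usage sketch (illustrative, hypothetical values): with limit_side_len=960 and
# limit_type='max', a 500x600 image is scaled so both sides land on multiples
# of 32, the granularity text-detection style backbones expect.
#
#   op = Resize_Mult32(limit_side_len=960, limit_type='max')
#   im, info = op(np.zeros((500, 600, 3), dtype=np.uint8), {})
#   # ratio = 960 / 600 = 1.6, so im.shape[:2] == (800, 960)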


class Resize(object):
    """resize image by target_size and max_size
    Args:
        target_size (int|list): the target size of the image
        keep_ratio (bool): whether to keep the aspect ratio, default True
        interp (int): interpolation method of resize
    """

    def __init__(self, target_size, keep_ratio=True, interp=cv2.INTER_LINEAR):
        if isinstance(target_size, int):
            target_size = [target_size, target_size]
        self.target_size = target_size
        self.keep_ratio = keep_ratio
        self.interp = interp

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): image (np.ndarray)
            im_info (dict): info of image
        Returns:
            im (np.ndarray): processed image (np.ndarray)
            im_info (dict): info of processed image
        """
        assert len(self.target_size) == 2
        assert self.target_size[0] > 0 and self.target_size[1] > 0
        im_channel = im.shape[2]
        im_scale_y, im_scale_x = self.generate_scale(im)
        im = cv2.resize(
            im,
            None,
            None,
            fx=im_scale_x,
            fy=im_scale_y,
            interpolation=self.interp)
        im_info['im_shape'] = np.array(im.shape[:2]).astype('float32')
        im_info['scale_factor'] = np.array(
            [im_scale_y, im_scale_x]).astype('float32')
        return im, im_info

    def generate_scale(self, im):
        """
        Args:
            im (np.ndarray): image (np.ndarray)
        Returns:
            im_scale_x: the resize ratio of X
            im_scale_y: the resize ratio of Y
        """
        origin_shape = im.shape[:2]
        im_c = im.shape[2]
        if self.keep_ratio:
            im_size_min = np.min(origin_shape)
            im_size_max = np.max(origin_shape)
            target_size_min = np.min(self.target_size)
            target_size_max = np.max(self.target_size)
            im_scale = float(target_size_min) / float(im_size_min)
            if np.round(im_scale * im_size_max) > target_size_max:
                im_scale = float(target_size_max) / float(im_size_max)
            im_scale_x = im_scale
            im_scale_y = im_scale
        else:
            resize_h, resize_w = self.target_size
            im_scale_y = resize_h / float(origin_shape[0])
            im_scale_x = resize_w / float(origin_shape[1])
        return im_scale_y, im_scale_x
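
# Usage sketch (illustrative): with keep_ratio=True the image is scaled toward
# min(target_size) on its short side, capped so the long side never exceeds
# max(target_size), i.e. the classic min/max-size detection resize.
#
#   op = Resize(target_size=[800, 1333], keep_ratio=True)
#   im, info = op(np.zeros((720, 1280, 3), dtype=np.uint8), {})
#   # scale = 1333 / 1280 (about 1.041), so im.shape[:2] == (750, 1333)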


class ShortSizeScale(object):
    """
    Scale images by short size.
    Args:
        short_size (float|int): the short side of an image will be scaled to short_size.
        fixed_ratio (bool): whether to zoom according to a fixed ratio. default: True
        keep_ratio (bool|None): whether to keep the aspect ratio when fixed_ratio is False. default: None
        do_round (bool): whether to round up when calculating the zoom ratio. default: False
        backend (str): choose pillow or cv2 as the graphics processing backend. default: 'pillow'
    """

    def __init__(self,
                 short_size,
                 fixed_ratio=True,
                 keep_ratio=None,
                 do_round=False,
                 backend='pillow'):
        self.short_size = short_size
        assert (fixed_ratio and not keep_ratio) or (
            not fixed_ratio
        ), "fixed_ratio and keep_ratio cannot be true at the same time"
        self.fixed_ratio = fixed_ratio
        self.keep_ratio = keep_ratio
        self.do_round = do_round

        assert backend in [
            'pillow', 'cv2'
        ], "Scale's backend must be pillow or cv2, but got {}".format(backend)
        self.backend = backend

    def __call__(self, img):
        """
        Performs resize operations.
        Args:
            img (PIL.Image|np.ndarray): image to be scaled.
        Returns:
            resized_img: a PIL.Image after scaling.
        """
        result_img = None

        if isinstance(img, np.ndarray):
            h, w, _ = img.shape
        elif isinstance(img, Image.Image):
            w, h = img.size
        else:
            raise NotImplementedError

        if w <= h:
            ow = self.short_size
            if self.fixed_ratio:  # default is True
                oh = int(self.short_size * 4.0 / 3.0)
            elif not self.keep_ratio:  # no
                oh = self.short_size
            else:
                scale_factor = self.short_size / w
                oh = int(h * float(scale_factor) +
                         0.5) if self.do_round else int(h * self.short_size / w)
                ow = int(w * float(scale_factor) +
                         0.5) if self.do_round else int(w * self.short_size / h)
        else:
            oh = self.short_size
            if self.fixed_ratio:
                ow = int(self.short_size * 4.0 / 3.0)
            elif not self.keep_ratio:  # no
                ow = self.short_size
            else:
                scale_factor = self.short_size / h
                oh = int(h * float(scale_factor) +
                         0.5) if self.do_round else int(h * self.short_size / w)
                ow = int(w * float(scale_factor) +
                         0.5) if self.do_round else int(w * self.short_size / h)

        if type(img) == np.ndarray:
            img = Image.fromarray(img, mode='RGB')

        if self.backend == 'pillow':
            result_img = img.resize((ow, oh), Image.BILINEAR)
        elif self.backend == 'cv2' and (self.keep_ratio is not None):
            result_img = cv2.resize(
                img, (ow, oh), interpolation=cv2.INTER_LINEAR)
        else:
            result_img = Image.fromarray(
                cv2.resize(
                    np.asarray(img), (ow, oh), interpolation=cv2.INTER_LINEAR))

        return result_img
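
# Usage sketch (illustrative): with the default fixed_ratio=True the short side
# is set to short_size and the other side is fixed at short_size * 4 / 3,
# regardless of the original aspect ratio.
#
#   op = ShortSizeScale(short_size=224)
#   out = op(np.zeros((240, 320, 3), dtype=np.uint8))
#   # out is a PIL.Image with out.size == (298, 224)   # (width, height)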


class NormalizeImage(object):
    """normalize image
    Args:
        mean (list): per-channel mean subtracted from the image (im - mean)
        std (list): per-channel std the image is divided by (im / std)
        is_scale (bool): whether to scale pixel values to [0, 1] by dividing by 255
        norm_type (str): type in ['mean_std', 'none']
    """

    def __init__(self, mean, std, is_scale=True, norm_type='mean_std'):
        self.mean = mean
        self.std = std
        self.is_scale = is_scale
        self.norm_type = norm_type

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): image (np.ndarray)
            im_info (dict): info of image
        Returns:
            im (np.ndarray): processed image (np.ndarray)
            im_info (dict): info of processed image
        """
        im = im.astype(np.float32, copy=False)
        if self.is_scale:
            scale = 1.0 / 255.0
            im *= scale

        if self.norm_type == 'mean_std':
            mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
            std = np.array(self.std)[np.newaxis, np.newaxis, :]
            im -= mean
            im /= std
        return im, im_info
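
# Usage sketch (illustrative): the ImageNet mean/std below are the values most
# detection configs pair with is_scale=True; they are not mandated by this file.
#
#   op = NormalizeImage(mean=[0.485, 0.456, 0.406],
#                       std=[0.229, 0.224, 0.225], is_scale=True)
#   im, info = op(np.full((4, 4, 3), 255, dtype=np.uint8), {})
#   # every pixel is now (1.0 - mean) / std per channel, e.g. R is about 2.249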


class Permute(object):
    """permute image from HWC layout to CHW layout
    """

    def __init__(self, ):
        super(Permute, self).__init__()

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): image (np.ndarray)
            im_info (dict): info of image
        Returns:
            im (np.ndarray): processed image (np.ndarray)
            im_info (dict): info of processed image
        """
        im = im.transpose((2, 0, 1)).copy()
        return im, im_info


class PadStride(object):
    """pad the image so its spatial size is divisible by the stride; used for
    models with FPN instead of PadBatch(pad_to_stride) in the original config
    Args:
        stride (int): models with FPN require image shape % stride == 0
    """

    def __init__(self, stride=0):
        self.coarsest_stride = stride

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): image (np.ndarray)
            im_info (dict): info of image
        Returns:
            im (np.ndarray): processed image (np.ndarray)
            im_info (dict): info of processed image
        """
        coarsest_stride = self.coarsest_stride
        if coarsest_stride <= 0:
            return im, im_info
        im_c, im_h, im_w = im.shape
        pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride)
        pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride)
        padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32)
        padding_im[:, :im_h, :im_w] = im
        return padding_im, im_info
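
# Usage sketch (illustrative): PadStride expects a CHW tensor (i.e. it runs
# after Permute) and zero-pads the bottom/right so H and W become multiples of
# the stride.
#
#   op = PadStride(stride=32)
#   im, info = op(np.zeros((3, 750, 1333), dtype=np.float32), {})
#   # im.shape == (3, 768, 1344)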


class LetterBoxResize(object):
    def __init__(self, target_size):
        """
        Resize image to target size, convert normalized xywh to pixel xyxy
        format ([x_center, y_center, width, height] -> [x0, y0, x1, y1]).
        Args:
            target_size (int|list): image target size.
        """
        super(LetterBoxResize, self).__init__()
        if isinstance(target_size, int):
            target_size = [target_size, target_size]
        self.target_size = target_size

    def letterbox(self, img, height, width, color=(127.5, 127.5, 127.5)):
        # letterbox: resize a rectangular image to a padded rectangular
        shape = img.shape[:2]  # [height, width]
        ratio_h = float(height) / shape[0]
        ratio_w = float(width) / shape[1]
        ratio = min(ratio_h, ratio_w)
        new_shape = (round(shape[1] * ratio),
                     round(shape[0] * ratio))  # [width, height]
        padw = (width - new_shape[0]) / 2
        padh = (height - new_shape[1]) / 2
        top, bottom = round(padh - 0.1), round(padh + 0.1)
        left, right = round(padw - 0.1), round(padw + 0.1)

        img = cv2.resize(
            img, new_shape, interpolation=cv2.INTER_AREA)  # resized, no border
        img = cv2.copyMakeBorder(
            img, top, bottom, left, right, cv2.BORDER_CONSTANT,
            value=color)  # padded rectangular
        return img, ratio, padw, padh

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): image (np.ndarray)
            im_info (dict): info of image
        Returns:
            im (np.ndarray): processed image (np.ndarray)
            im_info (dict): info of processed image
        """
        assert len(self.target_size) == 2
        assert self.target_size[0] > 0 and self.target_size[1] > 0
        height, width = self.target_size
        h, w = im.shape[:2]
        im, ratio, padw, padh = self.letterbox(im, height=height, width=width)

        new_shape = [round(h * ratio), round(w * ratio)]
        im_info['im_shape'] = np.array(new_shape, dtype=np.float32)
        im_info['scale_factor'] = np.array([ratio, ratio], dtype=np.float32)
        return im, im_info
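
# Usage sketch (illustrative): a 480x640 image letterboxed into a 608x608
# canvas is scaled by min(608/480, 608/640) = 0.95 and padded with gray
# (127.5) rows on the top and bottom.
#
#   op = LetterBoxResize(target_size=608)
#   im, info = op(np.zeros((480, 640, 3), dtype=np.uint8), {})
#   # content resized to 456x608, then 76 rows of padding above and below:
#   # im.shape == (608, 608, 3), info['scale_factor'] == [0.95, 0.95]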


class Pad(object):
    def __init__(self, size, fill_value=[114.0, 114.0, 114.0]):
        """
        Pad image to a specified size.
        Args:
            size (list[int]): image target size
            fill_value (list[float]): rgb value of pad area, default (114.0, 114.0, 114.0)
        """
        super(Pad, self).__init__()
        if isinstance(size, int):
            size = [size, size]
        self.size = size
        self.fill_value = fill_value

    def __call__(self, im, im_info):
        im_h, im_w = im.shape[:2]
        h, w = self.size
        if h == im_h and w == im_w:
            im = im.astype(np.float32)
            return im, im_info

        canvas = np.ones((h, w, 3), dtype=np.float32)
        canvas *= np.array(self.fill_value, dtype=np.float32)
        canvas[0:im_h, 0:im_w, :] = im.astype(np.float32)
        im = canvas
        return im, im_info


class WarpAffine(object):
    """Warp affine the image
    """

    def __init__(self,
                 keep_res=False,
                 pad=31,
                 input_h=512,
                 input_w=512,
                 scale=0.4,
                 shift=0.1,
                 down_ratio=4):
        self.keep_res = keep_res
        self.pad = pad
        self.input_h = input_h
        self.input_w = input_w
        self.scale = scale
        self.shift = shift
        self.down_ratio = down_ratio

    def __call__(self, im, im_info):
        """
        Args:
            im (np.ndarray): image (np.ndarray)
            im_info (dict): info of image
        Returns:
            im (np.ndarray): processed image (np.ndarray)
            im_info (dict): info of processed image
        """
        img = cv2.cvtColor(im, cv2.COLOR_RGB2BGR)

        h, w = img.shape[:2]

        if self.keep_res:
            # True in detection eval/infer
            input_h = (h | self.pad) + 1
            input_w = (w | self.pad) + 1
            s = np.array([input_w, input_h], dtype=np.float32)
            c = np.array([w // 2, h // 2], dtype=np.float32)
        else:
            # False in centertrack eval_mot/eval_mot
            s = max(h, w) * 1.0
            input_h, input_w = self.input_h, self.input_w
            c = np.array([w / 2., h / 2.], dtype=np.float32)

        trans_input = get_affine_transform(c, s, 0, [input_w, input_h])
        img = cv2.resize(img, (w, h))
        inp = cv2.warpAffine(
            img, trans_input, (input_w, input_h), flags=cv2.INTER_LINEAR)

        if not self.keep_res:
            out_h = input_h // self.down_ratio
            out_w = input_w // self.down_ratio
            trans_output = get_affine_transform(c, s, 0, [out_w, out_h])

            im_info.update({
                'center': c,
                'scale': s,
                'out_height': out_h,
                'out_width': out_w,
                'inp_height': input_h,
                'inp_width': input_w,
                'trans_input': trans_input,
                'trans_output': trans_output,
            })

        return inp, im_info
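
# Usage sketch (illustrative): with keep_res=False (the default) every frame is
# warped onto a fixed input_h x input_w canvas centred on the image, as
# CenterNet-style models expect; it relies on get_affine_transform imported
# from keypoint_preprocess at the top of this file.
#
#   op = WarpAffine(keep_res=False, input_h=512, input_w=512, down_ratio=4)
#   inp, info = op(np.zeros((480, 640, 3), dtype=np.uint8), {})
#   # inp.shape == (512, 512, 3); info now carries 'center', 'scale',
#   # 'trans_input', 'trans_output' and the 128x128 output resolution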


def preprocess(im, preprocess_ops):
    # process image by preprocess_ops
    im_info = {
        'scale_factor': np.array(
            [1., 1.], dtype=np.float32),
        'im_shape': None,
    }
    im, im_info = decode_image(im, im_info)
    for operator in preprocess_ops:
        im, im_info = operator(im, im_info)
    return im, im_info
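

# A self-contained smoke test of the pipeline, added purely as an illustration:
# the operator order below (Resize -> NormalizeImage -> Permute -> PadStride)
# mirrors a common detection config but is not prescribed by this file, and the
# synthetic image stands in for a real frame or image path.
if __name__ == '__main__':
    dummy = np.random.randint(0, 256, size=(720, 1280, 3), dtype=np.uint8)
    ops = [
        Resize(target_size=[640, 640], keep_ratio=True),
        NormalizeImage(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
            is_scale=True),
        Permute(),
        PadStride(stride=32),
    ]
    out, info = preprocess(dummy, ops)
    print('input shape :', dummy.shape)  # (720, 1280, 3), HWC uint8
    print('output shape:', out.shape)  # (3, 384, 640), CHW float32
    print('scale_factor:', info['scale_factor'])  # [0.5, 0.5]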