preprocess.py 9.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286
  1. # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import cv2
  15. import numpy as np
  16. def decode_image(im_file, im_info):
  17. """read rgb image
  18. Args:
  19. im_file (str|np.ndarray): input can be image path or np.ndarray
  20. im_info (dict): info of image
  21. Returns:
  22. im (np.ndarray): processed image (np.ndarray)
  23. im_info (dict): info of processed image
  24. """
  25. if isinstance(im_file, str):
  26. with open(im_file, 'rb') as f:
  27. im_read = f.read()
  28. data = np.frombuffer(im_read, dtype='uint8')
  29. im = cv2.imdecode(data, 1) # BGR mode, but need RGB mode
  30. im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
  31. else:
  32. im = im_file
  33. im_info['im_shape'] = np.array(im.shape[:2], dtype=np.float32)
  34. im_info['scale_factor'] = np.array([1., 1.], dtype=np.float32)
  35. return im, im_info
  36. class Resize(object):
  37. """resize image by target_size and max_size
  38. Args:
  39. target_size (int): the target size of image
  40. keep_ratio (bool): whether keep_ratio or not, default true
  41. interp (int): method of resize
  42. """
  43. def __init__(self, target_size, keep_ratio=True, interp=cv2.INTER_LINEAR):
  44. if isinstance(target_size, int):
  45. target_size = [target_size, target_size]
  46. self.target_size = target_size
  47. self.keep_ratio = keep_ratio
  48. self.interp = interp
  49. def __call__(self, im, im_info):
  50. """
  51. Args:
  52. im (np.ndarray): image (np.ndarray)
  53. im_info (dict): info of image
  54. Returns:
  55. im (np.ndarray): processed image (np.ndarray)
  56. im_info (dict): info of processed image
  57. """
  58. assert len(self.target_size) == 2
  59. assert self.target_size[0] > 0 and self.target_size[1] > 0
  60. im_channel = im.shape[2]
  61. im_scale_y, im_scale_x = self.generate_scale(im)
  62. im = cv2.resize(
  63. im,
  64. None,
  65. None,
  66. fx=im_scale_x,
  67. fy=im_scale_y,
  68. interpolation=self.interp)
  69. im_info['im_shape'] = np.array(im.shape[:2]).astype('float32')
  70. im_info['scale_factor'] = np.array(
  71. [im_scale_y, im_scale_x]).astype('float32')
  72. return im, im_info
  73. def generate_scale(self, im):
  74. """
  75. Args:
  76. im (np.ndarray): image (np.ndarray)
  77. Returns:
  78. im_scale_x: the resize ratio of X
  79. im_scale_y: the resize ratio of Y
  80. """
  81. origin_shape = im.shape[:2]
  82. im_c = im.shape[2]
  83. if self.keep_ratio:
  84. im_size_min = np.min(origin_shape)
  85. im_size_max = np.max(origin_shape)
  86. target_size_min = np.min(self.target_size)
  87. target_size_max = np.max(self.target_size)
  88. im_scale = float(target_size_min) / float(im_size_min)
  89. if np.round(im_scale * im_size_max) > target_size_max:
  90. im_scale = float(target_size_max) / float(im_size_max)
  91. im_scale_x = im_scale
  92. im_scale_y = im_scale
  93. else:
  94. resize_h, resize_w = self.target_size
  95. im_scale_y = resize_h / float(origin_shape[0])
  96. im_scale_x = resize_w / float(origin_shape[1])
  97. return im_scale_y, im_scale_x
  98. class NormalizeImage(object):
  99. """normalize image
  100. Args:
  101. mean (list): im - mean
  102. std (list): im / std
  103. is_scale (bool): whether need im / 255
  104. is_channel_first (bool): if True: image shape is CHW, else: HWC
  105. """
  106. def __init__(self, mean, std, is_scale=True):
  107. self.mean = mean
  108. self.std = std
  109. self.is_scale = is_scale
  110. def __call__(self, im, im_info):
  111. """
  112. Args:
  113. im (np.ndarray): image (np.ndarray)
  114. im_info (dict): info of image
  115. Returns:
  116. im (np.ndarray): processed image (np.ndarray)
  117. im_info (dict): info of processed image
  118. """
  119. im = im.astype(np.float32, copy=False)
  120. mean = np.array(self.mean)[np.newaxis, np.newaxis, :]
  121. std = np.array(self.std)[np.newaxis, np.newaxis, :]
  122. if self.is_scale:
  123. im = im / 255.0
  124. im -= mean
  125. im /= std
  126. return im, im_info
  127. class Permute(object):
  128. """permute image
  129. Args:
  130. to_bgr (bool): whether convert RGB to BGR
  131. channel_first (bool): whether convert HWC to CHW
  132. """
  133. def __init__(self, ):
  134. super(Permute, self).__init__()
  135. def __call__(self, im, im_info):
  136. """
  137. Args:
  138. im (np.ndarray): image (np.ndarray)
  139. im_info (dict): info of image
  140. Returns:
  141. im (np.ndarray): processed image (np.ndarray)
  142. im_info (dict): info of processed image
  143. """
  144. im = im.transpose((2, 0, 1)).copy()
  145. return im, im_info
  146. class PadStride(object):
  147. """ padding image for model with FPN, instead PadBatch(pad_to_stride) in original config
  148. Args:
  149. stride (bool): model with FPN need image shape % stride == 0
  150. """
  151. def __init__(self, stride=0):
  152. self.coarsest_stride = stride
  153. def __call__(self, im, im_info):
  154. """
  155. Args:
  156. im (np.ndarray): image (np.ndarray)
  157. im_info (dict): info of image
  158. Returns:
  159. im (np.ndarray): processed image (np.ndarray)
  160. im_info (dict): info of processed image
  161. """
  162. coarsest_stride = self.coarsest_stride
  163. if coarsest_stride <= 0:
  164. return im, im_info
  165. im_c, im_h, im_w = im.shape
  166. pad_h = int(np.ceil(float(im_h) / coarsest_stride) * coarsest_stride)
  167. pad_w = int(np.ceil(float(im_w) / coarsest_stride) * coarsest_stride)
  168. padding_im = np.zeros((im_c, pad_h, pad_w), dtype=np.float32)
  169. padding_im[:, :im_h, :im_w] = im
  170. return padding_im, im_info
  171. class LetterBoxResize(object):
  172. def __init__(self, target_size):
  173. """
  174. Resize image to target size, convert normalized xywh to pixel xyxy
  175. format ([x_center, y_center, width, height] -> [x0, y0, x1, y1]).
  176. Args:
  177. target_size (int|list): image target size.
  178. """
  179. super(LetterBoxResize, self).__init__()
  180. if isinstance(target_size, int):
  181. target_size = [target_size, target_size]
  182. self.target_size = target_size
  183. def letterbox(self, img, height, width, color=(127.5, 127.5, 127.5)):
  184. # letterbox: resize a rectangular image to a padded rectangular
  185. shape = img.shape[:2] # [height, width]
  186. ratio_h = float(height) / shape[0]
  187. ratio_w = float(width) / shape[1]
  188. ratio = min(ratio_h, ratio_w)
  189. new_shape = (round(shape[1] * ratio),
  190. round(shape[0] * ratio)) # [width, height]
  191. padw = (width - new_shape[0]) / 2
  192. padh = (height - new_shape[1]) / 2
  193. top, bottom = round(padh - 0.1), round(padh + 0.1)
  194. left, right = round(padw - 0.1), round(padw + 0.1)
  195. img = cv2.resize(
  196. img, new_shape, interpolation=cv2.INTER_AREA) # resized, no border
  197. img = cv2.copyMakeBorder(
  198. img, top, bottom, left, right, cv2.BORDER_CONSTANT,
  199. value=color) # padded rectangular
  200. return img, ratio, padw, padh
  201. def __call__(self, im, im_info):
  202. """
  203. Args:
  204. im (np.ndarray): image (np.ndarray)
  205. im_info (dict): info of image
  206. Returns:
  207. im (np.ndarray): processed image (np.ndarray)
  208. im_info (dict): info of processed image
  209. """
  210. assert len(self.target_size) == 2
  211. assert self.target_size[0] > 0 and self.target_size[1] > 0
  212. height, width = self.target_size
  213. h, w = im.shape[:2]
  214. im, ratio, padw, padh = self.letterbox(im, height=height, width=width)
  215. new_shape = [round(h * ratio), round(w * ratio)]
  216. im_info['im_shape'] = np.array(new_shape, dtype=np.float32)
  217. im_info['scale_factor'] = np.array([ratio, ratio], dtype=np.float32)
  218. return im, im_info
  219. class Pad(object):
  220. def __init__(self, size, fill_value=[114.0, 114.0, 114.0]):
  221. """
  222. Pad image to a specified size.
  223. Args:
  224. size (list[int]): image target size
  225. fill_value (list[float]): rgb value of pad area, default (114.0, 114.0, 114.0)
  226. """
  227. super(Pad, self).__init__()
  228. if isinstance(size, int):
  229. size = [size, size]
  230. self.size = size
  231. self.fill_value = fill_value
  232. def __call__(self, im, im_info):
  233. im_h, im_w = im.shape[:2]
  234. h, w = self.size
  235. if h == im_h and w == im_w:
  236. im = im.astype(np.float32)
  237. return im, im_info
  238. canvas = np.ones((h, w, 3), dtype=np.float32)
  239. canvas *= np.array(self.fill_value, dtype=np.float32)
  240. canvas[0:im_h, 0:im_w, :] = im.astype(np.float32)
  241. im = canvas
  242. return im, im_info
  243. def preprocess(im, preprocess_ops):
  244. # process image by preprocess_ops
  245. im_info = {
  246. 'scale_factor': np.array(
  247. [1., 1.], dtype=np.float32),
  248. 'im_shape': None,
  249. }
  250. im, im_info = decode_image(im, im_info)
  251. for operator in preprocess_ops:
  252. im, im_info = operator(im, im_info)
  253. return im, im_info