rotated_operators.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479
  1. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from __future__ import absolute_import
  15. from __future__ import print_function
  16. from __future__ import division
  17. try:
  18. from collections.abc import Sequence
  19. except Exception:
  20. from collections import Sequence
  21. from numbers import Number, Integral
  22. import cv2
  23. import numpy as np
  24. import math
  25. import copy
  26. from .operators import register_op, BaseOperator
  27. from ppdet.modeling.rbox_utils import poly2rbox_le135_np, poly2rbox_oc_np, rbox2poly_np
  28. from ppdet.utils.logger import setup_logger
  29. logger = setup_logger(__name__)
  30. @register_op
  31. class RRotate(BaseOperator):
  32. """ Rotate Image, Polygon, Box
  33. Args:
  34. scale (float): rotate scale
  35. angle (float): rotate angle
  36. fill_value (int, tuple): fill color
  37. auto_bound (bool): whether auto bound or not
  38. """
  39. def __init__(self, scale=1.0, angle=0., fill_value=0., auto_bound=True):
  40. super(RRotate, self).__init__()
  41. self.scale = scale
  42. self.angle = angle
  43. self.fill_value = fill_value
  44. self.auto_bound = auto_bound
  45. def get_rotated_matrix(self, angle, scale, h, w):
  46. center = ((w - 1) * 0.5, (h - 1) * 0.5)
  47. matrix = cv2.getRotationMatrix2D(center, -angle, scale)
  48. # calculate the new size
  49. cos = np.abs(matrix[0, 0])
  50. sin = np.abs(matrix[0, 1])
  51. new_w = h * sin + w * cos
  52. new_h = h * cos + w * sin
  53. # calculate offset
  54. n_w = int(np.round(new_w))
  55. n_h = int(np.round(new_h))
  56. if self.auto_bound:
  57. ratio = min(w / n_w, h / n_h)
  58. matrix = cv2.getRotationMatrix2D(center, -angle, ratio)
  59. else:
  60. matrix[0, 2] += (new_w - w) * 0.5
  61. matrix[1, 2] += (new_h - h) * 0.5
  62. w = n_w
  63. h = n_h
  64. return matrix, h, w
  65. def get_rect_from_pts(self, pts, h, w):
  66. """ get minimum rectangle of points
  67. """
  68. assert pts.shape[-1] % 2 == 0, 'the dim of input [pts] is not correct'
  69. min_x, min_y = np.min(pts[:, 0::2], axis=1), np.min(pts[:, 1::2],
  70. axis=1)
  71. max_x, max_y = np.max(pts[:, 0::2], axis=1), np.max(pts[:, 1::2],
  72. axis=1)
  73. min_x, min_y = np.clip(min_x, 0, w), np.clip(min_y, 0, h)
  74. max_x, max_y = np.clip(max_x, 0, w), np.clip(max_y, 0, h)
  75. boxes = np.stack([min_x, min_y, max_x, max_y], axis=-1)
  76. return boxes
  77. def apply_image(self, image, matrix, h, w):
  78. return cv2.warpAffine(
  79. image, matrix, (w, h), borderValue=self.fill_value)
  80. def apply_pts(self, pts, matrix, h, w):
  81. assert pts.shape[-1] % 2 == 0, 'the dim of input [pts] is not correct'
  82. # n is number of samples and m is two times the number of points due to (x, y)
  83. _, m = pts.shape
  84. # transpose points
  85. pts_ = pts.reshape(-1, 2).T
  86. # pad 1 to convert the points to homogeneous coordinates
  87. padding = np.ones((1, pts_.shape[1]), pts.dtype)
  88. rotated_pts = np.matmul(matrix, np.concatenate((pts_, padding), axis=0))
  89. return rotated_pts[:2, :].T.reshape(-1, m)
  90. def apply(self, sample, context=None):
  91. image = sample['image']
  92. h, w = image.shape[:2]
  93. matrix, h, w = self.get_rotated_matrix(self.angle, self.scale, h, w)
  94. sample['image'] = self.apply_image(image, matrix, h, w)
  95. polys = sample['gt_poly']
  96. # TODO: segment or keypoint to be processed
  97. if len(polys) > 0:
  98. pts = self.apply_pts(polys, matrix, h, w)
  99. sample['gt_poly'] = pts
  100. sample['gt_bbox'] = self.get_rect_from_pts(pts, h, w)
  101. return sample
  102. @register_op
  103. class RandomRRotate(BaseOperator):
  104. """ Random Rotate Image
  105. Args:
  106. scale (float, tuple, list): rotate scale
  107. scale_mode (str): mode of scale, [range, value, None]
  108. angle (float, tuple, list): rotate angle
  109. angle_mode (str): mode of angle, [range, value, None]
  110. fill_value (float, tuple, list): fill value
  111. rotate_prob (float): probability of rotation
  112. auto_bound (bool): whether auto bound or not
  113. """
  114. def __init__(self,
  115. scale=1.0,
  116. scale_mode=None,
  117. angle=0.,
  118. angle_mode=None,
  119. fill_value=0.,
  120. rotate_prob=1.0,
  121. auto_bound=True):
  122. super(RandomRRotate, self).__init__()
  123. self.scale = scale
  124. self.scale_mode = scale_mode
  125. self.angle = angle
  126. self.angle_mode = angle_mode
  127. self.fill_value = fill_value
  128. self.rotate_prob = rotate_prob
  129. self.auto_bound = auto_bound
  130. def get_angle(self, angle, angle_mode):
  131. assert not angle_mode or angle_mode in [
  132. 'range', 'value'
  133. ], 'angle mode should be in [range, value, None]'
  134. if not angle_mode:
  135. return angle
  136. elif angle_mode == 'range':
  137. low, high = angle
  138. return np.random.rand() * (high - low) + low
  139. elif angle_mode == 'value':
  140. return np.random.choice(angle)
  141. def get_scale(self, scale, scale_mode):
  142. assert not scale_mode or scale_mode in [
  143. 'range', 'value'
  144. ], 'scale mode should be in [range, value, None]'
  145. if not scale_mode:
  146. return scale
  147. elif scale_mode == 'range':
  148. low, high = scale
  149. return np.random.rand() * (high - low) + low
  150. elif scale_mode == 'value':
  151. return np.random.choice(scale)
  152. def apply(self, sample, context=None):
  153. if np.random.rand() > self.rotate_prob:
  154. return sample
  155. angle = self.get_angle(self.angle, self.angle_mode)
  156. scale = self.get_scale(self.scale, self.scale_mode)
  157. rotator = RRotate(scale, angle, self.fill_value, self.auto_bound)
  158. return rotator(sample)
  159. @register_op
  160. class Poly2RBox(BaseOperator):
  161. """ Polygon to Rotated Box, using new OpenCV definition since 4.5.1
  162. Args:
  163. filter_threshold (int, float): threshold to filter annotations
  164. filter_mode (str): filter mode, ['area', 'edge']
  165. rbox_type (str): rbox type, ['le135', 'oc']
  166. """
  167. def __init__(self, filter_threshold=4, filter_mode=None, rbox_type='le135'):
  168. super(Poly2RBox, self).__init__()
  169. self.filter_fn = lambda size: self.filter(size, filter_threshold, filter_mode)
  170. self.rbox_fn = poly2rbox_le135_np if rbox_type == 'le135' else poly2rbox_oc_np
  171. def filter(self, size, threshold, mode):
  172. if mode == 'area':
  173. if size[0] * size[1] < threshold:
  174. return True
  175. elif mode == 'edge':
  176. if min(size) < threshold:
  177. return True
  178. return False
  179. def get_rbox(self, polys):
  180. valid_ids, rboxes, bboxes = [], [], []
  181. for i, poly in enumerate(polys):
  182. cx, cy, w, h, angle = self.rbox_fn(poly)
  183. if self.filter_fn((w, h)):
  184. continue
  185. rboxes.append(np.array([cx, cy, w, h, angle], dtype=np.float32))
  186. valid_ids.append(i)
  187. xmin, ymin = min(poly[0::2]), min(poly[1::2])
  188. xmax, ymax = max(poly[0::2]), max(poly[1::2])
  189. bboxes.append(np.array([xmin, ymin, xmax, ymax], dtype=np.float32))
  190. if len(valid_ids) == 0:
  191. rboxes = np.zeros((0, 5), dtype=np.float32)
  192. bboxes = np.zeros((0, 4), dtype=np.float32)
  193. else:
  194. rboxes = np.stack(rboxes)
  195. bboxes = np.stack(bboxes)
  196. return rboxes, bboxes, valid_ids
  197. def apply(self, sample, context=None):
  198. rboxes, bboxes, valid_ids = self.get_rbox(sample['gt_poly'])
  199. sample['gt_rbox'] = rboxes
  200. sample['gt_bbox'] = bboxes
  201. for k in ['gt_class', 'gt_score', 'gt_poly', 'is_crowd', 'difficult']:
  202. if k in sample:
  203. sample[k] = sample[k][valid_ids]
  204. return sample
  205. @register_op
  206. class Poly2Array(BaseOperator):
  207. """ convert gt_poly to np.array for rotated bboxes
  208. """
  209. def __init__(self):
  210. super(Poly2Array, self).__init__()
  211. def apply(self, sample, context=None):
  212. if 'gt_poly' in sample:
  213. sample['gt_poly'] = np.array(
  214. sample['gt_poly'], dtype=np.float32).reshape((-1, 8))
  215. return sample
  216. @register_op
  217. class RResize(BaseOperator):
  218. def __init__(self, target_size, keep_ratio, interp=cv2.INTER_LINEAR):
  219. """
  220. Resize image to target size. if keep_ratio is True,
  221. resize the image's long side to the maximum of target_size
  222. if keep_ratio is False, resize the image to target size(h, w)
  223. Args:
  224. target_size (int|list): image target size
  225. keep_ratio (bool): whether keep_ratio or not, default true
  226. interp (int): the interpolation method
  227. """
  228. super(RResize, self).__init__()
  229. self.keep_ratio = keep_ratio
  230. self.interp = interp
  231. if not isinstance(target_size, (Integral, Sequence)):
  232. raise TypeError(
  233. "Type of target_size is invalid. Must be Integer or List or Tuple, now is {}".
  234. format(type(target_size)))
  235. if isinstance(target_size, Integral):
  236. target_size = [target_size, target_size]
  237. self.target_size = target_size
  238. def apply_image(self, image, scale):
  239. im_scale_x, im_scale_y = scale
  240. return cv2.resize(
  241. image,
  242. None,
  243. None,
  244. fx=im_scale_x,
  245. fy=im_scale_y,
  246. interpolation=self.interp)
  247. def apply_pts(self, pts, scale, size):
  248. im_scale_x, im_scale_y = scale
  249. resize_w, resize_h = size
  250. pts[:, 0::2] *= im_scale_x
  251. pts[:, 1::2] *= im_scale_y
  252. pts[:, 0::2] = np.clip(pts[:, 0::2], 0, resize_w)
  253. pts[:, 1::2] = np.clip(pts[:, 1::2], 0, resize_h)
  254. return pts
  255. def apply(self, sample, context=None):
  256. """ Resize the image numpy.
  257. """
  258. im = sample['image']
  259. if not isinstance(im, np.ndarray):
  260. raise TypeError("{}: image type is not numpy.".format(self))
  261. if len(im.shape) != 3:
  262. raise ImageError('{}: image is not 3-dimensional.'.format(self))
  263. # apply image
  264. im_shape = im.shape
  265. if self.keep_ratio:
  266. im_size_min = np.min(im_shape[0:2])
  267. im_size_max = np.max(im_shape[0:2])
  268. target_size_min = np.min(self.target_size)
  269. target_size_max = np.max(self.target_size)
  270. im_scale = min(target_size_min / im_size_min,
  271. target_size_max / im_size_max)
  272. resize_h = im_scale * float(im_shape[0])
  273. resize_w = im_scale * float(im_shape[1])
  274. im_scale_x = im_scale
  275. im_scale_y = im_scale
  276. else:
  277. resize_h, resize_w = self.target_size
  278. im_scale_y = resize_h / im_shape[0]
  279. im_scale_x = resize_w / im_shape[1]
  280. im = self.apply_image(sample['image'], [im_scale_x, im_scale_y])
  281. sample['image'] = im.astype(np.float32)
  282. sample['im_shape'] = np.asarray([resize_h, resize_w], dtype=np.float32)
  283. if 'scale_factor' in sample:
  284. scale_factor = sample['scale_factor']
  285. sample['scale_factor'] = np.asarray(
  286. [scale_factor[0] * im_scale_y, scale_factor[1] * im_scale_x],
  287. dtype=np.float32)
  288. else:
  289. sample['scale_factor'] = np.asarray(
  290. [im_scale_y, im_scale_x], dtype=np.float32)
  291. # apply bbox
  292. if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
  293. sample['gt_bbox'] = self.apply_pts(sample['gt_bbox'],
  294. [im_scale_x, im_scale_y],
  295. [resize_w, resize_h])
  296. # apply polygon
  297. if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
  298. sample['gt_poly'] = self.apply_pts(sample['gt_poly'],
  299. [im_scale_x, im_scale_y],
  300. [resize_w, resize_h])
  301. return sample
  302. @register_op
  303. class RandomRFlip(BaseOperator):
  304. def __init__(self, prob=0.5):
  305. """
  306. Args:
  307. prob (float): the probability of flipping image
  308. """
  309. super(RandomRFlip, self).__init__()
  310. self.prob = prob
  311. if not (isinstance(self.prob, float)):
  312. raise TypeError("{}: input type is invalid.".format(self))
  313. def apply_image(self, image):
  314. return image[:, ::-1, :]
  315. def apply_pts(self, pts, width):
  316. oldx = pts[:, 0::2].copy()
  317. pts[:, 0::2] = width - oldx - 1
  318. return pts
  319. def apply(self, sample, context=None):
  320. """Filp the image and bounding box.
  321. Operators:
  322. 1. Flip the image numpy.
  323. 2. Transform the bboxes' x coordinates.
  324. (Must judge whether the coordinates are normalized!)
  325. 3. Transform the segmentations' x coordinates.
  326. (Must judge whether the coordinates are normalized!)
  327. Output:
  328. sample: the image, bounding box and segmentation part
  329. in sample are flipped.
  330. """
  331. if np.random.uniform(0, 1) < self.prob:
  332. im = sample['image']
  333. height, width = im.shape[:2]
  334. im = self.apply_image(im)
  335. if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
  336. sample['gt_bbox'] = self.apply_pts(sample['gt_bbox'], width)
  337. if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
  338. sample['gt_poly'] = self.apply_pts(sample['gt_poly'], width)
  339. sample['flipped'] = True
  340. sample['image'] = im
  341. return sample
  342. @register_op
  343. class VisibleRBox(BaseOperator):
  344. """
  345. In debug mode, visualize images according to `gt_box`.
  346. (Currently only supported when not cropping and flipping image.)
  347. """
  348. def __init__(self, output_dir='debug'):
  349. super(VisibleRBox, self).__init__()
  350. self.output_dir = output_dir
  351. if not os.path.isdir(output_dir):
  352. os.makedirs(output_dir)
  353. def apply(self, sample, context=None):
  354. image = Image.fromarray(sample['image'].astype(np.uint8))
  355. out_file_name = '{:012d}.jpg'.format(sample['im_id'][0])
  356. width = sample['w']
  357. height = sample['h']
  358. # gt_poly = sample['gt_rbox']
  359. gt_poly = sample['gt_poly']
  360. gt_class = sample['gt_class']
  361. draw = ImageDraw.Draw(image)
  362. for i in range(gt_poly.shape[0]):
  363. x1, y1, x2, y2, x3, y3, x4, y4 = gt_poly[i]
  364. draw.line(
  365. [(x1, y1), (x2, y2), (x3, y3), (x4, y4), (x1, y1)],
  366. width=2,
  367. fill='green')
  368. # draw label
  369. xmin = min(x1, x2, x3, x4)
  370. ymin = min(y1, y2, y3, y4)
  371. text = str(gt_class[i][0])
  372. tw, th = draw.textsize(text)
  373. draw.rectangle(
  374. [(xmin + 1, ymin - th), (xmin + tw + 1, ymin)], fill='green')
  375. draw.text((xmin + 1, ymin - th), text, fill=(255, 255, 255))
  376. if 'gt_keypoint' in sample.keys():
  377. gt_keypoint = sample['gt_keypoint']
  378. if self.is_normalized:
  379. for i in range(gt_keypoint.shape[1]):
  380. if i % 2:
  381. gt_keypoint[:, i] = gt_keypoint[:, i] * height
  382. else:
  383. gt_keypoint[:, i] = gt_keypoint[:, i] * width
  384. for i in range(gt_keypoint.shape[0]):
  385. keypoint = gt_keypoint[i]
  386. for j in range(int(keypoint.shape[0] / 2)):
  387. x1 = round(keypoint[2 * j]).astype(np.int32)
  388. y1 = round(keypoint[2 * j + 1]).astype(np.int32)
  389. draw.ellipse(
  390. (x1, y1, x1 + 5, y1 + 5), fill='green', outline='green')
  391. save_path = os.path.join(self.output_dir, out_file_name)
  392. image.save(save_path, quality=95)
  393. return sample
  394. @register_op
  395. class Rbox2Poly(BaseOperator):
  396. """
  397. Convert rbbox format to poly format.
  398. """
  399. def __init__(self):
  400. super(Rbox2Poly, self).__init__()
  401. def apply(self, sample, context=None):
  402. assert 'gt_rbox' in sample
  403. assert sample['gt_rbox'].shape[1] == 5
  404. rboxes = sample['gt_rbox']
  405. polys = rbox2poly_np(rboxes)
  406. sample['gt_poly'] = polys
  407. xmin, ymin = polys[:, 0::2].min(1), polys[:, 1::2].min(1)
  408. xmax, ymax = polys[:, 0::2].max(1), polys[:, 1::2].max(1)
  409. sample['gt_bbox'] = np.stack([xmin, ymin, xmin, ymin], axis=1)
  410. return sample