batch_operators.py 55 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365
  1. # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from __future__ import absolute_import
  15. from __future__ import division
  16. from __future__ import print_function
  17. import typing
  18. try:
  19. from collections.abc import Sequence
  20. except Exception:
  21. from collections import Sequence
  22. import cv2
  23. import copy
  24. import math
  25. import numpy as np
  26. from .operators import register_op, BaseOperator, Resize
  27. from .op_helper import jaccard_overlap, gaussian2D, gaussian_radius, draw_umich_gaussian
  28. from .atss_assigner import ATSSAssigner
  29. from scipy import ndimage
  30. from ppdet.modeling import bbox_utils
  31. from ppdet.utils.logger import setup_logger
  32. from ppdet.modeling.keypoint_utils import get_affine_transform, affine_transform
# Module-level logger, named after this module.
logger = setup_logger(__name__)

# Operator names exported by `from <this module> import *`.
__all__ = [
    'PadBatch',
    'BatchRandomResize',
    'Gt2YoloTarget',
    'Gt2FCOSTarget',
    'Gt2TTFTarget',
    'Gt2Solov2Target',
    'Gt2SparseRCNNTarget',
    'PadMaskBatch',
    'Gt2GFLTarget',
    'Gt2CenterNetTarget',
    'Gt2CenterTrackTarget',
    'PadGT',
    'PadRGT',
]
  49. @register_op
  50. class PadBatch(BaseOperator):
  51. """
  52. Pad a batch of samples so they can be divisible by a stride.
  53. The layout of each image should be 'CHW'.
  54. Args:
  55. pad_to_stride (int): If `pad_to_stride > 0`, pad zeros to ensure
  56. height and width is divisible by `pad_to_stride`.
  57. """
  58. def __init__(self, pad_to_stride=0):
  59. super(PadBatch, self).__init__()
  60. self.pad_to_stride = pad_to_stride
  61. def __call__(self, samples, context=None):
  62. """
  63. Args:
  64. samples (list): a batch of sample, each is dict.
  65. """
  66. coarsest_stride = self.pad_to_stride
  67. # multi scale input is nested list
  68. if isinstance(samples,
  69. typing.Sequence) and len(samples) > 0 and isinstance(
  70. samples[0], typing.Sequence):
  71. inner_samples = samples[0]
  72. else:
  73. inner_samples = samples
  74. max_shape = np.array(
  75. [data['image'].shape for data in inner_samples]).max(axis=0)
  76. if coarsest_stride > 0:
  77. max_shape[1] = int(
  78. np.ceil(max_shape[1] / coarsest_stride) * coarsest_stride)
  79. max_shape[2] = int(
  80. np.ceil(max_shape[2] / coarsest_stride) * coarsest_stride)
  81. for data in inner_samples:
  82. im = data['image']
  83. im_c, im_h, im_w = im.shape[:]
  84. padding_im = np.zeros(
  85. (im_c, max_shape[1], max_shape[2]), dtype=np.float32)
  86. padding_im[:, :im_h, :im_w] = im
  87. data['image'] = padding_im
  88. if 'semantic' in data and data['semantic'] is not None:
  89. semantic = data['semantic']
  90. padding_sem = np.zeros(
  91. (1, max_shape[1], max_shape[2]), dtype=np.float32)
  92. padding_sem[:, :im_h, :im_w] = semantic
  93. data['semantic'] = padding_sem
  94. if 'gt_segm' in data and data['gt_segm'] is not None:
  95. gt_segm = data['gt_segm']
  96. padding_segm = np.zeros(
  97. (gt_segm.shape[0], max_shape[1], max_shape[2]),
  98. dtype=np.uint8)
  99. padding_segm[:, :im_h, :im_w] = gt_segm
  100. data['gt_segm'] = padding_segm
  101. return samples
  102. @register_op
  103. class BatchRandomResize(BaseOperator):
  104. """
  105. Resize image to target size randomly. random target_size and interpolation method
  106. Args:
  107. target_size (int, list, tuple): image target size, if random size is True, must be list or tuple
  108. keep_ratio (bool): whether keep_raio or not, default true
  109. interp (int): the interpolation method
  110. random_size (bool): whether random select target size of image
  111. random_interp (bool): whether random select interpolation method
  112. """
  113. def __init__(self,
  114. target_size,
  115. keep_ratio,
  116. interp=cv2.INTER_NEAREST,
  117. random_size=True,
  118. random_interp=False):
  119. super(BatchRandomResize, self).__init__()
  120. self.keep_ratio = keep_ratio
  121. self.interps = [
  122. cv2.INTER_NEAREST,
  123. cv2.INTER_LINEAR,
  124. cv2.INTER_AREA,
  125. cv2.INTER_CUBIC,
  126. cv2.INTER_LANCZOS4,
  127. ]
  128. self.interp = interp
  129. assert isinstance(target_size, (
  130. int, Sequence)), "target_size must be int, list or tuple"
  131. if random_size and not isinstance(target_size, list):
  132. raise TypeError(
  133. "Type of target_size is invalid when random_size is True. Must be List, now is {}".
  134. format(type(target_size)))
  135. self.target_size = target_size
  136. self.random_size = random_size
  137. self.random_interp = random_interp
  138. def __call__(self, samples, context=None):
  139. if self.random_size:
  140. index = np.random.choice(len(self.target_size))
  141. target_size = self.target_size[index]
  142. else:
  143. target_size = self.target_size
  144. if self.random_interp:
  145. interp = np.random.choice(self.interps)
  146. else:
  147. interp = self.interp
  148. resizer = Resize(target_size, keep_ratio=self.keep_ratio, interp=interp)
  149. return resizer(samples, context=context)
  150. @register_op
  151. class Gt2YoloTarget(BaseOperator):
  152. __shared__ = ['num_classes']
  153. """
  154. Generate YOLOv3 targets by groud truth data, this operator is only used in
  155. fine grained YOLOv3 loss mode
  156. """
  157. def __init__(self,
  158. anchors,
  159. anchor_masks,
  160. downsample_ratios,
  161. num_classes=80,
  162. iou_thresh=1.):
  163. super(Gt2YoloTarget, self).__init__()
  164. self.anchors = anchors
  165. self.anchor_masks = anchor_masks
  166. self.downsample_ratios = downsample_ratios
  167. self.num_classes = num_classes
  168. self.iou_thresh = iou_thresh
  169. def __call__(self, samples, context=None):
  170. assert len(self.anchor_masks) == len(self.downsample_ratios), \
  171. "anchor_masks', and 'downsample_ratios' should have same length."
  172. h, w = samples[0]['image'].shape[1:3]
  173. an_hw = np.array(self.anchors) / np.array([[w, h]])
  174. for sample in samples:
  175. gt_bbox = sample['gt_bbox']
  176. gt_class = sample['gt_class']
  177. if 'gt_score' not in sample:
  178. sample['gt_score'] = np.ones(
  179. (gt_bbox.shape[0], 1), dtype=np.float32)
  180. gt_score = sample['gt_score']
  181. for i, (
  182. mask, downsample_ratio
  183. ) in enumerate(zip(self.anchor_masks, self.downsample_ratios)):
  184. grid_h = int(h / downsample_ratio)
  185. grid_w = int(w / downsample_ratio)
  186. target = np.zeros(
  187. (len(mask), 6 + self.num_classes, grid_h, grid_w),
  188. dtype=np.float32)
  189. for b in range(gt_bbox.shape[0]):
  190. gx, gy, gw, gh = gt_bbox[b, :]
  191. cls = gt_class[b]
  192. score = gt_score[b]
  193. if gw <= 0. or gh <= 0. or score <= 0.:
  194. continue
  195. # find best match anchor index
  196. best_iou = 0.
  197. best_idx = -1
  198. for an_idx in range(an_hw.shape[0]):
  199. iou = jaccard_overlap(
  200. [0., 0., gw, gh],
  201. [0., 0., an_hw[an_idx, 0], an_hw[an_idx, 1]])
  202. if iou > best_iou:
  203. best_iou = iou
  204. best_idx = an_idx
  205. gi = int(gx * grid_w)
  206. gj = int(gy * grid_h)
  207. # gtbox should be regresed in this layes if best match
  208. # anchor index in anchor mask of this layer
  209. if best_idx in mask:
  210. best_n = mask.index(best_idx)
  211. # x, y, w, h, scale
  212. target[best_n, 0, gj, gi] = gx * grid_w - gi
  213. target[best_n, 1, gj, gi] = gy * grid_h - gj
  214. target[best_n, 2, gj, gi] = np.log(
  215. gw * w / self.anchors[best_idx][0])
  216. target[best_n, 3, gj, gi] = np.log(
  217. gh * h / self.anchors[best_idx][1])
  218. target[best_n, 4, gj, gi] = 2.0 - gw * gh
  219. # objectness record gt_score
  220. target[best_n, 5, gj, gi] = score
  221. # classification
  222. target[best_n, 6 + cls, gj, gi] = 1.
  223. # For non-matched anchors, calculate the target if the iou
  224. # between anchor and gt is larger than iou_thresh
  225. if self.iou_thresh < 1:
  226. for idx, mask_i in enumerate(mask):
  227. if mask_i == best_idx: continue
  228. iou = jaccard_overlap(
  229. [0., 0., gw, gh],
  230. [0., 0., an_hw[mask_i, 0], an_hw[mask_i, 1]])
  231. if iou > self.iou_thresh and target[idx, 5, gj,
  232. gi] == 0.:
  233. # x, y, w, h, scale
  234. target[idx, 0, gj, gi] = gx * grid_w - gi
  235. target[idx, 1, gj, gi] = gy * grid_h - gj
  236. target[idx, 2, gj, gi] = np.log(
  237. gw * w / self.anchors[mask_i][0])
  238. target[idx, 3, gj, gi] = np.log(
  239. gh * h / self.anchors[mask_i][1])
  240. target[idx, 4, gj, gi] = 2.0 - gw * gh
  241. # objectness record gt_score
  242. target[idx, 5, gj, gi] = score
  243. # classification
  244. target[idx, 6 + cls, gj, gi] = 1.
  245. sample['target{}'.format(i)] = target
  246. # remove useless gt_class and gt_score after target calculated
  247. sample.pop('gt_class')
  248. sample.pop('gt_score')
  249. return samples
  250. @register_op
  251. class Gt2FCOSTarget(BaseOperator):
  252. """
  253. Generate FCOS targets by groud truth data
  254. """
  255. def __init__(self,
  256. object_sizes_boundary,
  257. center_sampling_radius,
  258. downsample_ratios,
  259. num_shift=0.5,
  260. multiply_strides_reg_targets=False,
  261. norm_reg_targets=True):
  262. super(Gt2FCOSTarget, self).__init__()
  263. self.center_sampling_radius = center_sampling_radius
  264. self.downsample_ratios = downsample_ratios
  265. self.INF = np.inf
  266. self.object_sizes_boundary = [-1] + object_sizes_boundary + [self.INF]
  267. object_sizes_of_interest = []
  268. for i in range(len(self.object_sizes_boundary) - 1):
  269. object_sizes_of_interest.append([
  270. self.object_sizes_boundary[i], self.object_sizes_boundary[i + 1]
  271. ])
  272. self.object_sizes_of_interest = object_sizes_of_interest
  273. self.num_shift = num_shift
  274. self.multiply_strides_reg_targets = multiply_strides_reg_targets
  275. self.norm_reg_targets = norm_reg_targets
  276. def _compute_points(self, w, h):
  277. """
  278. compute the corresponding points in each feature map
  279. :param h: image height
  280. :param w: image width
  281. :return: points from all feature map
  282. """
  283. locations = []
  284. for stride in self.downsample_ratios:
  285. shift_x = np.arange(0, w, stride).astype(np.float32)
  286. shift_y = np.arange(0, h, stride).astype(np.float32)
  287. shift_x, shift_y = np.meshgrid(shift_x, shift_y)
  288. shift_x = shift_x.flatten()
  289. shift_y = shift_y.flatten()
  290. location = np.stack(
  291. [shift_x, shift_y], axis=1) + stride * self.num_shift
  292. locations.append(location)
  293. num_points_each_level = [len(location) for location in locations]
  294. locations = np.concatenate(locations, axis=0)
  295. return locations, num_points_each_level
  296. def _convert_xywh2xyxy(self, gt_bbox, w, h):
  297. """
  298. convert the bounding box from style xywh to xyxy
  299. :param gt_bbox: bounding boxes normalized into [0, 1]
  300. :param w: image width
  301. :param h: image height
  302. :return: bounding boxes in xyxy style
  303. """
  304. bboxes = gt_bbox.copy()
  305. bboxes[:, [0, 2]] = bboxes[:, [0, 2]] * w
  306. bboxes[:, [1, 3]] = bboxes[:, [1, 3]] * h
  307. bboxes[:, 2] = bboxes[:, 0] + bboxes[:, 2]
  308. bboxes[:, 3] = bboxes[:, 1] + bboxes[:, 3]
  309. return bboxes
  310. def _check_inside_boxes_limited(self, gt_bbox, xs, ys,
  311. num_points_each_level):
  312. """
  313. check if points is within the clipped boxes
  314. :param gt_bbox: bounding boxes
  315. :param xs: horizontal coordinate of points
  316. :param ys: vertical coordinate of points
  317. :return: the mask of points is within gt_box or not
  318. """
  319. bboxes = np.reshape(
  320. gt_bbox, newshape=[1, gt_bbox.shape[0], gt_bbox.shape[1]])
  321. bboxes = np.tile(bboxes, reps=[xs.shape[0], 1, 1])
  322. ct_x = (bboxes[:, :, 0] + bboxes[:, :, 2]) / 2
  323. ct_y = (bboxes[:, :, 1] + bboxes[:, :, 3]) / 2
  324. beg = 0
  325. clipped_box = bboxes.copy()
  326. for lvl, stride in enumerate(self.downsample_ratios):
  327. end = beg + num_points_each_level[lvl]
  328. stride_exp = self.center_sampling_radius * stride
  329. clipped_box[beg:end, :, 0] = np.maximum(
  330. bboxes[beg:end, :, 0], ct_x[beg:end, :] - stride_exp)
  331. clipped_box[beg:end, :, 1] = np.maximum(
  332. bboxes[beg:end, :, 1], ct_y[beg:end, :] - stride_exp)
  333. clipped_box[beg:end, :, 2] = np.minimum(
  334. bboxes[beg:end, :, 2], ct_x[beg:end, :] + stride_exp)
  335. clipped_box[beg:end, :, 3] = np.minimum(
  336. bboxes[beg:end, :, 3], ct_y[beg:end, :] + stride_exp)
  337. beg = end
  338. l_res = xs - clipped_box[:, :, 0]
  339. r_res = clipped_box[:, :, 2] - xs
  340. t_res = ys - clipped_box[:, :, 1]
  341. b_res = clipped_box[:, :, 3] - ys
  342. clipped_box_reg_targets = np.stack([l_res, t_res, r_res, b_res], axis=2)
  343. inside_gt_box = np.min(clipped_box_reg_targets, axis=2) > 0
  344. return inside_gt_box
  345. def __call__(self, samples, context=None):
  346. assert len(self.object_sizes_of_interest) == len(self.downsample_ratios), \
  347. "object_sizes_of_interest', and 'downsample_ratios' should have same length."
  348. for sample in samples:
  349. im = sample['image']
  350. bboxes = sample['gt_bbox']
  351. gt_class = sample['gt_class']
  352. # calculate the locations
  353. h, w = im.shape[1:3]
  354. points, num_points_each_level = self._compute_points(w, h)
  355. object_scale_exp = []
  356. for i, num_pts in enumerate(num_points_each_level):
  357. object_scale_exp.append(
  358. np.tile(
  359. np.array([self.object_sizes_of_interest[i]]),
  360. reps=[num_pts, 1]))
  361. object_scale_exp = np.concatenate(object_scale_exp, axis=0)
  362. gt_area = (bboxes[:, 2] - bboxes[:, 0]) * (
  363. bboxes[:, 3] - bboxes[:, 1])
  364. xs, ys = points[:, 0], points[:, 1]
  365. xs = np.reshape(xs, newshape=[xs.shape[0], 1])
  366. xs = np.tile(xs, reps=[1, bboxes.shape[0]])
  367. ys = np.reshape(ys, newshape=[ys.shape[0], 1])
  368. ys = np.tile(ys, reps=[1, bboxes.shape[0]])
  369. l_res = xs - bboxes[:, 0]
  370. r_res = bboxes[:, 2] - xs
  371. t_res = ys - bboxes[:, 1]
  372. b_res = bboxes[:, 3] - ys
  373. reg_targets = np.stack([l_res, t_res, r_res, b_res], axis=2)
  374. if self.center_sampling_radius > 0:
  375. is_inside_box = self._check_inside_boxes_limited(
  376. bboxes, xs, ys, num_points_each_level)
  377. else:
  378. is_inside_box = np.min(reg_targets, axis=2) > 0
  379. # check if the targets is inside the corresponding level
  380. max_reg_targets = np.max(reg_targets, axis=2)
  381. lower_bound = np.tile(
  382. np.expand_dims(
  383. object_scale_exp[:, 0], axis=1),
  384. reps=[1, max_reg_targets.shape[1]])
  385. high_bound = np.tile(
  386. np.expand_dims(
  387. object_scale_exp[:, 1], axis=1),
  388. reps=[1, max_reg_targets.shape[1]])
  389. is_match_current_level = \
  390. (max_reg_targets > lower_bound) & \
  391. (max_reg_targets < high_bound)
  392. points2gtarea = np.tile(
  393. np.expand_dims(
  394. gt_area, axis=0), reps=[xs.shape[0], 1])
  395. points2gtarea[is_inside_box == 0] = self.INF
  396. points2gtarea[is_match_current_level == 0] = self.INF
  397. points2min_area = points2gtarea.min(axis=1)
  398. points2min_area_ind = points2gtarea.argmin(axis=1)
  399. labels = gt_class[points2min_area_ind] + 1
  400. labels[points2min_area == self.INF] = 0
  401. reg_targets = reg_targets[range(xs.shape[0]), points2min_area_ind]
  402. ctn_targets = np.sqrt((reg_targets[:, [0, 2]].min(axis=1) / \
  403. reg_targets[:, [0, 2]].max(axis=1)) * \
  404. (reg_targets[:, [1, 3]].min(axis=1) / \
  405. reg_targets[:, [1, 3]].max(axis=1))).astype(np.float32)
  406. ctn_targets = np.reshape(
  407. ctn_targets, newshape=[ctn_targets.shape[0], 1])
  408. ctn_targets[labels <= 0] = 0
  409. pos_ind = np.nonzero(labels != 0)
  410. reg_targets_pos = reg_targets[pos_ind[0], :]
  411. split_sections = []
  412. beg = 0
  413. for lvl in range(len(num_points_each_level)):
  414. end = beg + num_points_each_level[lvl]
  415. split_sections.append(end)
  416. beg = end
  417. labels_by_level = np.split(labels, split_sections, axis=0)
  418. reg_targets_by_level = np.split(reg_targets, split_sections, axis=0)
  419. ctn_targets_by_level = np.split(ctn_targets, split_sections, axis=0)
  420. for lvl in range(len(self.downsample_ratios)):
  421. grid_w = int(np.ceil(w / self.downsample_ratios[lvl]))
  422. grid_h = int(np.ceil(h / self.downsample_ratios[lvl]))
  423. if self.norm_reg_targets:
  424. if self.multiply_strides_reg_targets:
  425. sample['reg_target{}'.format(lvl)] = np.reshape(
  426. reg_targets_by_level[lvl],
  427. newshape=[grid_h, grid_w, 4])
  428. else:
  429. sample['reg_target{}'.format(lvl)] = \
  430. np.reshape(
  431. reg_targets_by_level[lvl] / \
  432. self.downsample_ratios[lvl],
  433. newshape=[grid_h, grid_w, 4])
  434. else:
  435. sample['reg_target{}'.format(lvl)] = np.reshape(
  436. reg_targets_by_level[lvl],
  437. newshape=[grid_h, grid_w, 4])
  438. sample['labels{}'.format(lvl)] = np.reshape(
  439. labels_by_level[lvl], newshape=[grid_h, grid_w, 1])
  440. sample['centerness{}'.format(lvl)] = np.reshape(
  441. ctn_targets_by_level[lvl], newshape=[grid_h, grid_w, 1])
  442. sample.pop('is_crowd', None)
  443. sample.pop('difficult', None)
  444. sample.pop('gt_class', None)
  445. sample.pop('gt_bbox', None)
  446. return samples
  447. @register_op
  448. class Gt2GFLTarget(BaseOperator):
  449. __shared__ = ['num_classes']
  450. """
  451. Generate GFocal loss targets by groud truth data
  452. """
  453. def __init__(self,
  454. num_classes=80,
  455. downsample_ratios=[8, 16, 32, 64, 128],
  456. grid_cell_scale=4,
  457. cell_offset=0,
  458. compute_vlr_region=False):
  459. super(Gt2GFLTarget, self).__init__()
  460. self.num_classes = num_classes
  461. self.downsample_ratios = downsample_ratios
  462. self.grid_cell_scale = grid_cell_scale
  463. self.cell_offset = cell_offset
  464. self.compute_vlr_region = compute_vlr_region
  465. self.assigner = ATSSAssigner()
  466. def get_grid_cells(self, featmap_size, scale, stride, offset=0):
  467. """
  468. Generate grid cells of a feature map for target assignment.
  469. Args:
  470. featmap_size: Size of a single level feature map.
  471. scale: Grid cell scale.
  472. stride: Down sample stride of the feature map.
  473. offset: Offset of grid cells.
  474. return:
  475. Grid_cells xyxy position. Size should be [feat_w * feat_h, 4]
  476. """
  477. cell_size = stride * scale
  478. h, w = featmap_size
  479. x_range = (np.arange(w, dtype=np.float32) + offset) * stride
  480. y_range = (np.arange(h, dtype=np.float32) + offset) * stride
  481. x, y = np.meshgrid(x_range, y_range)
  482. y = y.flatten()
  483. x = x.flatten()
  484. grid_cells = np.stack(
  485. [
  486. x - 0.5 * cell_size, y - 0.5 * cell_size, x + 0.5 * cell_size,
  487. y + 0.5 * cell_size
  488. ],
  489. axis=-1)
  490. return grid_cells
  491. def get_sample(self, assign_gt_inds, gt_bboxes):
  492. pos_inds = np.unique(np.nonzero(assign_gt_inds > 0)[0])
  493. neg_inds = np.unique(np.nonzero(assign_gt_inds == 0)[0])
  494. pos_assigned_gt_inds = assign_gt_inds[pos_inds] - 1
  495. if gt_bboxes.size == 0:
  496. # hack for index error case
  497. assert pos_assigned_gt_inds.size == 0
  498. pos_gt_bboxes = np.empty_like(gt_bboxes).reshape(-1, 4)
  499. else:
  500. if len(gt_bboxes.shape) < 2:
  501. gt_bboxes = gt_bboxes.resize(-1, 4)
  502. pos_gt_bboxes = gt_bboxes[pos_assigned_gt_inds, :]
  503. return pos_inds, neg_inds, pos_gt_bboxes, pos_assigned_gt_inds
  504. def __call__(self, samples, context=None):
  505. assert len(samples) > 0
  506. batch_size = len(samples)
  507. # get grid cells of image
  508. h, w = samples[0]['image'].shape[1:3]
  509. multi_level_grid_cells = []
  510. for stride in self.downsample_ratios:
  511. featmap_size = (int(math.ceil(h / stride)),
  512. int(math.ceil(w / stride)))
  513. multi_level_grid_cells.append(
  514. self.get_grid_cells(featmap_size, self.grid_cell_scale, stride,
  515. self.cell_offset))
  516. mlvl_grid_cells_list = [
  517. multi_level_grid_cells for i in range(batch_size)
  518. ]
  519. # pixel cell number of multi-level feature maps
  520. num_level_cells = [
  521. grid_cells.shape[0] for grid_cells in mlvl_grid_cells_list[0]
  522. ]
  523. num_level_cells_list = [num_level_cells] * batch_size
  524. # concat all level cells and to a single array
  525. for i in range(batch_size):
  526. mlvl_grid_cells_list[i] = np.concatenate(mlvl_grid_cells_list[i])
  527. # target assign on all images
  528. for sample, grid_cells, num_level_cells in zip(
  529. samples, mlvl_grid_cells_list, num_level_cells_list):
  530. gt_bboxes = sample['gt_bbox']
  531. gt_labels = sample['gt_class'].squeeze()
  532. if gt_labels.size == 1:
  533. gt_labels = np.array([gt_labels]).astype(np.int32)
  534. gt_bboxes_ignore = None
  535. assign_gt_inds, _ = self.assigner(grid_cells, num_level_cells,
  536. gt_bboxes, gt_bboxes_ignore,
  537. gt_labels)
  538. if self.compute_vlr_region:
  539. vlr_region = self.assigner.get_vlr_region(
  540. grid_cells, num_level_cells, gt_bboxes, gt_bboxes_ignore,
  541. gt_labels)
  542. sample['vlr_regions'] = vlr_region
  543. pos_inds, neg_inds, pos_gt_bboxes, pos_assigned_gt_inds = self.get_sample(
  544. assign_gt_inds, gt_bboxes)
  545. num_cells = grid_cells.shape[0]
  546. bbox_targets = np.zeros_like(grid_cells)
  547. bbox_weights = np.zeros_like(grid_cells)
  548. labels = np.ones([num_cells], dtype=np.int64) * self.num_classes
  549. label_weights = np.zeros([num_cells], dtype=np.float32)
  550. if len(pos_inds) > 0:
  551. pos_bbox_targets = pos_gt_bboxes
  552. bbox_targets[pos_inds, :] = pos_bbox_targets
  553. bbox_weights[pos_inds, :] = 1.0
  554. if not np.any(gt_labels):
  555. labels[pos_inds] = 0
  556. else:
  557. labels[pos_inds] = gt_labels[pos_assigned_gt_inds]
  558. label_weights[pos_inds] = 1.0
  559. if len(neg_inds) > 0:
  560. label_weights[neg_inds] = 1.0
  561. sample['grid_cells'] = grid_cells
  562. sample['labels'] = labels
  563. sample['label_weights'] = label_weights
  564. sample['bbox_targets'] = bbox_targets
  565. sample['pos_num'] = max(pos_inds.size, 1)
  566. sample.pop('is_crowd', None)
  567. sample.pop('difficult', None)
  568. sample.pop('gt_class', None)
  569. sample.pop('gt_bbox', None)
  570. sample.pop('gt_score', None)
  571. return samples
  572. @register_op
  573. class Gt2TTFTarget(BaseOperator):
  574. __shared__ = ['num_classes']
  575. """
  576. Gt2TTFTarget
  577. Generate TTFNet targets by ground truth data
  578. Args:
  579. num_classes(int): the number of classes.
  580. down_ratio(int): the down ratio from images to heatmap, 4 by default.
  581. alpha(float): the alpha parameter to generate gaussian target.
  582. 0.54 by default.
  583. """
  584. def __init__(self, num_classes=80, down_ratio=4, alpha=0.54):
  585. super(Gt2TTFTarget, self).__init__()
  586. self.down_ratio = down_ratio
  587. self.num_classes = num_classes
  588. self.alpha = alpha
  589. def __call__(self, samples, context=None):
  590. output_size = samples[0]['image'].shape[1]
  591. feat_size = output_size // self.down_ratio
  592. for sample in samples:
  593. heatmap = np.zeros(
  594. (self.num_classes, feat_size, feat_size), dtype='float32')
  595. box_target = np.ones(
  596. (4, feat_size, feat_size), dtype='float32') * -1
  597. reg_weight = np.zeros((1, feat_size, feat_size), dtype='float32')
  598. gt_bbox = sample['gt_bbox']
  599. gt_class = sample['gt_class']
  600. bbox_w = gt_bbox[:, 2] - gt_bbox[:, 0] + 1
  601. bbox_h = gt_bbox[:, 3] - gt_bbox[:, 1] + 1
  602. area = bbox_w * bbox_h
  603. boxes_areas_log = np.log(area)
  604. boxes_ind = np.argsort(boxes_areas_log, axis=0)[::-1]
  605. boxes_area_topk_log = boxes_areas_log[boxes_ind]
  606. gt_bbox = gt_bbox[boxes_ind]
  607. gt_class = gt_class[boxes_ind]
  608. feat_gt_bbox = gt_bbox / self.down_ratio
  609. feat_gt_bbox = np.clip(feat_gt_bbox, 0, feat_size - 1)
  610. feat_hs, feat_ws = (feat_gt_bbox[:, 3] - feat_gt_bbox[:, 1],
  611. feat_gt_bbox[:, 2] - feat_gt_bbox[:, 0])
  612. ct_inds = np.stack(
  613. [(gt_bbox[:, 0] + gt_bbox[:, 2]) / 2,
  614. (gt_bbox[:, 1] + gt_bbox[:, 3]) / 2],
  615. axis=1) / self.down_ratio
  616. h_radiuses_alpha = (feat_hs / 2. * self.alpha).astype('int32')
  617. w_radiuses_alpha = (feat_ws / 2. * self.alpha).astype('int32')
  618. for k in range(len(gt_bbox)):
  619. cls_id = gt_class[k]
  620. fake_heatmap = np.zeros((feat_size, feat_size), dtype='float32')
  621. self.draw_truncate_gaussian(fake_heatmap, ct_inds[k],
  622. h_radiuses_alpha[k],
  623. w_radiuses_alpha[k])
  624. heatmap[cls_id] = np.maximum(heatmap[cls_id], fake_heatmap)
  625. box_target_inds = fake_heatmap > 0
  626. box_target[:, box_target_inds] = gt_bbox[k][:, None]
  627. local_heatmap = fake_heatmap[box_target_inds]
  628. ct_div = np.sum(local_heatmap)
  629. local_heatmap *= boxes_area_topk_log[k]
  630. reg_weight[0, box_target_inds] = local_heatmap / ct_div
  631. sample['ttf_heatmap'] = heatmap
  632. sample['ttf_box_target'] = box_target
  633. sample['ttf_reg_weight'] = reg_weight
  634. sample.pop('is_crowd', None)
  635. sample.pop('difficult', None)
  636. sample.pop('gt_class', None)
  637. sample.pop('gt_bbox', None)
  638. sample.pop('gt_score', None)
  639. return samples
  640. def draw_truncate_gaussian(self, heatmap, center, h_radius, w_radius):
  641. h, w = 2 * h_radius + 1, 2 * w_radius + 1
  642. sigma_x = w / 6
  643. sigma_y = h / 6
  644. gaussian = gaussian2D((h, w), sigma_x, sigma_y)
  645. x, y = int(center[0]), int(center[1])
  646. height, width = heatmap.shape[0:2]
  647. left, right = min(x, w_radius), min(width - x, w_radius + 1)
  648. top, bottom = min(y, h_radius), min(height - y, h_radius + 1)
  649. masked_heatmap = heatmap[y - top:y + bottom, x - left:x + right]
  650. masked_gaussian = gaussian[h_radius - top:h_radius + bottom, w_radius -
  651. left:w_radius + right]
  652. if min(masked_gaussian.shape) > 0 and min(masked_heatmap.shape) > 0:
  653. heatmap[y - top:y + bottom, x - left:x + right] = np.maximum(
  654. masked_heatmap, masked_gaussian)
  655. return heatmap
  656. @register_op
  657. class Gt2Solov2Target(BaseOperator):
  658. """Assign mask target and labels in SOLOv2 network.
  659. The code of this function is based on:
  660. https://github.com/WXinlong/SOLO/blob/master/mmdet/models/anchor_heads/solov2_head.py#L271
  661. Args:
  662. num_grids (list): The list of feature map grids size.
  663. scale_ranges (list): The list of mask boundary range.
  664. coord_sigma (float): The coefficient of coordinate area length.
  665. sampling_ratio (float): The ratio of down sampling.
  666. """
  667. def __init__(self,
  668. num_grids=[40, 36, 24, 16, 12],
  669. scale_ranges=[[1, 96], [48, 192], [96, 384], [192, 768],
  670. [384, 2048]],
  671. coord_sigma=0.2,
  672. sampling_ratio=4.0):
  673. super(Gt2Solov2Target, self).__init__()
  674. self.num_grids = num_grids
  675. self.scale_ranges = scale_ranges
  676. self.coord_sigma = coord_sigma
  677. self.sampling_ratio = sampling_ratio
  678. def _scale_size(self, im, scale):
  679. h, w = im.shape[:2]
  680. new_size = (int(w * float(scale) + 0.5), int(h * float(scale) + 0.5))
  681. resized_img = cv2.resize(
  682. im, None, None, fx=scale, fy=scale, interpolation=cv2.INTER_LINEAR)
  683. return resized_img
  684. def __call__(self, samples, context=None):
  685. sample_id = 0
  686. max_ins_num = [0] * len(self.num_grids)
  687. for sample in samples:
  688. gt_bboxes_raw = sample['gt_bbox']
  689. gt_labels_raw = sample['gt_class'] + 1
  690. im_c, im_h, im_w = sample['image'].shape[:]
  691. gt_masks_raw = sample['gt_segm'].astype(np.uint8)
  692. mask_feat_size = [
  693. int(im_h / self.sampling_ratio), int(im_w / self.sampling_ratio)
  694. ]
  695. gt_areas = np.sqrt((gt_bboxes_raw[:, 2] - gt_bboxes_raw[:, 0]) *
  696. (gt_bboxes_raw[:, 3] - gt_bboxes_raw[:, 1]))
  697. ins_ind_label_list = []
  698. idx = 0
  699. for (lower_bound, upper_bound), num_grid \
  700. in zip(self.scale_ranges, self.num_grids):
  701. hit_indices = ((gt_areas >= lower_bound) &
  702. (gt_areas <= upper_bound)).nonzero()[0]
  703. num_ins = len(hit_indices)
  704. ins_label = []
  705. grid_order = []
  706. cate_label = np.zeros([num_grid, num_grid], dtype=np.int64)
  707. ins_ind_label = np.zeros([num_grid**2], dtype=np.bool_)
  708. if num_ins == 0:
  709. ins_label = np.zeros(
  710. [1, mask_feat_size[0], mask_feat_size[1]],
  711. dtype=np.uint8)
  712. ins_ind_label_list.append(ins_ind_label)
  713. sample['cate_label{}'.format(idx)] = cate_label.flatten()
  714. sample['ins_label{}'.format(idx)] = ins_label
  715. sample['grid_order{}'.format(idx)] = np.asarray(
  716. [sample_id * num_grid * num_grid + 0], dtype=np.int32)
  717. idx += 1
  718. continue
  719. gt_bboxes = gt_bboxes_raw[hit_indices]
  720. gt_labels = gt_labels_raw[hit_indices]
  721. gt_masks = gt_masks_raw[hit_indices, ...]
  722. half_ws = 0.5 * (
  723. gt_bboxes[:, 2] - gt_bboxes[:, 0]) * self.coord_sigma
  724. half_hs = 0.5 * (
  725. gt_bboxes[:, 3] - gt_bboxes[:, 1]) * self.coord_sigma
  726. for seg_mask, gt_label, half_h, half_w in zip(
  727. gt_masks, gt_labels, half_hs, half_ws):
  728. if seg_mask.sum() == 0:
  729. continue
  730. # mass center
  731. upsampled_size = (mask_feat_size[0] * 4,
  732. mask_feat_size[1] * 4)
  733. center_h, center_w = ndimage.measurements.center_of_mass(
  734. seg_mask)
  735. coord_w = int(
  736. (center_w / upsampled_size[1]) // (1. / num_grid))
  737. coord_h = int(
  738. (center_h / upsampled_size[0]) // (1. / num_grid))
  739. # left, top, right, down
  740. top_box = max(0,
  741. int(((center_h - half_h) / upsampled_size[0])
  742. // (1. / num_grid)))
  743. down_box = min(num_grid - 1,
  744. int(((center_h + half_h) / upsampled_size[0])
  745. // (1. / num_grid)))
  746. left_box = max(0,
  747. int(((center_w - half_w) / upsampled_size[1])
  748. // (1. / num_grid)))
  749. right_box = min(num_grid - 1,
  750. int(((center_w + half_w) /
  751. upsampled_size[1]) // (1. / num_grid)))
  752. top = max(top_box, coord_h - 1)
  753. down = min(down_box, coord_h + 1)
  754. left = max(coord_w - 1, left_box)
  755. right = min(right_box, coord_w + 1)
  756. cate_label[top:(down + 1), left:(right + 1)] = gt_label
  757. seg_mask = self._scale_size(
  758. seg_mask, scale=1. / self.sampling_ratio)
  759. for i in range(top, down + 1):
  760. for j in range(left, right + 1):
  761. label = int(i * num_grid + j)
  762. cur_ins_label = np.zeros(
  763. [mask_feat_size[0], mask_feat_size[1]],
  764. dtype=np.uint8)
  765. cur_ins_label[:seg_mask.shape[0], :seg_mask.shape[
  766. 1]] = seg_mask
  767. ins_label.append(cur_ins_label)
  768. ins_ind_label[label] = True
  769. grid_order.append(sample_id * num_grid * num_grid +
  770. label)
  771. if ins_label == []:
  772. ins_label = np.zeros(
  773. [1, mask_feat_size[0], mask_feat_size[1]],
  774. dtype=np.uint8)
  775. ins_ind_label_list.append(ins_ind_label)
  776. sample['cate_label{}'.format(idx)] = cate_label.flatten()
  777. sample['ins_label{}'.format(idx)] = ins_label
  778. sample['grid_order{}'.format(idx)] = np.asarray(
  779. [sample_id * num_grid * num_grid + 0], dtype=np.int32)
  780. else:
  781. ins_label = np.stack(ins_label, axis=0)
  782. ins_ind_label_list.append(ins_ind_label)
  783. sample['cate_label{}'.format(idx)] = cate_label.flatten()
  784. sample['ins_label{}'.format(idx)] = ins_label
  785. sample['grid_order{}'.format(idx)] = np.asarray(
  786. grid_order, dtype=np.int32)
  787. assert len(grid_order) > 0
  788. max_ins_num[idx] = max(
  789. max_ins_num[idx],
  790. sample['ins_label{}'.format(idx)].shape[0])
  791. idx += 1
  792. ins_ind_labels = np.concatenate([
  793. ins_ind_labels_level_img
  794. for ins_ind_labels_level_img in ins_ind_label_list
  795. ])
  796. fg_num = np.sum(ins_ind_labels)
  797. sample['fg_num'] = fg_num
  798. sample_id += 1
  799. sample.pop('is_crowd')
  800. sample.pop('gt_class')
  801. sample.pop('gt_bbox')
  802. sample.pop('gt_poly')
  803. sample.pop('gt_segm')
  804. # padding batch
  805. for data in samples:
  806. for idx in range(len(self.num_grids)):
  807. gt_ins_data = np.zeros(
  808. [
  809. max_ins_num[idx],
  810. data['ins_label{}'.format(idx)].shape[1],
  811. data['ins_label{}'.format(idx)].shape[2]
  812. ],
  813. dtype=np.uint8)
  814. gt_ins_data[0:data['ins_label{}'.format(idx)].shape[
  815. 0], :, :] = data['ins_label{}'.format(idx)]
  816. gt_grid_order = np.zeros([max_ins_num[idx]], dtype=np.int32)
  817. gt_grid_order[0:data['grid_order{}'.format(idx)].shape[
  818. 0]] = data['grid_order{}'.format(idx)]
  819. data['ins_label{}'.format(idx)] = gt_ins_data
  820. data['grid_order{}'.format(idx)] = gt_grid_order
  821. return samples
  822. @register_op
  823. class Gt2SparseRCNNTarget(BaseOperator):
  824. '''
  825. Generate SparseRCNN targets by groud truth data
  826. '''
  827. def __init__(self):
  828. super(Gt2SparseRCNNTarget, self).__init__()
  829. def __call__(self, samples, context=None):
  830. for sample in samples:
  831. im = sample["image"]
  832. h, w = im.shape[1:3]
  833. img_whwh = np.array([w, h, w, h], dtype=np.int32)
  834. sample["img_whwh"] = img_whwh
  835. if "scale_factor" in sample:
  836. sample["scale_factor_wh"] = np.array(
  837. [sample["scale_factor"][1], sample["scale_factor"][0]],
  838. dtype=np.float32)
  839. else:
  840. sample["scale_factor_wh"] = np.array(
  841. [1.0, 1.0], dtype=np.float32)
  842. return samples
  843. @register_op
  844. class PadMaskBatch(BaseOperator):
  845. """
  846. Pad a batch of samples so they can be divisible by a stride.
  847. The layout of each image should be 'CHW'.
  848. Args:
  849. pad_to_stride (int): If `pad_to_stride > 0`, pad zeros to ensure
  850. height and width is divisible by `pad_to_stride`.
  851. return_pad_mask (bool): If `return_pad_mask = True`, return
  852. `pad_mask` for transformer.
  853. """
  854. def __init__(self, pad_to_stride=0, return_pad_mask=False):
  855. super(PadMaskBatch, self).__init__()
  856. self.pad_to_stride = pad_to_stride
  857. self.return_pad_mask = return_pad_mask
  858. def __call__(self, samples, context=None):
  859. """
  860. Args:
  861. samples (list): a batch of sample, each is dict.
  862. """
  863. coarsest_stride = self.pad_to_stride
  864. max_shape = np.array([data['image'].shape for data in samples]).max(
  865. axis=0)
  866. if coarsest_stride > 0:
  867. max_shape[1] = int(
  868. np.ceil(max_shape[1] / coarsest_stride) * coarsest_stride)
  869. max_shape[2] = int(
  870. np.ceil(max_shape[2] / coarsest_stride) * coarsest_stride)
  871. for data in samples:
  872. im = data['image']
  873. im_c, im_h, im_w = im.shape[:]
  874. padding_im = np.zeros(
  875. (im_c, max_shape[1], max_shape[2]), dtype=np.float32)
  876. padding_im[:, :im_h, :im_w] = im
  877. data['image'] = padding_im
  878. if 'semantic' in data and data['semantic'] is not None:
  879. semantic = data['semantic']
  880. padding_sem = np.zeros(
  881. (1, max_shape[1], max_shape[2]), dtype=np.float32)
  882. padding_sem[:, :im_h, :im_w] = semantic
  883. data['semantic'] = padding_sem
  884. if 'gt_segm' in data and data['gt_segm'] is not None:
  885. gt_segm = data['gt_segm']
  886. padding_segm = np.zeros(
  887. (gt_segm.shape[0], max_shape[1], max_shape[2]),
  888. dtype=np.uint8)
  889. padding_segm[:, :im_h, :im_w] = gt_segm
  890. data['gt_segm'] = padding_segm
  891. if self.return_pad_mask:
  892. padding_mask = np.zeros(
  893. (max_shape[1], max_shape[2]), dtype=np.float32)
  894. padding_mask[:im_h, :im_w] = 1.
  895. data['pad_mask'] = padding_mask
  896. return samples
  897. @register_op
  898. class Gt2CenterNetTarget(BaseOperator):
  899. __shared__ = ['num_classes']
  900. """Gt2CenterNetTarget
  901. Genterate CenterNet targets by ground-truth
  902. Args:
  903. down_ratio (int): The down sample ratio between output feature and
  904. input image.
  905. num_classes (int): The number of classes, 80 by default.
  906. max_objs (int): The maximum objects detected, 128 by default.
  907. """
  908. def __init__(self, num_classes=80, down_ratio=4, max_objs=128):
  909. super(Gt2CenterNetTarget, self).__init__()
  910. self.nc = num_classes
  911. self.down_ratio = down_ratio
  912. self.max_objs = max_objs
  913. def __call__(self, sample, context=None):
  914. input_h, input_w = sample['image'].shape[1:]
  915. output_h = input_h // self.down_ratio
  916. output_w = input_w // self.down_ratio
  917. gt_bbox = sample['gt_bbox']
  918. gt_class = sample['gt_class']
  919. hm = np.zeros((self.nc, output_h, output_w), dtype=np.float32)
  920. wh = np.zeros((self.max_objs, 2), dtype=np.float32)
  921. reg = np.zeros((self.max_objs, 2), dtype=np.float32)
  922. ind = np.zeros((self.max_objs), dtype=np.int64)
  923. reg_mask = np.zeros((self.max_objs), dtype=np.int32)
  924. cat_spec_wh = np.zeros((self.max_objs, self.nc * 2), dtype=np.float32)
  925. cat_spec_mask = np.zeros((self.max_objs, self.nc * 2), dtype=np.int32)
  926. trans_output = get_affine_transform(
  927. center=sample['center'],
  928. input_size=[sample['scale'], sample['scale']],
  929. rot=0,
  930. output_size=[output_w, output_h])
  931. gt_det = []
  932. for i, (bbox, cls) in enumerate(zip(gt_bbox, gt_class)):
  933. cls = int(cls)
  934. bbox[:2] = affine_transform(bbox[:2], trans_output)
  935. bbox[2:] = affine_transform(bbox[2:], trans_output)
  936. bbox_amodal = copy.deepcopy(bbox)
  937. bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
  938. bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
  939. h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
  940. if h > 0 and w > 0:
  941. radius = gaussian_radius((math.ceil(h), math.ceil(w)), 0.7)
  942. radius = max(0, int(radius))
  943. ct = np.array(
  944. [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
  945. dtype=np.float32)
  946. ct_int = ct.astype(np.int32)
  947. # get hm,wh,reg,ind,ind_mask
  948. draw_umich_gaussian(hm[cls], ct_int, radius)
  949. wh[i] = 1. * w, 1. * h
  950. reg[i] = ct - ct_int
  951. ind[i] = ct_int[1] * output_w + ct_int[0]
  952. reg_mask[i] = 1
  953. cat_spec_wh[i, cls * 2:cls * 2 + 2] = wh[i]
  954. cat_spec_mask[i, cls * 2:cls * 2 + 2] = 1
  955. gt_det.append([
  956. ct[0] - w / 2, ct[1] - h / 2, ct[0] + w / 2, ct[1] + h / 2,
  957. 1, cls
  958. ])
  959. sample.pop('gt_bbox', None)
  960. sample.pop('gt_class', None)
  961. sample.pop('center', None)
  962. sample.pop('scale', None)
  963. sample.pop('is_crowd', None)
  964. sample.pop('difficult', None)
  965. sample['index'] = ind
  966. sample['index_mask'] = reg_mask
  967. sample['heatmap'] = hm
  968. sample['size'] = wh
  969. sample['offset'] = reg
  970. return sample
  971. @register_op
  972. class PadGT(BaseOperator):
  973. """
  974. Pad 0 to `gt_class`, `gt_bbox`, `gt_score`...
  975. The num_max_boxes is the largest for batch.
  976. Args:
  977. return_gt_mask (bool): If true, return `pad_gt_mask`,
  978. 1 means bbox, 0 means no bbox.
  979. """
  980. def __init__(self, return_gt_mask=True):
  981. super(PadGT, self).__init__()
  982. self.return_gt_mask = return_gt_mask
  983. def __call__(self, samples, context=None):
  984. num_max_boxes = max([len(s['gt_bbox']) for s in samples])
  985. for sample in samples:
  986. if self.return_gt_mask:
  987. sample['pad_gt_mask'] = np.zeros(
  988. (num_max_boxes, 1), dtype=np.float32)
  989. if num_max_boxes == 0:
  990. continue
  991. num_gt = len(sample['gt_bbox'])
  992. pad_gt_class = np.zeros((num_max_boxes, 1), dtype=np.int32)
  993. pad_gt_bbox = np.zeros((num_max_boxes, 4), dtype=np.float32)
  994. if num_gt > 0:
  995. pad_gt_class[:num_gt] = sample['gt_class']
  996. pad_gt_bbox[:num_gt] = sample['gt_bbox']
  997. sample['gt_class'] = pad_gt_class
  998. sample['gt_bbox'] = pad_gt_bbox
  999. # pad_gt_mask
  1000. if 'pad_gt_mask' in sample:
  1001. sample['pad_gt_mask'][:num_gt] = 1
  1002. # gt_score
  1003. if 'gt_score' in sample:
  1004. pad_gt_score = np.zeros((num_max_boxes, 1), dtype=np.float32)
  1005. if num_gt > 0:
  1006. pad_gt_score[:num_gt] = sample['gt_score']
  1007. sample['gt_score'] = pad_gt_score
  1008. if 'is_crowd' in sample:
  1009. pad_is_crowd = np.zeros((num_max_boxes, 1), dtype=np.int32)
  1010. if num_gt > 0:
  1011. pad_is_crowd[:num_gt] = sample['is_crowd']
  1012. sample['is_crowd'] = pad_is_crowd
  1013. if 'difficult' in sample:
  1014. pad_diff = np.zeros((num_max_boxes, 1), dtype=np.int32)
  1015. if num_gt > 0:
  1016. pad_diff[:num_gt] = sample['difficult']
  1017. sample['difficult'] = pad_diff
  1018. return samples
  1019. @register_op
  1020. class PadRGT(BaseOperator):
  1021. """
  1022. Pad 0 to `gt_class`, `gt_bbox`, `gt_score`...
  1023. The num_max_boxes is the largest for batch.
  1024. Args:
  1025. return_gt_mask (bool): If true, return `pad_gt_mask`,
  1026. 1 means bbox, 0 means no bbox.
  1027. """
  1028. def __init__(self, return_gt_mask=True):
  1029. super(PadRGT, self).__init__()
  1030. self.return_gt_mask = return_gt_mask
  1031. def pad_field(self, sample, field, num_gt):
  1032. name, shape, dtype = field
  1033. if name in sample:
  1034. pad_v = np.zeros(shape, dtype=dtype)
  1035. if num_gt > 0:
  1036. pad_v[:num_gt] = sample[name]
  1037. sample[name] = pad_v
  1038. def __call__(self, samples, context=None):
  1039. num_max_boxes = max([len(s['gt_bbox']) for s in samples])
  1040. for sample in samples:
  1041. if self.return_gt_mask:
  1042. sample['pad_gt_mask'] = np.zeros(
  1043. (num_max_boxes, 1), dtype=np.float32)
  1044. if num_max_boxes == 0:
  1045. continue
  1046. num_gt = len(sample['gt_bbox'])
  1047. pad_gt_class = np.zeros((num_max_boxes, 1), dtype=np.int32)
  1048. pad_gt_bbox = np.zeros((num_max_boxes, 4), dtype=np.float32)
  1049. if num_gt > 0:
  1050. pad_gt_class[:num_gt] = sample['gt_class']
  1051. pad_gt_bbox[:num_gt] = sample['gt_bbox']
  1052. sample['gt_class'] = pad_gt_class
  1053. sample['gt_bbox'] = pad_gt_bbox
  1054. # pad_gt_mask
  1055. if 'pad_gt_mask' in sample:
  1056. sample['pad_gt_mask'][:num_gt] = 1
  1057. # gt_score
  1058. names = ['gt_score', 'is_crowd', 'difficult', 'gt_poly', 'gt_rbox']
  1059. dims = [1, 1, 1, 8, 5]
  1060. dtypes = [np.float32, np.int32, np.int32, np.float32, np.float32]
  1061. for name, dim, dtype in zip(names, dims, dtypes):
  1062. self.pad_field(sample, [name, (num_max_boxes, dim), dtype],
  1063. num_gt)
  1064. return samples
  1065. @register_op
  1066. class Gt2CenterTrackTarget(BaseOperator):
  1067. __shared__ = ['num_classes']
  1068. """Gt2CenterTrackTarget
  1069. Genterate CenterTrack targets by ground-truth
  1070. Args:
  1071. num_classes (int): The number of classes, 1 by default.
  1072. down_ratio (int): The down sample ratio between output feature and
  1073. input image.
  1074. max_objs (int): The maximum objects detected, 256 by default.
  1075. """
  1076. def __init__(self,
  1077. num_classes=1,
  1078. down_ratio=4,
  1079. max_objs=256,
  1080. hm_disturb=0.05,
  1081. lost_disturb=0.4,
  1082. fp_disturb=0.1,
  1083. pre_hm=True,
  1084. add_tracking=True,
  1085. add_ltrb_amodal=True):
  1086. super(Gt2CenterTrackTarget, self).__init__()
  1087. self.nc = num_classes
  1088. self.down_ratio = down_ratio
  1089. self.max_objs = max_objs
  1090. self.hm_disturb = hm_disturb
  1091. self.lost_disturb = lost_disturb
  1092. self.fp_disturb = fp_disturb
  1093. self.pre_hm = pre_hm
  1094. self.add_tracking = add_tracking
  1095. self.add_ltrb_amodal = add_ltrb_amodal
  1096. def _get_pre_dets(self, input_h, input_w, trans_input_pre, gt_bbox_pre,
  1097. gt_class_pre, gt_track_id_pre):
  1098. hm_h, hm_w = input_h, input_w
  1099. reutrn_hm = self.pre_hm
  1100. pre_hm = np.zeros(
  1101. (1, hm_h, hm_w), dtype=np.float32) if reutrn_hm else None
  1102. pre_cts, track_ids = [], []
  1103. for i, (
  1104. bbox, cls, track_id
  1105. ) in enumerate(zip(gt_bbox_pre, gt_class_pre, gt_track_id_pre)):
  1106. cls = int(cls)
  1107. bbox[:2] = affine_transform(bbox[:2], trans_input_pre)
  1108. bbox[2:] = affine_transform(bbox[2:], trans_input_pre)
  1109. bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, hm_w - 1)
  1110. bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, hm_h - 1)
  1111. h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
  1112. max_rad = 1
  1113. if (h > 0 and w > 0):
  1114. radius = gaussian_radius((math.ceil(h), math.ceil(w)), 0.7)
  1115. radius = max(0, int(radius))
  1116. max_rad = max(max_rad, radius)
  1117. ct = np.array(
  1118. [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
  1119. dtype=np.float32)
  1120. ct0 = ct.copy()
  1121. conf = 1
  1122. ct[0] = ct[0] + np.random.randn() * self.hm_disturb * w
  1123. ct[1] = ct[1] + np.random.randn() * self.hm_disturb * h
  1124. conf = 1 if np.random.rand() > self.lost_disturb else 0
  1125. ct_int = ct.astype(np.int32)
  1126. if conf == 0:
  1127. pre_cts.append(ct / self.down_ratio)
  1128. else:
  1129. pre_cts.append(ct0 / self.down_ratio)
  1130. track_ids.append(track_id)
  1131. if reutrn_hm:
  1132. draw_umich_gaussian(pre_hm[0], ct_int, radius, k=conf)
  1133. if np.random.rand() < self.fp_disturb and reutrn_hm:
  1134. ct2 = ct0.copy()
  1135. # Hard code heatmap disturb ratio, haven't tried other numbers.
  1136. ct2[0] = ct2[0] + np.random.randn() * 0.05 * w
  1137. ct2[1] = ct2[1] + np.random.randn() * 0.05 * h
  1138. ct2_int = ct2.astype(np.int32)
  1139. draw_umich_gaussian(pre_hm[0], ct2_int, radius, k=conf)
  1140. return pre_hm, pre_cts, track_ids
  1141. def __call__(self, sample, context=None):
  1142. input_h, input_w = sample['image'].shape[1:]
  1143. output_h = input_h // self.down_ratio
  1144. output_w = input_w // self.down_ratio
  1145. gt_bbox = sample['gt_bbox']
  1146. gt_class = sample['gt_class']
  1147. # init
  1148. hm = np.zeros((self.nc, output_h, output_w), dtype=np.float32)
  1149. wh = np.zeros((self.max_objs, 2), dtype=np.float32)
  1150. reg = np.zeros((self.max_objs, 2), dtype=np.float32)
  1151. ind = np.zeros((self.max_objs), dtype=np.int64)
  1152. reg_mask = np.zeros((self.max_objs), dtype=np.int32)
  1153. if self.add_tracking:
  1154. tr = np.zeros((self.max_objs, 2), dtype=np.float32)
  1155. if self.add_ltrb_amodal:
  1156. ltrb_amodal = np.zeros((self.max_objs, 4), dtype=np.float32)
  1157. trans_output = get_affine_transform(
  1158. center=sample['center'],
  1159. input_size=[sample['scale'], sample['scale']],
  1160. rot=0,
  1161. output_size=[output_w, output_h])
  1162. pre_hm, pre_cts, track_ids = self._get_pre_dets(
  1163. input_h, input_w, sample['trans_input'], sample['pre_gt_bbox'],
  1164. sample['pre_gt_class'], sample['pre_gt_track_id'])
  1165. for i, (bbox, cls) in enumerate(zip(gt_bbox, gt_class)):
  1166. cls = int(cls)
  1167. rect = np.array(
  1168. [[bbox[0], bbox[1]], [bbox[0], bbox[3]], [bbox[2], bbox[3]],
  1169. [bbox[2], bbox[1]]],
  1170. dtype=np.float32)
  1171. for t in range(4):
  1172. rect[t] = affine_transform(rect[t], trans_output)
  1173. bbox[:2] = rect[:, 0].min(), rect[:, 1].min()
  1174. bbox[2:] = rect[:, 0].max(), rect[:, 1].max()
  1175. bbox_amodal = copy.deepcopy(bbox)
  1176. bbox[[0, 2]] = np.clip(bbox[[0, 2]], 0, output_w - 1)
  1177. bbox[[1, 3]] = np.clip(bbox[[1, 3]], 0, output_h - 1)
  1178. h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
  1179. if h > 0 and w > 0:
  1180. radius = gaussian_radius((math.ceil(h), math.ceil(w)), 0.7)
  1181. radius = max(0, int(radius))
  1182. ct = np.array(
  1183. [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
  1184. dtype=np.float32)
  1185. ct_int = ct.astype(np.int32)
  1186. # get hm,wh,reg,ind,ind_mask
  1187. draw_umich_gaussian(hm[cls], ct_int, radius)
  1188. wh[i] = 1. * w, 1. * h
  1189. reg[i] = ct - ct_int
  1190. ind[i] = ct_int[1] * output_w + ct_int[0]
  1191. reg_mask[i] = 1
  1192. if self.add_tracking:
  1193. if sample['gt_track_id'][i] in track_ids:
  1194. pre_ct = pre_cts[track_ids.index(sample['gt_track_id'][
  1195. i])]
  1196. tr[i] = pre_ct - ct_int
  1197. if self.add_ltrb_amodal:
  1198. ltrb_amodal[i] = \
  1199. bbox_amodal[0] - ct_int[0], bbox_amodal[1] - ct_int[1], \
  1200. bbox_amodal[2] - ct_int[0], bbox_amodal[3] - ct_int[1]
  1201. new_sample = {'image': sample['image']}
  1202. new_sample['index'] = ind
  1203. new_sample['index_mask'] = reg_mask
  1204. new_sample['heatmap'] = hm
  1205. new_sample['size'] = wh
  1206. new_sample['offset'] = reg
  1207. if self.add_tracking:
  1208. new_sample['tracking'] = tr
  1209. if self.add_ltrb_amodal:
  1210. new_sample['ltrb_amodal'] = ltrb_amodal
  1211. new_sample['pre_image'] = sample['pre_image']
  1212. new_sample['pre_hm'] = pre_hm
  1213. del sample
  1214. return new_sample