target.py 27 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
  14. import numpy as np
  15. import paddle
  16. from ..bbox_utils import bbox2delta, bbox_overlaps
  17. def rpn_anchor_target(anchors,
  18. gt_boxes,
  19. rpn_batch_size_per_im,
  20. rpn_positive_overlap,
  21. rpn_negative_overlap,
  22. rpn_fg_fraction,
  23. use_random=True,
  24. batch_size=1,
  25. ignore_thresh=-1,
  26. is_crowd=None,
  27. weights=[1., 1., 1., 1.],
  28. assign_on_cpu=False):
  29. tgt_labels = []
  30. tgt_bboxes = []
  31. tgt_deltas = []
  32. for i in range(batch_size):
  33. gt_bbox = gt_boxes[i]
  34. is_crowd_i = is_crowd[i] if is_crowd else None
  35. # Step1: match anchor and gt_bbox
  36. matches, match_labels = label_box(
  37. anchors, gt_bbox, rpn_positive_overlap, rpn_negative_overlap, True,
  38. ignore_thresh, is_crowd_i, assign_on_cpu)
  39. # Step2: sample anchor
  40. fg_inds, bg_inds = subsample_labels(match_labels, rpn_batch_size_per_im,
  41. rpn_fg_fraction, 0, use_random)
  42. # Fill with the ignore label (-1), then set positive and negative labels
  43. labels = paddle.full(match_labels.shape, -1, dtype='int32')
  44. if bg_inds.shape[0] > 0:
  45. labels = paddle.scatter(labels, bg_inds, paddle.zeros_like(bg_inds))
  46. if fg_inds.shape[0] > 0:
  47. labels = paddle.scatter(labels, fg_inds, paddle.ones_like(fg_inds))
  48. # Step3: make output
  49. if gt_bbox.shape[0] == 0:
  50. matched_gt_boxes = paddle.zeros([matches.shape[0], 4])
  51. tgt_delta = paddle.zeros([matches.shape[0], 4])
  52. else:
  53. matched_gt_boxes = paddle.gather(gt_bbox, matches)
  54. tgt_delta = bbox2delta(anchors, matched_gt_boxes, weights)
  55. matched_gt_boxes.stop_gradient = True
  56. tgt_delta.stop_gradient = True
  57. labels.stop_gradient = True
  58. tgt_labels.append(labels)
  59. tgt_bboxes.append(matched_gt_boxes)
  60. tgt_deltas.append(tgt_delta)
  61. return tgt_labels, tgt_bboxes, tgt_deltas
  62. def label_box(anchors,
  63. gt_boxes,
  64. positive_overlap,
  65. negative_overlap,
  66. allow_low_quality,
  67. ignore_thresh,
  68. is_crowd=None,
  69. assign_on_cpu=False):
  70. if assign_on_cpu:
  71. device = paddle.device.get_device()
  72. paddle.set_device("cpu")
  73. iou = bbox_overlaps(gt_boxes, anchors)
  74. paddle.set_device(device)
  75. else:
  76. iou = bbox_overlaps(gt_boxes, anchors)
  77. n_gt = gt_boxes.shape[0]
  78. if n_gt == 0 or is_crowd is None:
  79. n_gt_crowd = 0
  80. else:
  81. n_gt_crowd = paddle.nonzero(is_crowd).shape[0]
  82. if iou.shape[0] == 0 or n_gt_crowd == n_gt:
  83. # No truth, assign everything to background
  84. default_matches = paddle.full((iou.shape[1], ), 0, dtype='int64')
  85. default_match_labels = paddle.full((iou.shape[1], ), 0, dtype='int32')
  86. return default_matches, default_match_labels
  87. # if ignore_thresh > 0, remove anchor if it is closed to
  88. # one of the crowded ground-truth
  89. if n_gt_crowd > 0:
  90. N_a = anchors.shape[0]
  91. ones = paddle.ones([N_a])
  92. mask = is_crowd * ones
  93. if ignore_thresh > 0:
  94. crowd_iou = iou * mask
  95. valid = (paddle.sum((crowd_iou > ignore_thresh).cast('int32'),
  96. axis=0) > 0).cast('float32')
  97. iou = iou * (1 - valid) - valid
  98. # ignore the iou between anchor and crowded ground-truth
  99. iou = iou * (1 - mask) - mask
  100. matched_vals, matches = paddle.topk(iou, k=1, axis=0)
  101. match_labels = paddle.full(matches.shape, -1, dtype='int32')
  102. # set ignored anchor with iou = -1
  103. neg_cond = paddle.logical_and(matched_vals > -1,
  104. matched_vals < negative_overlap)
  105. match_labels = paddle.where(neg_cond,
  106. paddle.zeros_like(match_labels), match_labels)
  107. match_labels = paddle.where(matched_vals >= positive_overlap,
  108. paddle.ones_like(match_labels), match_labels)
  109. if allow_low_quality:
  110. highest_quality_foreach_gt = iou.max(axis=1, keepdim=True)
  111. pred_inds_with_highest_quality = paddle.logical_and(
  112. iou > 0, iou == highest_quality_foreach_gt).cast('int32').sum(
  113. 0, keepdim=True)
  114. match_labels = paddle.where(pred_inds_with_highest_quality > 0,
  115. paddle.ones_like(match_labels),
  116. match_labels)
  117. matches = matches.flatten()
  118. match_labels = match_labels.flatten()
  119. return matches, match_labels
  120. def subsample_labels(labels,
  121. num_samples,
  122. fg_fraction,
  123. bg_label=0,
  124. use_random=True):
  125. positive = paddle.nonzero(
  126. paddle.logical_and(labels != -1, labels != bg_label))
  127. negative = paddle.nonzero(labels == bg_label)
  128. fg_num = int(num_samples * fg_fraction)
  129. fg_num = min(positive.numel(), fg_num)
  130. bg_num = num_samples - fg_num
  131. bg_num = min(negative.numel(), bg_num)
  132. if fg_num == 0 and bg_num == 0:
  133. fg_inds = paddle.zeros([0], dtype='int32')
  134. bg_inds = paddle.zeros([0], dtype='int32')
  135. return fg_inds, bg_inds
  136. # randomly select positive and negative examples
  137. negative = negative.cast('int32').flatten()
  138. bg_perm = paddle.randperm(negative.numel(), dtype='int32')
  139. bg_perm = paddle.slice(bg_perm, axes=[0], starts=[0], ends=[bg_num])
  140. if use_random:
  141. bg_inds = paddle.gather(negative, bg_perm)
  142. else:
  143. bg_inds = paddle.slice(negative, axes=[0], starts=[0], ends=[bg_num])
  144. if fg_num == 0:
  145. fg_inds = paddle.zeros([0], dtype='int32')
  146. return fg_inds, bg_inds
  147. positive = positive.cast('int32').flatten()
  148. fg_perm = paddle.randperm(positive.numel(), dtype='int32')
  149. fg_perm = paddle.slice(fg_perm, axes=[0], starts=[0], ends=[fg_num])
  150. if use_random:
  151. fg_inds = paddle.gather(positive, fg_perm)
  152. else:
  153. fg_inds = paddle.slice(positive, axes=[0], starts=[0], ends=[fg_num])
  154. return fg_inds, bg_inds
  155. def generate_proposal_target(rpn_rois,
  156. gt_classes,
  157. gt_boxes,
  158. batch_size_per_im,
  159. fg_fraction,
  160. fg_thresh,
  161. bg_thresh,
  162. num_classes,
  163. ignore_thresh=-1.,
  164. is_crowd=None,
  165. use_random=True,
  166. is_cascade=False,
  167. cascade_iou=0.5,
  168. assign_on_cpu=False,
  169. add_gt_as_proposals=True):
  170. rois_with_gt = []
  171. tgt_labels = []
  172. tgt_bboxes = []
  173. tgt_gt_inds = []
  174. new_rois_num = []
  175. # In cascade rcnn, the threshold for foreground and background
  176. # is used from cascade_iou
  177. fg_thresh = cascade_iou if is_cascade else fg_thresh
  178. bg_thresh = cascade_iou if is_cascade else bg_thresh
  179. for i, rpn_roi in enumerate(rpn_rois):
  180. gt_bbox = gt_boxes[i]
  181. is_crowd_i = is_crowd[i] if is_crowd else None
  182. gt_class = paddle.squeeze(gt_classes[i], axis=-1)
  183. # Concat RoIs and gt boxes except cascade rcnn or none gt
  184. if add_gt_as_proposals and gt_bbox.shape[0] > 0:
  185. bbox = paddle.concat([rpn_roi, gt_bbox])
  186. else:
  187. bbox = rpn_roi
  188. # Step1: label bbox
  189. matches, match_labels = label_box(bbox, gt_bbox, fg_thresh, bg_thresh,
  190. False, ignore_thresh, is_crowd_i,
  191. assign_on_cpu)
  192. # Step2: sample bbox
  193. sampled_inds, sampled_gt_classes = sample_bbox(
  194. matches, match_labels, gt_class, batch_size_per_im, fg_fraction,
  195. num_classes, use_random, is_cascade)
  196. # Step3: make output
  197. rois_per_image = bbox if is_cascade else paddle.gather(bbox,
  198. sampled_inds)
  199. sampled_gt_ind = matches if is_cascade else paddle.gather(matches,
  200. sampled_inds)
  201. if gt_bbox.shape[0] > 0:
  202. sampled_bbox = paddle.gather(gt_bbox, sampled_gt_ind)
  203. else:
  204. num = rois_per_image.shape[0]
  205. sampled_bbox = paddle.zeros([num, 4], dtype='float32')
  206. rois_per_image.stop_gradient = True
  207. sampled_gt_ind.stop_gradient = True
  208. sampled_bbox.stop_gradient = True
  209. tgt_labels.append(sampled_gt_classes)
  210. tgt_bboxes.append(sampled_bbox)
  211. rois_with_gt.append(rois_per_image)
  212. tgt_gt_inds.append(sampled_gt_ind)
  213. new_rois_num.append(paddle.shape(sampled_inds)[0])
  214. new_rois_num = paddle.concat(new_rois_num)
  215. return rois_with_gt, tgt_labels, tgt_bboxes, tgt_gt_inds, new_rois_num
  216. def sample_bbox(matches,
  217. match_labels,
  218. gt_classes,
  219. batch_size_per_im,
  220. fg_fraction,
  221. num_classes,
  222. use_random=True,
  223. is_cascade=False):
  224. n_gt = gt_classes.shape[0]
  225. if n_gt == 0:
  226. # No truth, assign everything to background
  227. gt_classes = paddle.ones(matches.shape, dtype='int32') * num_classes
  228. #return matches, match_labels + num_classes
  229. else:
  230. gt_classes = paddle.gather(gt_classes, matches)
  231. gt_classes = paddle.where(match_labels == 0,
  232. paddle.ones_like(gt_classes) * num_classes,
  233. gt_classes)
  234. gt_classes = paddle.where(match_labels == -1,
  235. paddle.ones_like(gt_classes) * -1, gt_classes)
  236. if is_cascade:
  237. index = paddle.arange(matches.shape[0])
  238. return index, gt_classes
  239. rois_per_image = int(batch_size_per_im)
  240. fg_inds, bg_inds = subsample_labels(gt_classes, rois_per_image, fg_fraction,
  241. num_classes, use_random)
  242. if fg_inds.shape[0] == 0 and bg_inds.shape[0] == 0:
  243. # fake output labeled with -1 when all boxes are neither
  244. # foreground nor background
  245. sampled_inds = paddle.zeros([1], dtype='int32')
  246. else:
  247. sampled_inds = paddle.concat([fg_inds, bg_inds])
  248. sampled_gt_classes = paddle.gather(gt_classes, sampled_inds)
  249. return sampled_inds, sampled_gt_classes
  250. def polygons_to_mask(polygons, height, width):
  251. """
  252. Convert the polygons to mask format
  253. Args:
  254. polygons (list[ndarray]): each array has shape (Nx2,)
  255. height (int): mask height
  256. width (int): mask width
  257. Returns:
  258. ndarray: a bool mask of shape (height, width)
  259. """
  260. import pycocotools.mask as mask_util
  261. assert len(polygons) > 0, "COCOAPI does not support empty polygons"
  262. rles = mask_util.frPyObjects(polygons, height, width)
  263. rle = mask_util.merge(rles)
  264. return mask_util.decode(rle).astype(np.bool_)
  265. def rasterize_polygons_within_box(poly, box, resolution):
  266. w, h = box[2] - box[0], box[3] - box[1]
  267. polygons = [np.asarray(p, dtype=np.float64) for p in poly]
  268. for p in polygons:
  269. p[0::2] = p[0::2] - box[0]
  270. p[1::2] = p[1::2] - box[1]
  271. ratio_h = resolution / max(h, 0.1)
  272. ratio_w = resolution / max(w, 0.1)
  273. if ratio_h == ratio_w:
  274. for p in polygons:
  275. p *= ratio_h
  276. else:
  277. for p in polygons:
  278. p[0::2] *= ratio_w
  279. p[1::2] *= ratio_h
  280. # 3. Rasterize the polygons with coco api
  281. mask = polygons_to_mask(polygons, resolution, resolution)
  282. mask = paddle.to_tensor(mask, dtype='int32')
  283. return mask
  284. def generate_mask_target(gt_segms, rois, labels_int32, sampled_gt_inds,
  285. num_classes, resolution):
  286. mask_rois = []
  287. mask_rois_num = []
  288. tgt_masks = []
  289. tgt_classes = []
  290. mask_index = []
  291. tgt_weights = []
  292. for k in range(len(rois)):
  293. labels_per_im = labels_int32[k]
  294. # select rois labeled with foreground
  295. fg_inds = paddle.nonzero(
  296. paddle.logical_and(labels_per_im != -1, labels_per_im !=
  297. num_classes))
  298. has_fg = True
  299. # generate fake roi if foreground is empty
  300. if fg_inds.numel() == 0:
  301. has_fg = False
  302. fg_inds = paddle.ones([1, 1], dtype='int64')
  303. inds_per_im = sampled_gt_inds[k]
  304. inds_per_im = paddle.gather(inds_per_im, fg_inds)
  305. rois_per_im = rois[k]
  306. fg_rois = paddle.gather(rois_per_im, fg_inds)
  307. # Copy the foreground roi to cpu
  308. # to generate mask target with ground-truth
  309. boxes = fg_rois.numpy()
  310. gt_segms_per_im = gt_segms[k]
  311. new_segm = []
  312. inds_per_im = inds_per_im.numpy()
  313. if len(gt_segms_per_im) > 0:
  314. for i in inds_per_im:
  315. new_segm.append(gt_segms_per_im[i])
  316. fg_inds_new = fg_inds.reshape([-1]).numpy()
  317. results = []
  318. if len(gt_segms_per_im) > 0:
  319. for j in range(fg_inds_new.shape[0]):
  320. results.append(
  321. rasterize_polygons_within_box(new_segm[j], boxes[j],
  322. resolution))
  323. else:
  324. results.append(paddle.ones([resolution, resolution], dtype='int32'))
  325. fg_classes = paddle.gather(labels_per_im, fg_inds)
  326. weight = paddle.ones([fg_rois.shape[0]], dtype='float32')
  327. if not has_fg:
  328. # now all sampled classes are background
  329. # which will cause error in loss calculation,
  330. # make fake classes with weight of 0.
  331. fg_classes = paddle.zeros([1], dtype='int32')
  332. weight = weight - 1
  333. tgt_mask = paddle.stack(results)
  334. tgt_mask.stop_gradient = True
  335. fg_rois.stop_gradient = True
  336. mask_index.append(fg_inds)
  337. mask_rois.append(fg_rois)
  338. mask_rois_num.append(paddle.shape(fg_rois)[0])
  339. tgt_classes.append(fg_classes)
  340. tgt_masks.append(tgt_mask)
  341. tgt_weights.append(weight)
  342. mask_index = paddle.concat(mask_index)
  343. mask_rois_num = paddle.concat(mask_rois_num)
  344. tgt_classes = paddle.concat(tgt_classes, axis=0)
  345. tgt_masks = paddle.concat(tgt_masks, axis=0)
  346. tgt_weights = paddle.concat(tgt_weights, axis=0)
  347. return mask_rois, mask_rois_num, tgt_classes, tgt_masks, mask_index, tgt_weights
  348. def libra_sample_pos(max_overlaps, max_classes, pos_inds, num_expected):
  349. if len(pos_inds) <= num_expected:
  350. return pos_inds
  351. else:
  352. unique_gt_inds = np.unique(max_classes[pos_inds])
  353. num_gts = len(unique_gt_inds)
  354. num_per_gt = int(round(num_expected / float(num_gts)) + 1)
  355. sampled_inds = []
  356. for i in unique_gt_inds:
  357. inds = np.nonzero(max_classes == i)[0]
  358. before_len = len(inds)
  359. inds = list(set(inds) & set(pos_inds))
  360. after_len = len(inds)
  361. if len(inds) > num_per_gt:
  362. inds = np.random.choice(inds, size=num_per_gt, replace=False)
  363. sampled_inds.extend(list(inds)) # combine as a new sampler
  364. if len(sampled_inds) < num_expected:
  365. num_extra = num_expected - len(sampled_inds)
  366. extra_inds = np.array(list(set(pos_inds) - set(sampled_inds)))
  367. assert len(sampled_inds) + len(extra_inds) == len(pos_inds), \
  368. "sum of sampled_inds({}) and extra_inds({}) length must be equal with pos_inds({})!".format(
  369. len(sampled_inds), len(extra_inds), len(pos_inds))
  370. if len(extra_inds) > num_extra:
  371. extra_inds = np.random.choice(
  372. extra_inds, size=num_extra, replace=False)
  373. sampled_inds.extend(extra_inds.tolist())
  374. elif len(sampled_inds) > num_expected:
  375. sampled_inds = np.random.choice(
  376. sampled_inds, size=num_expected, replace=False)
  377. return paddle.to_tensor(sampled_inds)
  378. def libra_sample_via_interval(max_overlaps, full_set, num_expected, floor_thr,
  379. num_bins, bg_thresh):
  380. max_iou = max_overlaps.max()
  381. iou_interval = (max_iou - floor_thr) / num_bins
  382. per_num_expected = int(num_expected / num_bins)
  383. sampled_inds = []
  384. for i in range(num_bins):
  385. start_iou = floor_thr + i * iou_interval
  386. end_iou = floor_thr + (i + 1) * iou_interval
  387. tmp_set = set(
  388. np.where(
  389. np.logical_and(max_overlaps >= start_iou, max_overlaps <
  390. end_iou))[0])
  391. tmp_inds = list(tmp_set & full_set)
  392. if len(tmp_inds) > per_num_expected:
  393. tmp_sampled_set = np.random.choice(
  394. tmp_inds, size=per_num_expected, replace=False)
  395. else:
  396. tmp_sampled_set = np.array(tmp_inds, dtype=np.int32)
  397. sampled_inds.append(tmp_sampled_set)
  398. sampled_inds = np.concatenate(sampled_inds)
  399. if len(sampled_inds) < num_expected:
  400. num_extra = num_expected - len(sampled_inds)
  401. extra_inds = np.array(list(full_set - set(sampled_inds)))
  402. assert len(sampled_inds) + len(extra_inds) == len(full_set), \
  403. "sum of sampled_inds({}) and extra_inds({}) length must be equal with full_set({})!".format(
  404. len(sampled_inds), len(extra_inds), len(full_set))
  405. if len(extra_inds) > num_extra:
  406. extra_inds = np.random.choice(extra_inds, num_extra, replace=False)
  407. sampled_inds = np.concatenate([sampled_inds, extra_inds])
  408. return sampled_inds
  409. def libra_sample_neg(max_overlaps,
  410. max_classes,
  411. neg_inds,
  412. num_expected,
  413. floor_thr=-1,
  414. floor_fraction=0,
  415. num_bins=3,
  416. bg_thresh=0.5):
  417. if len(neg_inds) <= num_expected:
  418. return neg_inds
  419. else:
  420. # balance sampling for negative samples
  421. neg_set = set(neg_inds.tolist())
  422. if floor_thr > 0:
  423. floor_set = set(
  424. np.where(
  425. np.logical_and(max_overlaps >= 0, max_overlaps < floor_thr))
  426. [0])
  427. iou_sampling_set = set(np.where(max_overlaps >= floor_thr)[0])
  428. elif floor_thr == 0:
  429. floor_set = set(np.where(max_overlaps == 0)[0])
  430. iou_sampling_set = set(np.where(max_overlaps > floor_thr)[0])
  431. else:
  432. floor_set = set()
  433. iou_sampling_set = set(np.where(max_overlaps > floor_thr)[0])
  434. floor_thr = 0
  435. floor_neg_inds = list(floor_set & neg_set)
  436. iou_sampling_neg_inds = list(iou_sampling_set & neg_set)
  437. num_expected_iou_sampling = int(num_expected * (1 - floor_fraction))
  438. if len(iou_sampling_neg_inds) > num_expected_iou_sampling:
  439. if num_bins >= 2:
  440. iou_sampled_inds = libra_sample_via_interval(
  441. max_overlaps,
  442. set(iou_sampling_neg_inds), num_expected_iou_sampling,
  443. floor_thr, num_bins, bg_thresh)
  444. else:
  445. iou_sampled_inds = np.random.choice(
  446. iou_sampling_neg_inds,
  447. size=num_expected_iou_sampling,
  448. replace=False)
  449. else:
  450. iou_sampled_inds = np.array(iou_sampling_neg_inds, dtype=np.int32)
  451. num_expected_floor = num_expected - len(iou_sampled_inds)
  452. if len(floor_neg_inds) > num_expected_floor:
  453. sampled_floor_inds = np.random.choice(
  454. floor_neg_inds, size=num_expected_floor, replace=False)
  455. else:
  456. sampled_floor_inds = np.array(floor_neg_inds, dtype=np.int32)
  457. sampled_inds = np.concatenate((sampled_floor_inds, iou_sampled_inds))
  458. if len(sampled_inds) < num_expected:
  459. num_extra = num_expected - len(sampled_inds)
  460. extra_inds = np.array(list(neg_set - set(sampled_inds)))
  461. if len(extra_inds) > num_extra:
  462. extra_inds = np.random.choice(
  463. extra_inds, size=num_extra, replace=False)
  464. sampled_inds = np.concatenate((sampled_inds, extra_inds))
  465. return paddle.to_tensor(sampled_inds)
  466. def libra_label_box(anchors, gt_boxes, gt_classes, positive_overlap,
  467. negative_overlap, num_classes):
  468. # TODO: use paddle API to speed up
  469. gt_classes = gt_classes.numpy()
  470. gt_overlaps = np.zeros((anchors.shape[0], num_classes))
  471. matches = np.zeros((anchors.shape[0]), dtype=np.int32)
  472. if len(gt_boxes) > 0:
  473. proposal_to_gt_overlaps = bbox_overlaps(anchors, gt_boxes).numpy()
  474. overlaps_argmax = proposal_to_gt_overlaps.argmax(axis=1)
  475. overlaps_max = proposal_to_gt_overlaps.max(axis=1)
  476. # Boxes which with non-zero overlap with gt boxes
  477. overlapped_boxes_ind = np.where(overlaps_max > 0)[0]
  478. overlapped_boxes_gt_classes = gt_classes[overlaps_argmax[
  479. overlapped_boxes_ind]]
  480. for idx in range(len(overlapped_boxes_ind)):
  481. gt_overlaps[overlapped_boxes_ind[idx], overlapped_boxes_gt_classes[
  482. idx]] = overlaps_max[overlapped_boxes_ind[idx]]
  483. matches[overlapped_boxes_ind[idx]] = overlaps_argmax[
  484. overlapped_boxes_ind[idx]]
  485. gt_overlaps = paddle.to_tensor(gt_overlaps)
  486. matches = paddle.to_tensor(matches)
  487. matched_vals = paddle.max(gt_overlaps, axis=1)
  488. match_labels = paddle.full(matches.shape, -1, dtype='int32')
  489. match_labels = paddle.where(matched_vals < negative_overlap,
  490. paddle.zeros_like(match_labels), match_labels)
  491. match_labels = paddle.where(matched_vals >= positive_overlap,
  492. paddle.ones_like(match_labels), match_labels)
  493. return matches, match_labels, matched_vals
  494. def libra_sample_bbox(matches,
  495. match_labels,
  496. matched_vals,
  497. gt_classes,
  498. batch_size_per_im,
  499. num_classes,
  500. fg_fraction,
  501. fg_thresh,
  502. bg_thresh,
  503. num_bins,
  504. use_random=True,
  505. is_cascade_rcnn=False):
  506. rois_per_image = int(batch_size_per_im)
  507. fg_rois_per_im = int(np.round(fg_fraction * rois_per_image))
  508. bg_rois_per_im = rois_per_image - fg_rois_per_im
  509. if is_cascade_rcnn:
  510. fg_inds = paddle.nonzero(matched_vals >= fg_thresh)
  511. bg_inds = paddle.nonzero(matched_vals < bg_thresh)
  512. else:
  513. matched_vals_np = matched_vals.numpy()
  514. match_labels_np = match_labels.numpy()
  515. # sample fg
  516. fg_inds = paddle.nonzero(matched_vals >= fg_thresh).flatten()
  517. fg_nums = int(np.minimum(fg_rois_per_im, fg_inds.shape[0]))
  518. if (fg_inds.shape[0] > fg_nums) and use_random:
  519. fg_inds = libra_sample_pos(matched_vals_np, match_labels_np,
  520. fg_inds.numpy(), fg_rois_per_im)
  521. fg_inds = fg_inds[:fg_nums]
  522. # sample bg
  523. bg_inds = paddle.nonzero(matched_vals < bg_thresh).flatten()
  524. bg_nums = int(np.minimum(rois_per_image - fg_nums, bg_inds.shape[0]))
  525. if (bg_inds.shape[0] > bg_nums) and use_random:
  526. bg_inds = libra_sample_neg(
  527. matched_vals_np,
  528. match_labels_np,
  529. bg_inds.numpy(),
  530. bg_rois_per_im,
  531. num_bins=num_bins,
  532. bg_thresh=bg_thresh)
  533. bg_inds = bg_inds[:bg_nums]
  534. sampled_inds = paddle.concat([fg_inds, bg_inds])
  535. gt_classes = paddle.gather(gt_classes, matches)
  536. gt_classes = paddle.where(match_labels == 0,
  537. paddle.ones_like(gt_classes) * num_classes,
  538. gt_classes)
  539. gt_classes = paddle.where(match_labels == -1,
  540. paddle.ones_like(gt_classes) * -1, gt_classes)
  541. sampled_gt_classes = paddle.gather(gt_classes, sampled_inds)
  542. return sampled_inds, sampled_gt_classes
  543. def libra_generate_proposal_target(rpn_rois,
  544. gt_classes,
  545. gt_boxes,
  546. batch_size_per_im,
  547. fg_fraction,
  548. fg_thresh,
  549. bg_thresh,
  550. num_classes,
  551. use_random=True,
  552. is_cascade_rcnn=False,
  553. max_overlaps=None,
  554. num_bins=3):
  555. rois_with_gt = []
  556. tgt_labels = []
  557. tgt_bboxes = []
  558. sampled_max_overlaps = []
  559. tgt_gt_inds = []
  560. new_rois_num = []
  561. for i, rpn_roi in enumerate(rpn_rois):
  562. max_overlap = max_overlaps[i] if is_cascade_rcnn else None
  563. gt_bbox = gt_boxes[i]
  564. gt_class = paddle.squeeze(gt_classes[i], axis=-1)
  565. if is_cascade_rcnn:
  566. rpn_roi = filter_roi(rpn_roi, max_overlap)
  567. bbox = paddle.concat([rpn_roi, gt_bbox])
  568. # Step1: label bbox
  569. matches, match_labels, matched_vals = libra_label_box(
  570. bbox, gt_bbox, gt_class, fg_thresh, bg_thresh, num_classes)
  571. # Step2: sample bbox
  572. sampled_inds, sampled_gt_classes = libra_sample_bbox(
  573. matches, match_labels, matched_vals, gt_class, batch_size_per_im,
  574. num_classes, fg_fraction, fg_thresh, bg_thresh, num_bins,
  575. use_random, is_cascade_rcnn)
  576. # Step3: make output
  577. rois_per_image = paddle.gather(bbox, sampled_inds)
  578. sampled_gt_ind = paddle.gather(matches, sampled_inds)
  579. sampled_bbox = paddle.gather(gt_bbox, sampled_gt_ind)
  580. sampled_overlap = paddle.gather(matched_vals, sampled_inds)
  581. rois_per_image.stop_gradient = True
  582. sampled_gt_ind.stop_gradient = True
  583. sampled_bbox.stop_gradient = True
  584. sampled_overlap.stop_gradient = True
  585. tgt_labels.append(sampled_gt_classes)
  586. tgt_bboxes.append(sampled_bbox)
  587. rois_with_gt.append(rois_per_image)
  588. sampled_max_overlaps.append(sampled_overlap)
  589. tgt_gt_inds.append(sampled_gt_ind)
  590. new_rois_num.append(paddle.shape(sampled_inds)[0])
  591. new_rois_num = paddle.concat(new_rois_num)
  592. # rois_with_gt, tgt_labels, tgt_bboxes, tgt_gt_inds, new_rois_num
  593. return rois_with_gt, tgt_labels, tgt_bboxes, tgt_gt_inds, new_rois_num