# annotation_cropper.py
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
import math
import random
from collections import defaultdict
from copy import deepcopy
from typing import List, Tuple

import numpy as np

from .chip_box_utils import (find_chips_to_cover_overlaped_boxes,
                             intersection_over_box, nms, transform_chip_box,
                             transform_chip_boxes2image_boxes)
  25. class AnnoCropper(object):
  26. def __init__(self,
  27. image_target_sizes: List[int],
  28. valid_box_ratio_ranges: List[List[float]],
  29. chip_target_size: int,
  30. chip_target_stride: int,
  31. use_neg_chip: bool=False,
  32. max_neg_num_per_im: int=8,
  33. max_per_img: int=-1,
  34. nms_thresh: int=0.5):
  35. """
  36. Generate chips by chip_target_size and chip_target_stride.
  37. These two parameters just like kernel_size and stride in cnn.
  38. Each image has its raw size. After resizing, then get its target size.
  39. The resizing scale = target_size / raw_size.
  40. So are chips of the image.
  41. box_ratio = box_raw_size / image_raw_size = box_target_size / image_target_size
  42. The 'size' above mentioned is the size of long-side of image, box or chip.
  43. :param image_target_sizes: [2000, 1000]
  44. :param valid_box_ratio_ranges: [[-1, 0.1],[0.08, -1]]
  45. :param chip_target_size: 500
  46. :param chip_target_stride: 200
  47. """
  48. self.target_sizes = image_target_sizes
  49. self.valid_box_ratio_ranges = valid_box_ratio_ranges
  50. assert len(self.target_sizes) == len(self.valid_box_ratio_ranges)
  51. self.scale_num = len(self.target_sizes)
  52. self.chip_target_size = chip_target_size # is target size
  53. self.chip_target_stride = chip_target_stride # is target stride
  54. self.use_neg_chip = use_neg_chip
  55. self.max_neg_num_per_im = max_neg_num_per_im
  56. self.max_per_img = max_per_img
  57. self.nms_thresh = nms_thresh
  58. def crop_anno_records(self, records: List[dict]):
  59. """
  60. The main logic:
  61. # foreach record(image):
  62. # foreach scale:
  63. # 1 generate chips by chip size and stride for each scale
  64. # 2 get pos chips
  65. # - validate boxes: current scale; h,w >= 1
  66. # - find pos chips greedily by valid gt boxes in each scale
  67. # - for every valid gt box, find its corresponding pos chips in each scale
  68. # 3 get neg chips
  69. # - If given proposals, find neg boxes in them which are not in pos chips
  70. # - If got neg boxes in last step, we find neg chips and assign neg boxes to neg chips such as 2.
  71. # 4 sample neg chips if too much each image
  72. # transform this image-scale annotations to chips(pos chips&neg chips) annotations
  73. :param records, standard coco_record but with extra key `proposals`(Px4), which are predicted by stage1
  74. model and maybe have neg boxes in them.
  75. :return: new_records, list of dict like
  76. {
  77. 'im_file': 'fake_image1.jpg',
  78. 'im_id': np.array([1]), # new _global_chip_id as im_id
  79. 'h': h, # chip height
  80. 'w': w, # chip width
  81. 'is_crowd': is_crowd, # Nx1 -> Mx1
  82. 'gt_class': gt_class, # Nx1 -> Mx1
  83. 'gt_bbox': gt_bbox, # Nx4 -> Mx4, 4 represents [x1,y1,x2,y2]
  84. 'gt_poly': gt_poly, # [None]xN -> [None]xM
  85. 'chip': [x1, y1, x2, y2] # added
  86. }
  87. Attention:
  88. ------------------------------>x
  89. |
  90. | (x1,y1)------
  91. | | |
  92. | | |
  93. | | |
  94. | | |
  95. | | |
  96. | ----------
  97. | (x2,y2)
  98. |
  99. y
  100. If we use [x1, y1, x2, y2] to represent boxes or chips,
  101. (x1,y1) is the left-top point which is in the box,
  102. but (x2,y2) is the right-bottom point which is not in the box.
  103. So x1 in [0, w-1], x2 in [1, w], y1 in [0, h-1], y2 in [1,h].
  104. And you can use x2-x1 to get width, and you can use image[y1:y2, x1:x2] to get the box area.
  105. """
  106. self.chip_records = []
  107. self._global_chip_id = 1
  108. for r in records:
  109. self._cur_im_pos_chips = [
  110. ] # element: (chip, boxes_idx), chip is [x1, y1, x2, y2], boxes_ids is List[int]
  111. self._cur_im_neg_chips = [] # element: (chip, neg_box_num)
  112. for scale_i in range(self.scale_num):
  113. self._get_current_scale_parameters(scale_i, r)
  114. # Cx4
  115. chips = self._create_chips(r['h'], r['w'], self._cur_scale)
  116. # # dict: chipid->[box_id, ...]
  117. pos_chip2boxes_idx = self._get_valid_boxes_and_pos_chips(
  118. r['gt_bbox'], chips)
  119. # dict: chipid->neg_box_num
  120. neg_chip2box_num = self._get_neg_boxes_and_chips(
  121. chips,
  122. list(pos_chip2boxes_idx.keys()), r.get('proposals', None))
  123. self._add_to_cur_im_chips(chips, pos_chip2boxes_idx,
  124. neg_chip2box_num)
  125. cur_image_records = self._trans_all_chips2annotations(r)
  126. self.chip_records.extend(cur_image_records)
  127. return self.chip_records
  128. def _add_to_cur_im_chips(self, chips, pos_chip2boxes_idx, neg_chip2box_num):
  129. for pos_chipid, boxes_idx in pos_chip2boxes_idx.items():
  130. chip = np.array(chips[pos_chipid]) # copy chips slice
  131. self._cur_im_pos_chips.append((chip, boxes_idx))
  132. if neg_chip2box_num is None:
  133. return
  134. for neg_chipid, neg_box_num in neg_chip2box_num.items():
  135. chip = np.array(chips[neg_chipid])
  136. self._cur_im_neg_chips.append((chip, neg_box_num))
  137. def _trans_all_chips2annotations(self, r):
  138. gt_bbox = r['gt_bbox']
  139. im_file = r['im_file']
  140. is_crowd = r['is_crowd']
  141. gt_class = r['gt_class']
  142. # gt_poly = r['gt_poly'] # [None]xN
  143. # remaining keys: im_id, h, w
  144. chip_records = self._trans_pos_chips2annotations(im_file, gt_bbox,
  145. is_crowd, gt_class)
  146. if not self.use_neg_chip:
  147. return chip_records
  148. sampled_neg_chips = self._sample_neg_chips()
  149. neg_chip_records = self._trans_neg_chips2annotations(im_file,
  150. sampled_neg_chips)
  151. chip_records.extend(neg_chip_records)
  152. return chip_records
  153. def _trans_pos_chips2annotations(self, im_file, gt_bbox, is_crowd,
  154. gt_class):
  155. chip_records = []
  156. for chip, boxes_idx in self._cur_im_pos_chips:
  157. chip_bbox, final_boxes_idx = transform_chip_box(gt_bbox, boxes_idx,
  158. chip)
  159. x1, y1, x2, y2 = chip
  160. chip_h = y2 - y1
  161. chip_w = x2 - x1
  162. rec = {
  163. 'im_file': im_file,
  164. 'im_id': np.array([self._global_chip_id]),
  165. 'h': chip_h,
  166. 'w': chip_w,
  167. 'gt_bbox': chip_bbox,
  168. 'is_crowd': is_crowd[final_boxes_idx].copy(),
  169. 'gt_class': gt_class[final_boxes_idx].copy(),
  170. # 'gt_poly': [None] * len(final_boxes_idx),
  171. 'chip': chip
  172. }
  173. self._global_chip_id += 1
  174. chip_records.append(rec)
  175. return chip_records
  176. def _sample_neg_chips(self):
  177. pos_num = len(self._cur_im_pos_chips)
  178. neg_num = len(self._cur_im_neg_chips)
  179. sample_num = min(pos_num + 2, self.max_neg_num_per_im)
  180. assert sample_num >= 1
  181. if neg_num <= sample_num:
  182. return self._cur_im_neg_chips
  183. candidate_num = int(sample_num * 1.5)
  184. candidate_neg_chips = sorted(
  185. self._cur_im_neg_chips, key=lambda x: -x[1])[:candidate_num]
  186. random.shuffle(candidate_neg_chips)
  187. sampled_neg_chips = candidate_neg_chips[:sample_num]
  188. return sampled_neg_chips
  189. def _trans_neg_chips2annotations(self,
  190. im_file: str,
  191. sampled_neg_chips: List[Tuple]):
  192. chip_records = []
  193. for chip, neg_box_num in sampled_neg_chips:
  194. x1, y1, x2, y2 = chip
  195. chip_h = y2 - y1
  196. chip_w = x2 - x1
  197. rec = {
  198. 'im_file': im_file,
  199. 'im_id': np.array([self._global_chip_id]),
  200. 'h': chip_h,
  201. 'w': chip_w,
  202. 'gt_bbox': np.zeros(
  203. (0, 4), dtype=np.float32),
  204. 'is_crowd': np.zeros(
  205. (0, 1), dtype=np.int32),
  206. 'gt_class': np.zeros(
  207. (0, 1), dtype=np.int32),
  208. # 'gt_poly': [],
  209. 'chip': chip
  210. }
  211. self._global_chip_id += 1
  212. chip_records.append(rec)
  213. return chip_records
  214. def _get_current_scale_parameters(self, scale_i, r):
  215. im_size = max(r['h'], r['w'])
  216. im_target_size = self.target_sizes[scale_i]
  217. self._cur_im_size, self._cur_im_target_size = im_size, im_target_size
  218. self._cur_scale = self._get_current_scale(im_target_size, im_size)
  219. self._cur_valid_ratio_range = self.valid_box_ratio_ranges[scale_i]
  220. def _get_current_scale(self, im_target_size, im_size):
  221. return im_target_size / im_size
  222. def _create_chips(self, h: int, w: int, scale: float):
  223. """
  224. Generate chips by chip_target_size and chip_target_stride.
  225. These two parameters just like kernel_size and stride in cnn.
  226. :return: chips, Cx4, xy in raw size dimension
  227. """
  228. chip_size = self.chip_target_size # omit target for simplicity
  229. stride = self.chip_target_stride
  230. width = int(scale * w)
  231. height = int(scale * h)
  232. min_chip_location_diff = 20 # in target size
  233. assert chip_size >= stride
  234. chip_overlap = chip_size - stride
  235. if (width - chip_overlap
  236. ) % stride > min_chip_location_diff: # 不能被stride整除的部分比较大,则保留
  237. w_steps = max(1, int(math.ceil((width - chip_overlap) / stride)))
  238. else: # 不能被stride整除的部分比较小,则丢弃
  239. w_steps = max(1, int(math.floor((width - chip_overlap) / stride)))
  240. if (height - chip_overlap) % stride > min_chip_location_diff:
  241. h_steps = max(1, int(math.ceil((height - chip_overlap) / stride)))
  242. else:
  243. h_steps = max(1, int(math.floor((height - chip_overlap) / stride)))
  244. chips = list()
  245. for j in range(h_steps):
  246. for i in range(w_steps):
  247. x1 = i * stride
  248. y1 = j * stride
  249. x2 = min(x1 + chip_size, width)
  250. y2 = min(y1 + chip_size, height)
  251. chips.append([x1, y1, x2, y2])
  252. # check chip size
  253. for item in chips:
  254. if item[2] - item[0] > chip_size * 1.1 or item[3] - item[
  255. 1] > chip_size * 1.1:
  256. raise ValueError(item)
  257. chips = np.array(chips, dtype=np.float32)
  258. raw_size_chips = chips / scale
  259. return raw_size_chips
  260. def _get_valid_boxes_and_pos_chips(self, gt_bbox, chips):
  261. valid_ratio_range = self._cur_valid_ratio_range
  262. im_size = self._cur_im_size
  263. scale = self._cur_scale
  264. # Nx4 N
  265. valid_boxes, valid_boxes_idx = self._validate_boxes(
  266. valid_ratio_range, im_size, gt_bbox, scale)
  267. # dict: chipid->[box_id, ...]
  268. pos_chip2boxes_idx = self._find_pos_chips(chips, valid_boxes,
  269. valid_boxes_idx)
  270. return pos_chip2boxes_idx
  271. def _validate_boxes(self,
  272. valid_ratio_range: List[float],
  273. im_size: int,
  274. gt_boxes: 'np.array of Nx4',
  275. scale: float):
  276. """
  277. :return: valid_boxes: Nx4, valid_boxes_idx: N
  278. """
  279. ws = (gt_boxes[:, 2] - gt_boxes[:, 0]).astype(np.int32)
  280. hs = (gt_boxes[:, 3] - gt_boxes[:, 1]).astype(np.int32)
  281. maxs = np.maximum(ws, hs)
  282. box_ratio = maxs / im_size
  283. mins = np.minimum(ws, hs)
  284. target_mins = mins * scale
  285. low = valid_ratio_range[0] if valid_ratio_range[0] > 0 else 0
  286. high = valid_ratio_range[1] if valid_ratio_range[1] > 0 else np.finfo(
  287. np.float32).max
  288. valid_boxes_idx = np.nonzero((low <= box_ratio) & (box_ratio < high) & (
  289. target_mins >= 2))[0]
  290. valid_boxes = gt_boxes[valid_boxes_idx]
  291. return valid_boxes, valid_boxes_idx
  292. def _find_pos_chips(self,
  293. chips: 'Cx4',
  294. valid_boxes: 'Bx4',
  295. valid_boxes_idx: 'B'):
  296. """
  297. :return: pos_chip2boxes_idx, dict: chipid->[box_id, ...]
  298. """
  299. iob = intersection_over_box(chips, valid_boxes) # overlap, CxB
  300. iob_threshold_to_find_chips = 1.
  301. pos_chip_ids, _ = self._find_chips_to_cover_overlaped_boxes(
  302. iob, iob_threshold_to_find_chips)
  303. pos_chip_ids = set(pos_chip_ids)
  304. iob_threshold_to_assign_box = 0.5
  305. pos_chip2boxes_idx = self._assign_boxes_to_pos_chips(
  306. iob, iob_threshold_to_assign_box, pos_chip_ids, valid_boxes_idx)
  307. return pos_chip2boxes_idx
  308. def _find_chips_to_cover_overlaped_boxes(self, iob, overlap_threshold):
  309. return find_chips_to_cover_overlaped_boxes(iob, overlap_threshold)
  310. def _assign_boxes_to_pos_chips(self, iob, overlap_threshold, pos_chip_ids,
  311. valid_boxes_idx):
  312. chip_ids, box_ids = np.nonzero(iob >= overlap_threshold)
  313. pos_chip2boxes_idx = defaultdict(list)
  314. for chip_id, box_id in zip(chip_ids, box_ids):
  315. if chip_id not in pos_chip_ids:
  316. continue
  317. raw_gt_box_idx = valid_boxes_idx[box_id]
  318. pos_chip2boxes_idx[chip_id].append(raw_gt_box_idx)
  319. return pos_chip2boxes_idx
  320. def _get_neg_boxes_and_chips(self,
  321. chips: 'Cx4',
  322. pos_chip_ids: 'D',
  323. proposals: 'Px4'):
  324. """
  325. :param chips:
  326. :param pos_chip_ids:
  327. :param proposals:
  328. :return: neg_chip2box_num, None or dict: chipid->neg_box_num
  329. """
  330. if not self.use_neg_chip:
  331. return None
  332. # train proposals maybe None
  333. if proposals is None or len(proposals) < 1:
  334. return None
  335. valid_ratio_range = self._cur_valid_ratio_range
  336. im_size = self._cur_im_size
  337. scale = self._cur_scale
  338. valid_props, _ = self._validate_boxes(valid_ratio_range, im_size,
  339. proposals, scale)
  340. neg_boxes = self._find_neg_boxes(chips, pos_chip_ids, valid_props)
  341. neg_chip2box_num = self._find_neg_chips(chips, pos_chip_ids, neg_boxes)
  342. return neg_chip2box_num
  343. def _find_neg_boxes(self,
  344. chips: 'Cx4',
  345. pos_chip_ids: 'D',
  346. valid_props: 'Px4'):
  347. """
  348. :return: neg_boxes: Nx4
  349. """
  350. if len(pos_chip_ids) == 0:
  351. return valid_props
  352. pos_chips = chips[pos_chip_ids]
  353. iob = intersection_over_box(pos_chips, valid_props)
  354. overlap_per_prop = np.max(iob, axis=0)
  355. non_overlap_props_idx = overlap_per_prop < 0.5
  356. neg_boxes = valid_props[non_overlap_props_idx]
  357. return neg_boxes
  358. def _find_neg_chips(self, chips: 'Cx4', pos_chip_ids: 'D',
  359. neg_boxes: 'Nx4'):
  360. """
  361. :return: neg_chip2box_num, dict: chipid->neg_box_num
  362. """
  363. neg_chip_ids = np.setdiff1d(np.arange(len(chips)), pos_chip_ids)
  364. neg_chips = chips[neg_chip_ids]
  365. iob = intersection_over_box(neg_chips, neg_boxes)
  366. iob_threshold_to_find_chips = 0.7
  367. chosen_neg_chip_ids, chip_id2overlap_box_num = \
  368. self._find_chips_to_cover_overlaped_boxes(iob, iob_threshold_to_find_chips)
  369. neg_chipid2box_num = {}
  370. for cid in chosen_neg_chip_ids:
  371. box_num = chip_id2overlap_box_num[cid]
  372. raw_chip_id = neg_chip_ids[cid]
  373. neg_chipid2box_num[raw_chip_id] = box_num
  374. return neg_chipid2box_num
  375. def crop_infer_anno_records(self, records: List[dict]):
  376. """
  377. transform image record to chips record
  378. :param records:
  379. :return: new_records, list of dict like
  380. {
  381. 'im_file': 'fake_image1.jpg',
  382. 'im_id': np.array([1]), # new _global_chip_id as im_id
  383. 'h': h, # chip height
  384. 'w': w, # chip width
  385. 'chip': [x1, y1, x2, y2] # added
  386. 'ori_im_h': ori_im_h # added, origin image height
  387. 'ori_im_w': ori_im_w # added, origin image width
  388. 'scale_i': 0 # added,
  389. }
  390. """
  391. self.chip_records = []
  392. self._global_chip_id = 1 # im_id start from 1
  393. self._global_chip_id2img_id = {}
  394. for r in records:
  395. for scale_i in range(self.scale_num):
  396. self._get_current_scale_parameters(scale_i, r)
  397. # Cx4
  398. chips = self._create_chips(r['h'], r['w'], self._cur_scale)
  399. cur_img_chip_record = self._get_chips_records(r, chips, scale_i)
  400. self.chip_records.extend(cur_img_chip_record)
  401. return self.chip_records
  402. def _get_chips_records(self, rec, chips, scale_i):
  403. cur_img_chip_records = []
  404. ori_im_h = rec["h"]
  405. ori_im_w = rec["w"]
  406. im_file = rec["im_file"]
  407. ori_im_id = rec["im_id"]
  408. for id, chip in enumerate(chips):
  409. chip_rec = {}
  410. x1, y1, x2, y2 = chip
  411. chip_h = y2 - y1
  412. chip_w = x2 - x1
  413. chip_rec["im_file"] = im_file
  414. chip_rec["im_id"] = self._global_chip_id
  415. chip_rec["h"] = chip_h
  416. chip_rec["w"] = chip_w
  417. chip_rec["chip"] = chip
  418. chip_rec["ori_im_h"] = ori_im_h
  419. chip_rec["ori_im_w"] = ori_im_w
  420. chip_rec["scale_i"] = scale_i
  421. self._global_chip_id2img_id[self._global_chip_id] = int(ori_im_id)
  422. self._global_chip_id += 1
  423. cur_img_chip_records.append(chip_rec)
  424. return cur_img_chip_records
  425. def aggregate_chips_detections(self, results, records=None):
  426. """
  427. # 1. transform chip dets to image dets
  428. # 2. nms boxes per image;
  429. # 3. format output results
  430. :param results:
  431. :param roidb:
  432. :return:
  433. """
  434. results = deepcopy(results)
  435. records = records if records else self.chip_records
  436. img_id2bbox = self._transform_chip2image_bboxes(results, records)
  437. nms_img_id2bbox = self._nms_dets(img_id2bbox)
  438. aggregate_results = self._reformat_results(nms_img_id2bbox)
  439. return aggregate_results
  440. def _transform_chip2image_bboxes(self, results, records):
  441. # 1. Transform chip dets to image dets;
  442. # 2. Filter valid range;
  443. # 3. Reformat and Aggregate chip dets to Get scale_cls_dets
  444. img_id2bbox = defaultdict(list)
  445. for result in results:
  446. bbox_locs = result['bbox']
  447. bbox_nums = result['bbox_num']
  448. if len(bbox_locs) == 1 and bbox_locs[0][
  449. 0] == -1: # current batch has no detections
  450. # bbox_locs = array([[-1.]], dtype=float32); bbox_nums = [[1]]
  451. # MultiClassNMS output: If there is no detected boxes for all images, lod will be set to {1} and Out only contains one value which is -1.
  452. continue
  453. im_ids = result['im_id'] # replace with range(len(bbox_nums))
  454. last_bbox_num = 0
  455. for idx, im_id in enumerate(im_ids):
  456. cur_bbox_len = bbox_nums[idx]
  457. bboxes = bbox_locs[last_bbox_num:last_bbox_num + cur_bbox_len]
  458. last_bbox_num += cur_bbox_len
  459. # box: [num_id, score, xmin, ymin, xmax, ymax]
  460. if len(bboxes) == 0: # current image has no detections
  461. continue
  462. chip_rec = records[int(im_id) -
  463. 1] # im_id starts from 1, type is np.int64
  464. image_size = max(chip_rec["ori_im_h"], chip_rec["ori_im_w"])
  465. bboxes = transform_chip_boxes2image_boxes(
  466. bboxes, chip_rec["chip"], chip_rec["ori_im_h"],
  467. chip_rec["ori_im_w"])
  468. scale_i = chip_rec["scale_i"]
  469. cur_scale = self._get_current_scale(self.target_sizes[scale_i],
  470. image_size)
  471. _, valid_boxes_idx = self._validate_boxes(
  472. self.valid_box_ratio_ranges[scale_i], image_size,
  473. bboxes[:, 2:], cur_scale)
  474. ori_img_id = self._global_chip_id2img_id[int(im_id)]
  475. img_id2bbox[ori_img_id].append(bboxes[valid_boxes_idx])
  476. return img_id2bbox
  477. def _nms_dets(self, img_id2bbox):
  478. # 1. NMS on each image-class
  479. # 2. Limit number of detections to MAX_PER_IMAGE if requested
  480. max_per_img = self.max_per_img
  481. nms_thresh = self.nms_thresh
  482. for img_id in img_id2bbox:
  483. box = img_id2bbox[
  484. img_id] # list of np.array of shape [N, 6], 6 is [label, score, x1, y1, x2, y2]
  485. box = np.concatenate(box, axis=0)
  486. nms_dets = nms(box, nms_thresh)
  487. if max_per_img > 0:
  488. if len(nms_dets) > max_per_img:
  489. keep = np.argsort(-nms_dets[:, 1])[:max_per_img]
  490. nms_dets = nms_dets[keep]
  491. img_id2bbox[img_id] = nms_dets
  492. return img_id2bbox
  493. def _reformat_results(self, img_id2bbox):
  494. """reformat results"""
  495. im_ids = img_id2bbox.keys()
  496. results = []
  497. for img_id in im_ids: # output by original im_id order
  498. if len(img_id2bbox[img_id]) == 0:
  499. bbox = np.array(
  500. [[-1., 0., 0., 0., 0., 0.]]) # edge case: no detections
  501. bbox_num = np.array([0])
  502. else:
  503. # np.array of shape [N, 6], 6 is [label, score, x1, y1, x2, y2]
  504. bbox = img_id2bbox[img_id]
  505. bbox_num = np.array([len(bbox)])
  506. res = dict(im_id=np.array([[img_id]]), bbox=bbox, bbox_num=bbox_num)
  507. results.append(res)
  508. return results