# task_aligned_assigner_cr.py (7.8 KB)
  1. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. from __future__ import absolute_import
  15. from __future__ import division
  16. from __future__ import print_function
  17. import paddle
  18. import paddle.nn as nn
  19. import paddle.nn.functional as F
  20. from ppdet.core.workspace import register
  21. from ..bbox_utils import batch_iou_similarity
  22. from .utils import (gather_topk_anchors, check_points_inside_bboxes,
  23. compute_max_iou_anchor)
# Names exported by a star-import of this module.
__all__ = ['TaskAlignedAssigner_CR']
  25. @register
  26. class TaskAlignedAssigner_CR(nn.Layer):
  27. """TOOD: Task-aligned One-stage Object Detection with Center R
  28. """
  29. def __init__(self,
  30. topk=13,
  31. alpha=1.0,
  32. beta=6.0,
  33. center_radius=None,
  34. eps=1e-9):
  35. super(TaskAlignedAssigner_CR, self).__init__()
  36. self.topk = topk
  37. self.alpha = alpha
  38. self.beta = beta
  39. self.center_radius = center_radius
  40. self.eps = eps
  41. @paddle.no_grad()
  42. def forward(self,
  43. pred_scores,
  44. pred_bboxes,
  45. anchor_points,
  46. stride_tensor,
  47. gt_labels,
  48. gt_bboxes,
  49. pad_gt_mask,
  50. bg_index,
  51. gt_scores=None):
  52. r"""This code is based on
  53. https://github.com/fcjian/TOOD/blob/master/mmdet/core/bbox/assigners/task_aligned_assigner.py
  54. The assignment is done in following steps
  55. 1. compute alignment metric between all bbox (bbox of all pyramid levels) and gt
  56. 2. select top-k bbox as candidates for each gt
  57. 3. limit the positive sample's center in gt (because the anchor-free detector
  58. only can predict positive distance)
  59. 4. if an anchor box is assigned to multiple gts, the one with the
  60. highest iou will be selected.
  61. Args:
  62. pred_scores (Tensor, float32): predicted class probability, shape(B, L, C)
  63. pred_bboxes (Tensor, float32): predicted bounding boxes, shape(B, L, 4)
  64. anchor_points (Tensor, float32): pre-defined anchors, shape(L, 2), "cxcy" format
  65. stride_tensor (Tensor, float32): stride of feature map, shape(L, 1)
  66. gt_labels (Tensor, int64|int32): Label of gt_bboxes, shape(B, n, 1)
  67. gt_bboxes (Tensor, float32): Ground truth bboxes, shape(B, n, 4)
  68. pad_gt_mask (Tensor, float32): 1 means bbox, 0 means no bbox, shape(B, n, 1)
  69. bg_index (int): background index
  70. gt_scores (Tensor|None, float32) Score of gt_bboxes, shape(B, n, 1)
  71. Returns:
  72. assigned_labels (Tensor): (B, L)
  73. assigned_bboxes (Tensor): (B, L, 4)
  74. assigned_scores (Tensor): (B, L, C)
  75. """
  76. assert pred_scores.ndim == pred_bboxes.ndim
  77. assert gt_labels.ndim == gt_bboxes.ndim and \
  78. gt_bboxes.ndim == 3
  79. batch_size, num_anchors, num_classes = pred_scores.shape
  80. _, num_max_boxes, _ = gt_bboxes.shape
  81. # negative batch
  82. if num_max_boxes == 0:
  83. assigned_labels = paddle.full(
  84. [batch_size, num_anchors], bg_index, dtype='int32')
  85. assigned_bboxes = paddle.zeros([batch_size, num_anchors, 4])
  86. assigned_scores = paddle.zeros(
  87. [batch_size, num_anchors, num_classes])
  88. return assigned_labels, assigned_bboxes, assigned_scores
  89. # compute iou between gt and pred bbox, [B, n, L]
  90. ious = batch_iou_similarity(gt_bboxes, pred_bboxes)
  91. # gather pred bboxes class score
  92. pred_scores = pred_scores.transpose([0, 2, 1])
  93. batch_ind = paddle.arange(
  94. end=batch_size, dtype=gt_labels.dtype).unsqueeze(-1)
  95. gt_labels_ind = paddle.stack(
  96. [batch_ind.tile([1, num_max_boxes]), gt_labels.squeeze(-1)],
  97. axis=-1)
  98. bbox_cls_scores = paddle.gather_nd(pred_scores, gt_labels_ind)
  99. # compute alignment metrics, [B, n, L]
  100. alignment_metrics = bbox_cls_scores.pow(self.alpha) * ious.pow(
  101. self.beta) * pad_gt_mask
  102. # select positive sample, [B, n, L]
  103. if self.center_radius is None:
  104. # check the positive sample's center in gt, [B, n, L]
  105. is_in_gts = check_points_inside_bboxes(
  106. anchor_points, gt_bboxes, sm_use=True)
  107. # select topk largest alignment metrics pred bbox as candidates
  108. # for each gt, [B, n, L]
  109. mask_positive = gather_topk_anchors(
  110. alignment_metrics, self.topk, topk_mask=pad_gt_mask) * is_in_gts
  111. else:
  112. is_in_gts, is_in_center = check_points_inside_bboxes(
  113. anchor_points,
  114. gt_bboxes,
  115. stride_tensor * self.center_radius,
  116. sm_use=True)
  117. is_in_gts *= pad_gt_mask
  118. is_in_center *= pad_gt_mask
  119. candidate_metrics = paddle.where(
  120. is_in_gts.sum(-1, keepdim=True) == 0,
  121. alignment_metrics + is_in_center,
  122. alignment_metrics)
  123. mask_positive = gather_topk_anchors(
  124. candidate_metrics, self.topk,
  125. topk_mask=pad_gt_mask) * paddle.cast((is_in_center > 0) |
  126. (is_in_gts > 0), 'float32')
  127. # if an anchor box is assigned to multiple gts,
  128. # the one with the highest iou will be selected, [B, n, L]
  129. mask_positive_sum = mask_positive.sum(axis=-2)
  130. if mask_positive_sum.max() > 1:
  131. mask_multiple_gts = (mask_positive_sum.unsqueeze(1) > 1).tile(
  132. [1, num_max_boxes, 1])
  133. is_max_iou = compute_max_iou_anchor(ious * mask_positive)
  134. mask_positive = paddle.where(mask_multiple_gts, is_max_iou,
  135. mask_positive)
  136. mask_positive_sum = mask_positive.sum(axis=-2)
  137. assigned_gt_index = mask_positive.argmax(axis=-2)
  138. # assigned target
  139. assigned_gt_index = assigned_gt_index + batch_ind * num_max_boxes
  140. assigned_labels = paddle.gather(
  141. gt_labels.flatten(), assigned_gt_index.flatten(), axis=0)
  142. assigned_labels = assigned_labels.reshape([batch_size, num_anchors])
  143. assigned_labels = paddle.where(
  144. mask_positive_sum > 0, assigned_labels,
  145. paddle.full_like(assigned_labels, bg_index))
  146. assigned_bboxes = paddle.gather(
  147. gt_bboxes.reshape([-1, 4]), assigned_gt_index.flatten(), axis=0)
  148. assigned_bboxes = assigned_bboxes.reshape([batch_size, num_anchors, 4])
  149. assigned_scores = F.one_hot(assigned_labels, num_classes + 1)
  150. ind = list(range(num_classes + 1))
  151. ind.remove(bg_index)
  152. assigned_scores = paddle.index_select(
  153. assigned_scores, paddle.to_tensor(ind), axis=-1)
  154. # rescale alignment metrics
  155. alignment_metrics *= mask_positive
  156. max_metrics_per_instance = alignment_metrics.max(axis=-1, keepdim=True)
  157. max_ious_per_instance = (ious * mask_positive).max(axis=-1,
  158. keepdim=True)
  159. alignment_metrics = alignment_metrics / (
  160. max_metrics_per_instance + self.eps) * max_ious_per_instance
  161. alignment_metrics = alignment_metrics.max(-2).unsqueeze(-1)
  162. assigned_scores = assigned_scores * alignment_metrics
  163. return assigned_labels, assigned_bboxes, assigned_scores