# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

try:
    from collections.abc import Sequence
except Exception:
    from collections import Sequence
from numbers import Integral

import cv2
import copy
import numpy as np
import random
import math

from .operators import BaseOperator, register_op
from .batch_operators import Gt2TTFTarget
from ppdet.modeling.bbox_utils import bbox_iou_np_expand
from ppdet.utils.logger import setup_logger
from .op_helper import gaussian_radius

logger = setup_logger(__name__)

__all__ = [
    'RGBReverse', 'LetterBoxResize', 'MOTRandomAffine', 'Gt2JDETargetThres',
    'Gt2JDETargetMax', 'Gt2FairMOTTarget'
]


@register_op
class RGBReverse(BaseOperator):
    """Reverse the channel order of the image: RGB to BGR, or BGR to RGB.
    Order sensitive when used together with MOTRandomAffine.
    """

    def __init__(self):
        super(RGBReverse, self).__init__()

    def apply(self, sample, context=None):
        im = sample['image']
        sample['image'] = np.ascontiguousarray(im[:, :, ::-1])
        return sample


@register_op
class LetterBoxResize(BaseOperator):
    def __init__(self, target_size):
        """
        Letterbox-resize the image to the target size and convert normalized
        xywh boxes ([x_center, y_center, width, height]) to pixel xyxy boxes
        ([x0, y0, x1, y1]).

        Args:
            target_size (int|list): image target size.
        """
        super(LetterBoxResize, self).__init__()
        if not isinstance(target_size, (Integral, Sequence)):
            raise TypeError(
                "Type of target_size is invalid. Must be Integer or List or "
                "Tuple, now is {}".format(type(target_size)))
        if isinstance(target_size, Integral):
            target_size = [target_size, target_size]
        self.target_size = target_size

    def apply_image(self, img, height, width, color=(127.5, 127.5, 127.5)):
        # letterbox: resize a rectangular image into a padded rectangle,
        # keeping the aspect ratio
        shape = img.shape[:2]  # [height, width]

        ratio_h = float(height) / shape[0]
        ratio_w = float(width) / shape[1]
        ratio = min(ratio_h, ratio_w)
        new_shape = (round(shape[1] * ratio),
                     round(shape[0] * ratio))  # [width, height]
        padw = (width - new_shape[0]) / 2
        padh = (height - new_shape[1]) / 2
        top, bottom = round(padh - 0.1), round(padh + 0.1)
        left, right = round(padw - 0.1), round(padw + 0.1)

        img = cv2.resize(
            img, new_shape, interpolation=cv2.INTER_AREA)  # resized, no border
        img = cv2.copyMakeBorder(
            img, top, bottom, left, right, cv2.BORDER_CONSTANT,
            value=color)  # padded rectangular
        return img, ratio, padw, padh

    def apply_bbox(self, bbox0, h, w, ratio, padw, padh):
        bboxes = bbox0.copy()
        bboxes[:, 0] = ratio * w * (bbox0[:, 0] - bbox0[:, 2] / 2) + padw
        bboxes[:, 1] = ratio * h * (bbox0[:, 1] - bbox0[:, 3] / 2) + padh
        bboxes[:, 2] = ratio * w * (bbox0[:, 0] + bbox0[:, 2] / 2) + padw
        bboxes[:, 3] = ratio * h * (bbox0[:, 1] + bbox0[:, 3] / 2) + padh
        return bboxes

    def apply(self, sample, context=None):
        """ Resize the image numpy array.
        """
        im = sample['image']
        h, w = sample['im_shape']
        if not isinstance(im, np.ndarray):
            raise TypeError("{}: image type is not numpy.".format(self))
        if len(im.shape) != 3:
            from PIL import UnidentifiedImageError
            raise UnidentifiedImageError(
                '{}: image is not 3-dimensional.'.format(self))

        # apply image
        height, width = self.target_size
        img, ratio, padw, padh = self.apply_image(
            im, height=height, width=width)

        sample['image'] = img
        new_shape = (round(h * ratio), round(w * ratio))
        sample['im_shape'] = np.asarray(new_shape, dtype=np.float32)
        sample['scale_factor'] = np.asarray([ratio, ratio], dtype=np.float32)

        # apply bbox
        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
            sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], h, w, ratio,
                                                padw, padh)
        return sample
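
# A worked sketch of the letterbox arithmetic above (hypothetical numbers,
# not part of the original module): resizing a 1080x1920 (HxW) frame to a
# 608x1088 target gives ratio = min(608 / 1080, 1088 / 1920) ~= 0.5630, a
# resized image of about 608x1081, and padw = (1088 - 1081) / 2 = 3.5, so
# 3 pixels of gray border go on the left and 4 on the right
# (round(3.5 - 0.1) = 3, round(3.5 + 0.1) = 4). A normalized gt box is
# mapped with the same ratio/padw/padh, e.g.
# x0 = ratio * w * (cx - bw / 2) + padw, matching apply_bbox.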


@register_op
class MOTRandomAffine(BaseOperator):
    """
    Apply an affine transform to the image and its coords to achieve a
    rotate, scale and shift effect on the training image.

    Args:
        degrees (list[2]): rotation range to apply, [min, max]
        translate (list[2]): translation range to apply, [min, max]
        scale (list[2]): scale range to apply, [min, max]
        shear (list[2]): shear range to apply, [min, max]
        borderValue (list[3]): value used for the constant border when
            applying the perspective transformation
        reject_outside (bool): reject warped bounding boxes outside the image

    Returns:
        records (dict): sample containing the image and coords after the
            transform
    """

    def __init__(self,
                 degrees=(-5, 5),
                 translate=(0.10, 0.10),
                 scale=(0.50, 1.20),
                 shear=(-2, 2),
                 borderValue=(127.5, 127.5, 127.5),
                 reject_outside=True):
        super(MOTRandomAffine, self).__init__()
        self.degrees = degrees
        self.translate = translate
        self.scale = scale
        self.shear = shear
        self.borderValue = borderValue
        self.reject_outside = reject_outside

    def apply(self, sample, context=None):
        # https://medium.com/uruvideo/dataset-augmentation-with-random-homographies-a8f4b44830d4
        border = 0  # width of added border (optional)

        img = sample['image']
        height, width = img.shape[0], img.shape[1]

        # Rotation and Scale
        R = np.eye(3)
        a = random.random() * (self.degrees[1] - self.degrees[0]
                               ) + self.degrees[0]
        s = random.random() * (self.scale[1] - self.scale[0]) + self.scale[0]
        R[:2] = cv2.getRotationMatrix2D(
            angle=a, center=(width / 2, height / 2), scale=s)

        # Translation
        T = np.eye(3)
        T[0, 2] = (
            random.random() * 2 - 1
        ) * self.translate[0] * height + border  # x translation (pixels)
        T[1, 2] = (
            random.random() * 2 - 1
        ) * self.translate[1] * width + border  # y translation (pixels)

        # Shear
        S = np.eye(3)
        S[0, 1] = math.tan((random.random() *
                            (self.shear[1] - self.shear[0]) + self.shear[0]) *
                           math.pi / 180)  # x shear (deg)
        S[1, 0] = math.tan((random.random() *
                            (self.shear[1] - self.shear[0]) + self.shear[0]) *
                           math.pi / 180)  # y shear (deg)

        M = S @ T @ R  # Combined rotation matrix. ORDER IS IMPORTANT HERE!!
        imw = cv2.warpPerspective(
            img,
            M,
            dsize=(width, height),
            flags=cv2.INTER_LINEAR,
            borderValue=self.borderValue)  # BGR order borderValue

        if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
            targets = sample['gt_bbox']
            n = targets.shape[0]
            points = targets.copy()
            area0 = (points[:, 2] - points[:, 0]) * (
                points[:, 3] - points[:, 1])

            # warp points
            xy = np.ones((n * 4, 3))
            xy[:, :2] = points[:, [0, 1, 2, 3, 0, 3, 2, 1]].reshape(
                n * 4, 2)  # x1y1, x2y2, x1y2, x2y1
            xy = (xy @ M.T)[:, :2].reshape(n, 8)

            # create new boxes
            x = xy[:, [0, 2, 4, 6]]
            y = xy[:, [1, 3, 5, 7]]
            xy = np.concatenate(
                (x.min(1), y.min(1), x.max(1), y.max(1))).reshape(4, n).T

            # apply angle-based reduction of bounding boxes
            radians = a * math.pi / 180
            reduction = max(abs(math.sin(radians)), abs(math.cos(radians)))**0.5
            x = (xy[:, 2] + xy[:, 0]) / 2
            y = (xy[:, 3] + xy[:, 1]) / 2
            w = (xy[:, 2] - xy[:, 0]) * reduction
            h = (xy[:, 3] - xy[:, 1]) * reduction
            xy = np.concatenate(
                (x - w / 2, y - h / 2, x + w / 2, y + h / 2)).reshape(4, n).T

            # reject warped points outside of image
            if self.reject_outside:
                np.clip(xy[:, 0], 0, width, out=xy[:, 0])
                np.clip(xy[:, 2], 0, width, out=xy[:, 2])
                np.clip(xy[:, 1], 0, height, out=xy[:, 1])
                np.clip(xy[:, 3], 0, height, out=xy[:, 3])
            w = xy[:, 2] - xy[:, 0]
            h = xy[:, 3] - xy[:, 1]
            area = w * h
            ar = np.maximum(w / (h + 1e-16), h / (w + 1e-16))
            i = (w > 4) & (h > 4) & (area / (area0 + 1e-16) > 0.1) & (ar < 10)

            if sum(i) > 0:
                sample['gt_bbox'] = xy[i].astype(sample['gt_bbox'].dtype)
                sample['gt_class'] = sample['gt_class'][i]
                if 'difficult' in sample:
                    sample['difficult'] = sample['difficult'][i]
                if 'gt_ide' in sample:
                    sample['gt_ide'] = sample['gt_ide'][i]
                if 'is_crowd' in sample:
                    sample['is_crowd'] = sample['is_crowd'][i]
                sample['image'] = imw
                return sample
            else:
                # no boxes survive the filtering: keep the unwarped sample
                return sample
        else:
            # no gt boxes to warp: return the sample unchanged
            return sample
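
# A minimal sketch of the matrix composition above (illustrative, not part
# of the original module): with zero rotation/shear and scale 1, R = S = I
# and M = T, so a homogeneous point p = (x, y, 1) maps to
# M @ p = (x + tx, y + ty, 1). In general M = S @ T @ R applies the
# rotation/scale first, then the translation, then the shear; swapping the
# order would translate in the rotated frame and produce a different warp,
# hence the "ORDER IS IMPORTANT" note above.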


@register_op
class Gt2JDETargetThres(BaseOperator):
    __shared__ = ['num_classes']
    """
    Generate JDE targets from ground truth data when training.

    Args:
        anchors (list): anchors of the JDE model
        anchor_masks (list): anchor_masks of the JDE model
        downsample_ratios (list): downsample ratios of the JDE model
        ide_thresh (float): identity threshold; anchors with IoU above it
            are assigned a ground truth identity
        fg_thresh (float): foreground threshold; above it is foreground
        bg_thresh (float): background threshold; below it is background
        num_classes (int): number of classes
    """

    def __init__(self,
                 anchors,
                 anchor_masks,
                 downsample_ratios,
                 ide_thresh=0.5,
                 fg_thresh=0.5,
                 bg_thresh=0.4,
                 num_classes=1):
        super(Gt2JDETargetThres, self).__init__()
        self.anchors = anchors
        self.anchor_masks = anchor_masks
        self.downsample_ratios = downsample_ratios
        self.ide_thresh = ide_thresh
        self.fg_thresh = fg_thresh
        self.bg_thresh = bg_thresh
        self.num_classes = num_classes

    def generate_anchor(self, nGh, nGw, anchor_hw):
        nA = len(anchor_hw)
        yy, xx = np.meshgrid(np.arange(nGh), np.arange(nGw))

        mesh = np.stack([xx.T, yy.T], axis=0)  # [2, nGh, nGw]
        mesh = np.repeat(mesh[None, :], nA, axis=0)  # [nA, 2, nGh, nGw]

        anchor_offset_mesh = anchor_hw[:, :, None][:, :, :, None]
        anchor_offset_mesh = np.repeat(anchor_offset_mesh, nGh, axis=-2)
        anchor_offset_mesh = np.repeat(anchor_offset_mesh, nGw, axis=-1)

        anchor_mesh = np.concatenate(
            [mesh, anchor_offset_mesh], axis=1)  # [nA, 4, nGh, nGw]
        return anchor_mesh

    def encode_delta(self, gt_box_list, fg_anchor_list):
        px, py, pw, ph = fg_anchor_list[:, 0], fg_anchor_list[:, 1], \
                         fg_anchor_list[:, 2], fg_anchor_list[:, 3]
        gx, gy, gw, gh = gt_box_list[:, 0], gt_box_list[:, 1], \
                         gt_box_list[:, 2], gt_box_list[:, 3]
        dx = (gx - px) / pw
        dy = (gy - py) / ph
        dw = np.log(gw / pw)
        dh = np.log(gh / ph)
        return np.stack([dx, dy, dw, dh], axis=1)
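
    # A worked example of the delta encoding above (hypothetical numbers,
    # not part of the original module): for an anchor (px, py, pw, ph) =
    # (10, 10, 4, 8) and a gt box (gx, gy, gw, gh) = (12, 9, 8, 8):
    #   dx = (12 - 10) / 4 = 0.5
    #   dy = (9 - 10) / 8 = -0.125
    #   dw = log(8 / 4) ~= 0.693
    #   dh = log(8 / 8) = 0.0
    # i.e. the network regresses center offsets in anchor units and log
    # size ratios, the usual YOLO/R-CNN box parameterization.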

    def pad_box(self, sample, num_max):
        assert 'gt_bbox' in sample
        bbox = sample['gt_bbox']
        gt_num = len(bbox)
        pad_bbox = np.zeros((num_max, 4), dtype=np.float32)
        if gt_num > 0:
            pad_bbox[:gt_num, :] = bbox[:gt_num, :]
        sample['gt_bbox'] = pad_bbox
        if 'gt_score' in sample:
            pad_score = np.zeros((num_max, ), dtype=np.float32)
            if gt_num > 0:
                pad_score[:gt_num] = sample['gt_score'][:gt_num, 0]
            sample['gt_score'] = pad_score
        if 'difficult' in sample:
            pad_diff = np.zeros((num_max, ), dtype=np.int32)
            if gt_num > 0:
                pad_diff[:gt_num] = sample['difficult'][:gt_num, 0]
            sample['difficult'] = pad_diff
        if 'is_crowd' in sample:
            pad_crowd = np.zeros((num_max, ), dtype=np.int32)
            if gt_num > 0:
                pad_crowd[:gt_num] = sample['is_crowd'][:gt_num, 0]
            sample['is_crowd'] = pad_crowd
        if 'gt_ide' in sample:
            pad_ide = np.zeros((num_max, ), dtype=np.int32)
            if gt_num > 0:
                pad_ide[:gt_num] = sample['gt_ide'][:gt_num, 0]
            sample['gt_ide'] = pad_ide
        return sample

    def __call__(self, samples, context=None):
        assert len(self.anchor_masks) == len(self.downsample_ratios), \
            "'anchor_masks' and 'downsample_ratios' should have the same length."
        h, w = samples[0]['image'].shape[1:3]

        num_max = 0
        for sample in samples:
            num_max = max(num_max, len(sample['gt_bbox']))

        for sample in samples:
            gt_bbox = sample['gt_bbox']
            gt_ide = sample['gt_ide']
            for i, (anchor_hw, downsample_ratio
                    ) in enumerate(zip(self.anchors, self.downsample_ratios)):
                anchor_hw = np.array(
                    anchor_hw, dtype=np.float32) / downsample_ratio
                nA = len(anchor_hw)
                nGh, nGw = int(h / downsample_ratio), int(w / downsample_ratio)
                tbox = np.zeros((nA, nGh, nGw, 4), dtype=np.float32)
                tconf = np.zeros((nA, nGh, nGw), dtype=np.float32)
                tid = -np.ones((nA, nGh, nGw, 1), dtype=np.float32)

                gxy, gwh = gt_bbox[:, 0:2].copy(), gt_bbox[:, 2:4].copy()
                gxy[:, 0] = gxy[:, 0] * nGw
                gxy[:, 1] = gxy[:, 1] * nGh
                gwh[:, 0] = gwh[:, 0] * nGw
                gwh[:, 1] = gwh[:, 1] * nGh
                gxy[:, 0] = np.clip(gxy[:, 0], 0, nGw - 1)
                gxy[:, 1] = np.clip(gxy[:, 1], 0, nGh - 1)
                tboxes = np.concatenate([gxy, gwh], axis=1)

                anchor_mesh = self.generate_anchor(nGh, nGw, anchor_hw)
                anchor_list = np.transpose(anchor_mesh,
                                           (0, 2, 3, 1)).reshape(-1, 4)
                iou_pdist = bbox_iou_np_expand(
                    anchor_list, tboxes, x1y1x2y2=False)

                iou_max = np.max(iou_pdist, axis=1)
                max_gt_index = np.argmax(iou_pdist, axis=1)
                iou_map = iou_max.reshape(nA, nGh, nGw)
                gt_index_map = max_gt_index.reshape(nA, nGh, nGw)

                id_index = iou_map > self.ide_thresh
                fg_index = iou_map > self.fg_thresh
                bg_index = iou_map < self.bg_thresh
                ign_index = (iou_map < self.fg_thresh) * (
                    iou_map > self.bg_thresh)
                tconf[fg_index] = 1
                tconf[bg_index] = 0
                tconf[ign_index] = -1

                gt_index = gt_index_map[fg_index]
                gt_box_list = tboxes[gt_index]
                gt_id_list = gt_ide[gt_index_map[id_index]]

                if np.sum(fg_index) > 0:
                    tid[id_index] = gt_id_list
                    fg_anchor_list = anchor_list.reshape(nA, nGh, nGw,
                                                         4)[fg_index]
                    delta_target = self.encode_delta(gt_box_list,
                                                     fg_anchor_list)
                    tbox[fg_index] = delta_target

                sample['tbox{}'.format(i)] = tbox
                sample['tconf{}'.format(i)] = tconf
                sample['tide{}'.format(i)] = tid
            sample.pop('gt_class')
            sample = self.pad_box(sample, num_max)
        return samples
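
# A shape sketch for the targets above (hypothetical config, not part of
# the original module): with a 608x1088 input, 4 anchors per level and a
# downsample ratio of 32, nGh, nGw = 19, 34, so each sample gains
#   tbox0  with shape (4, 19, 34, 4)   -- encoded box deltas
#   tconf0 with shape (4, 19, 34)      -- 1 fg / 0 bg / -1 ignore
#   tide0  with shape (4, 19, 34, 1)   -- track id, -1 where unassigned
# plus one such triple per FPN level i.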


@register_op
class Gt2JDETargetMax(BaseOperator):
    __shared__ = ['num_classes']
    """
    Generate JDE targets from ground truth data when evaluating.

    Args:
        anchors (list): anchors of the JDE model
        anchor_masks (list): anchor_masks of the JDE model
        downsample_ratios (list): downsample ratios of the JDE model
        max_iou_thresh (float): iou threshold for a high quality anchor
        num_classes (int): number of classes
    """

    def __init__(self,
                 anchors,
                 anchor_masks,
                 downsample_ratios,
                 max_iou_thresh=0.60,
                 num_classes=1):
        super(Gt2JDETargetMax, self).__init__()
        self.anchors = anchors
        self.anchor_masks = anchor_masks
        self.downsample_ratios = downsample_ratios
        self.max_iou_thresh = max_iou_thresh
        self.num_classes = num_classes

    def __call__(self, samples, context=None):
        assert len(self.anchor_masks) == len(self.downsample_ratios), \
            "'anchor_masks' and 'downsample_ratios' should have the same length."
        h, w = samples[0]['image'].shape[1:3]
        for sample in samples:
            gt_bbox = sample['gt_bbox']
            gt_ide = sample['gt_ide']
            for i, (anchor_hw, downsample_ratio
                    ) in enumerate(zip(self.anchors, self.downsample_ratios)):
                anchor_hw = np.array(
                    anchor_hw, dtype=np.float32) / downsample_ratio
                nA = len(anchor_hw)
                nGh, nGw = int(h / downsample_ratio), int(w / downsample_ratio)
                tbox = np.zeros((nA, nGh, nGw, 4), dtype=np.float32)
                tconf = np.zeros((nA, nGh, nGw), dtype=np.float32)
                tid = -np.ones((nA, nGh, nGw, 1), dtype=np.float32)

                gxy, gwh = gt_bbox[:, 0:2].copy(), gt_bbox[:, 2:4].copy()
                gxy[:, 0] = gxy[:, 0] * nGw
                gxy[:, 1] = gxy[:, 1] * nGh
                gwh[:, 0] = gwh[:, 0] * nGw
                gwh[:, 1] = gwh[:, 1] * nGh
                gi = np.clip(gxy[:, 0], 0, nGw - 1).astype(int)
                gj = np.clip(gxy[:, 1], 0, nGh - 1).astype(int)

                # iou of targets-anchors (using wh only)
                box1 = gwh
                box2 = anchor_hw[:, None, :]
                inter_area = np.minimum(box1, box2).prod(2)
                iou = inter_area / (
                    box1.prod(1) + box2.prod(2) - inter_area + 1e-16)

                # Select best iou_pred and anchor
                iou_best = iou.max(0)  # best anchor for each target
                a = np.argmax(iou, axis=0)

                # Select best unique target-anchor combinations
                iou_order = np.argsort(-iou_best)  # best to worst

                # Unique anchor selection
                u = np.stack((gi, gj, a), 0)[:, iou_order]
                _, first_unique = np.unique(u, axis=1, return_index=True)
                mask = iou_order[first_unique]
                # best anchor must share significant commonality (iou) with target
                # TODO: examine arbitrary threshold
                idx = mask[iou_best[mask] > self.max_iou_thresh]
                if len(idx) > 0:
                    a_i, gj_i, gi_i = a[idx], gj[idx], gi[idx]
                    t_box = gt_bbox[idx]
                    t_id = gt_ide[idx]
                    if len(t_box.shape) == 1:
                        t_box = t_box.reshape(1, 4)

                    gxy, gwh = t_box[:, 0:2].copy(), t_box[:, 2:4].copy()
                    gxy[:, 0] = gxy[:, 0] * nGw
                    gxy[:, 1] = gxy[:, 1] * nGh
                    gwh[:, 0] = gwh[:, 0] * nGw
                    gwh[:, 1] = gwh[:, 1] * nGh

                    # XY coordinates
                    tbox[:, :, :, 0:2][a_i, gj_i, gi_i] = gxy - gxy.astype(int)
                    # Width and height in yolo method
                    tbox[:, :, :, 2:4][a_i, gj_i, gi_i] = np.log(gwh /
                                                                 anchor_hw[a_i])
                    tconf[a_i, gj_i, gi_i] = 1
                    tid[a_i, gj_i, gi_i] = t_id

                sample['tbox{}'.format(i)] = tbox
                sample['tconf{}'.format(i)] = tconf
                sample['tide{}'.format(i)] = tid
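
# A worked example of the wh-only IoU above (hypothetical numbers, not part
# of the original module): for an anchor of shape (8, 16) and a gt of shape
# (8, 24), inter = min(8, 8) * min(16, 24) = 128 and
# iou = 128 / (8 * 16 + 8 * 24 - 128) = 128 / 192 ~= 0.667. Position is
# ignored; only the shapes are compared when picking the best anchor, and
# the 0.60 max_iou_thresh then keeps only well-matched shapes.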


@register_op
class Gt2FairMOTTarget(Gt2TTFTarget):
    __shared__ = ['num_classes']
    """
    Generate FairMOT targets from ground truth data.
    Differences between Gt2FairMOTTarget and Gt2TTFTarget are:
        1. the Gaussian kernel radius used to generate the heatmap.
        2. the targets needed during training.

    Args:
        num_classes (int): the number of classes.
        down_ratio (int): the down ratio from images to heatmap, 4 by default.
        max_objs (int): the maximum number of ground truth objects in an
            image, 500 by default.
    """

    def __init__(self, num_classes=1, down_ratio=4, max_objs=500):
        # intentionally skip Gt2TTFTarget.__init__, which expects different
        # arguments, and initialize BaseOperator directly
        super(Gt2TTFTarget, self).__init__()
        self.down_ratio = down_ratio
        self.num_classes = num_classes
        self.max_objs = max_objs

    def __call__(self, samples, context=None):
        for b_id, sample in enumerate(samples):
            output_h = sample['image'].shape[1] // self.down_ratio
            output_w = sample['image'].shape[2] // self.down_ratio

            heatmap = np.zeros(
                (self.num_classes, output_h, output_w), dtype='float32')
            bbox_size = np.zeros((self.max_objs, 4), dtype=np.float32)
            center_offset = np.zeros((self.max_objs, 2), dtype=np.float32)
            index = np.zeros((self.max_objs, ), dtype=np.int64)
            index_mask = np.zeros((self.max_objs, ), dtype=np.int32)
            reid = np.zeros((self.max_objs, ), dtype=np.int64)
            bbox_xys = np.zeros((self.max_objs, 4), dtype=np.float32)

            if self.num_classes > 1:
                # each category corresponds to a set of track ids
                cls_tr_ids = np.zeros(
                    (self.num_classes, output_h, output_w), dtype=np.int64)
                cls_id_map = np.full((output_h, output_w), -1, dtype=np.int64)

            gt_bbox = sample['gt_bbox']
            gt_class = sample['gt_class']
            gt_ide = sample['gt_ide']

            for k in range(len(gt_bbox)):
                cls_id = gt_class[k][0]
                bbox = gt_bbox[k]
                ide = gt_ide[k][0]
                bbox[[0, 2]] = bbox[[0, 2]] * output_w
                bbox[[1, 3]] = bbox[[1, 3]] * output_h
                bbox_amodal = copy.deepcopy(bbox)
                bbox_amodal[0] = bbox_amodal[0] - bbox_amodal[2] / 2.
                bbox_amodal[1] = bbox_amodal[1] - bbox_amodal[3] / 2.
                bbox_amodal[2] = bbox_amodal[0] + bbox_amodal[2]
                bbox_amodal[3] = bbox_amodal[1] + bbox_amodal[3]
                bbox[0] = np.clip(bbox[0], 0, output_w - 1)
                bbox[1] = np.clip(bbox[1], 0, output_h - 1)
                h = bbox[3]
                w = bbox[2]

                bbox_xy = copy.deepcopy(bbox)
                bbox_xy[0] = bbox_xy[0] - bbox_xy[2] / 2
                bbox_xy[1] = bbox_xy[1] - bbox_xy[3] / 2
                bbox_xy[2] = bbox_xy[0] + bbox_xy[2]
                bbox_xy[3] = bbox_xy[1] + bbox_xy[3]

                if h > 0 and w > 0:
                    radius = gaussian_radius((math.ceil(h), math.ceil(w)), 0.7)
                    radius = max(0, int(radius))
                    ct = np.array([bbox[0], bbox[1]], dtype=np.float32)
                    ct_int = ct.astype(np.int32)
                    self.draw_truncate_gaussian(heatmap[cls_id], ct_int,
                                                radius, radius)
                    bbox_size[k] = ct[0] - bbox_amodal[0], ct[1] - bbox_amodal[1], \
                                   bbox_amodal[2] - ct[0], bbox_amodal[3] - ct[1]

                    index[k] = ct_int[1] * output_w + ct_int[0]
                    center_offset[k] = ct - ct_int
                    index_mask[k] = 1
                    reid[k] = ide
                    bbox_xys[k] = bbox_xy
                    if self.num_classes > 1:
                        cls_id_map[ct_int[1], ct_int[0]] = cls_id
                        # track ids start from 0
                        cls_tr_ids[cls_id][ct_int[1]][ct_int[0]] = ide - 1

            sample['heatmap'] = heatmap
            sample['index'] = index
            sample['offset'] = center_offset
            sample['size'] = bbox_size
            sample['index_mask'] = index_mask
            sample['reid'] = reid
            if self.num_classes > 1:
                sample['cls_id_map'] = cls_id_map
                sample['cls_tr_ids'] = cls_tr_ids
            sample['bbox_xys'] = bbox_xys
            sample.pop('is_crowd', None)
            sample.pop('difficult', None)
            sample.pop('gt_class', None)
            sample.pop('gt_bbox', None)
            sample.pop('gt_score', None)
            sample.pop('gt_ide', None)
        return samples
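
# A small sketch of the FairMOT target layout above (hypothetical numbers,
# not part of the original module): for a 608x1088 input and down_ratio 4,
# the heatmap is (num_classes, 152, 272). A gt center at (cx, cy) =
# (100.7, 50.3) on the heatmap grid gives ct_int = (100, 50),
# index = 50 * 272 + 100 = 13700 (row-major flattening of the heatmap) and
# offset = (0.7, 0.3), the sub-pixel part recovered by the offset head.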