jde_tracker.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337
  1. # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. """
  15. This code is based on https://github.com/Zhongdao/Towards-Realtime-MOT/blob/master/tracker/multitracker.py
  16. """
  17. import numpy as np
  18. from collections import defaultdict
  19. from ..matching import jde_matching as matching
  20. from ..motion import KalmanFilter
  21. from .base_jde_tracker import TrackState, STrack
  22. from .base_jde_tracker import joint_stracks, sub_stracks, remove_duplicate_stracks
  23. __all__ = ['JDETracker']
  24. class JDETracker(object):
  25. __shared__ = ['num_classes']
  26. """
  27. JDE tracker, support single class and multi classes
  28. Args:
  29. use_byte (bool): Whether use ByteTracker, default False
  30. num_classes (int): the number of classes
  31. det_thresh (float): threshold of detection score
  32. track_buffer (int): buffer for tracker
  33. min_box_area (int): min box area to filter out low quality boxes
  34. vertical_ratio (float): w/h, the vertical ratio of the bbox to filter
  35. bad results. If set <= 0 means no need to filter bboxes,usually set
  36. 1.6 for pedestrian tracking.
  37. tracked_thresh (float): linear assignment threshold of tracked
  38. stracks and detections
  39. r_tracked_thresh (float): linear assignment threshold of
  40. tracked stracks and unmatched detections
  41. unconfirmed_thresh (float): linear assignment threshold of
  42. unconfirmed stracks and unmatched detections
  43. conf_thres (float): confidence threshold for tracking, also used in
  44. ByteTracker as higher confidence threshold
  45. match_thres (float): linear assignment threshold of tracked
  46. stracks and detections in ByteTracker
  47. low_conf_thres (float): lower confidence threshold for tracking in
  48. ByteTracker
  49. input_size (list): input feature map size to reid model, [h, w] format,
  50. [64, 192] as default.
  51. motion (str): motion model, KalmanFilter as default
  52. metric_type (str): either "euclidean" or "cosine", the distance metric
  53. used for measurement to track association.
  54. """
  55. def __init__(self,
  56. use_byte=False,
  57. num_classes=1,
  58. det_thresh=0.3,
  59. track_buffer=30,
  60. min_box_area=0,
  61. vertical_ratio=0,
  62. tracked_thresh=0.7,
  63. r_tracked_thresh=0.5,
  64. unconfirmed_thresh=0.7,
  65. conf_thres=0,
  66. match_thres=0.8,
  67. low_conf_thres=0.2,
  68. input_size=[64, 192],
  69. motion='KalmanFilter',
  70. metric_type='euclidean'):
  71. self.use_byte = use_byte
  72. self.num_classes = num_classes
  73. self.det_thresh = det_thresh if not use_byte else conf_thres + 0.1
  74. self.track_buffer = track_buffer
  75. self.min_box_area = min_box_area
  76. self.vertical_ratio = vertical_ratio
  77. self.tracked_thresh = tracked_thresh
  78. self.r_tracked_thresh = r_tracked_thresh
  79. self.unconfirmed_thresh = unconfirmed_thresh
  80. self.conf_thres = conf_thres
  81. self.match_thres = match_thres
  82. self.low_conf_thres = low_conf_thres
  83. self.input_size = input_size
  84. if motion == 'KalmanFilter':
  85. self.motion = KalmanFilter()
  86. self.metric_type = metric_type
  87. self.frame_id = 0
  88. self.tracked_tracks_dict = defaultdict(list) # dict(list[STrack])
  89. self.lost_tracks_dict = defaultdict(list) # dict(list[STrack])
  90. self.removed_tracks_dict = defaultdict(list) # dict(list[STrack])
  91. self.max_time_lost = 0
  92. # max_time_lost will be calculated: int(frame_rate / 30.0 * track_buffer)
  93. def update(self, pred_dets, pred_embs=None):
  94. """
  95. Processes the image frame and finds bounding box(detections).
  96. Associates the detection with corresponding tracklets and also handles
  97. lost, removed, refound and active tracklets.
  98. Args:
  99. pred_dets (np.array): Detection results of the image, the shape is
  100. [N, 6], means 'cls_id, score, x0, y0, x1, y1'.
  101. pred_embs (np.array): Embedding results of the image, the shape is
  102. [N, 128] or [N, 512].
  103. Return:
  104. output_stracks_dict (dict(list)): The list contains information
  105. regarding the online_tracklets for the received image tensor.
  106. """
  107. self.frame_id += 1
  108. if self.frame_id == 1:
  109. STrack.init_count(self.num_classes)
  110. activated_tracks_dict = defaultdict(list)
  111. refined_tracks_dict = defaultdict(list)
  112. lost_tracks_dict = defaultdict(list)
  113. removed_tracks_dict = defaultdict(list)
  114. output_tracks_dict = defaultdict(list)
  115. pred_dets_dict = defaultdict(list)
  116. pred_embs_dict = defaultdict(list)
  117. # unify single and multi classes detection and embedding results
  118. for cls_id in range(self.num_classes):
  119. cls_idx = (pred_dets[:, 0:1] == cls_id).squeeze(-1)
  120. pred_dets_dict[cls_id] = pred_dets[cls_idx]
  121. if pred_embs is not None:
  122. pred_embs_dict[cls_id] = pred_embs[cls_idx]
  123. else:
  124. pred_embs_dict[cls_id] = None
  125. for cls_id in range(self.num_classes):
  126. """ Step 1: Get detections by class"""
  127. pred_dets_cls = pred_dets_dict[cls_id]
  128. pred_embs_cls = pred_embs_dict[cls_id]
  129. remain_inds = (pred_dets_cls[:, 1:2] > self.conf_thres).squeeze(-1)
  130. if remain_inds.sum() > 0:
  131. pred_dets_cls = pred_dets_cls[remain_inds]
  132. if pred_embs_cls is None:
  133. # in original ByteTrack
  134. detections = [
  135. STrack(
  136. STrack.tlbr_to_tlwh(tlbrs[2:6]),
  137. tlbrs[1],
  138. cls_id,
  139. 30,
  140. temp_feat=None) for tlbrs in pred_dets_cls
  141. ]
  142. else:
  143. pred_embs_cls = pred_embs_cls[remain_inds]
  144. detections = [
  145. STrack(
  146. STrack.tlbr_to_tlwh(tlbrs[2:6]), tlbrs[1], cls_id,
  147. 30, temp_feat) for (tlbrs, temp_feat) in
  148. zip(pred_dets_cls, pred_embs_cls)
  149. ]
  150. else:
  151. detections = []
  152. ''' Add newly detected tracklets to tracked_stracks'''
  153. unconfirmed_dict = defaultdict(list)
  154. tracked_tracks_dict = defaultdict(list)
  155. for track in self.tracked_tracks_dict[cls_id]:
  156. if not track.is_activated:
  157. # previous tracks which are not active in the current frame are added in unconfirmed list
  158. unconfirmed_dict[cls_id].append(track)
  159. else:
  160. # Active tracks are added to the local list 'tracked_stracks'
  161. tracked_tracks_dict[cls_id].append(track)
  162. """ Step 2: First association, with embedding"""
  163. # building tracking pool for the current frame
  164. track_pool_dict = defaultdict(list)
  165. track_pool_dict[cls_id] = joint_stracks(
  166. tracked_tracks_dict[cls_id], self.lost_tracks_dict[cls_id])
  167. # Predict the current location with KalmanFilter
  168. STrack.multi_predict(track_pool_dict[cls_id], self.motion)
  169. if pred_embs_cls is None:
  170. # in original ByteTrack
  171. dists = matching.iou_distance(track_pool_dict[cls_id],
  172. detections)
  173. matches, u_track, u_detection = matching.linear_assignment(
  174. dists, thresh=self.match_thres) # not self.tracked_thresh
  175. else:
  176. dists = matching.embedding_distance(
  177. track_pool_dict[cls_id],
  178. detections,
  179. metric=self.metric_type)
  180. dists = matching.fuse_motion(
  181. self.motion, dists, track_pool_dict[cls_id], detections)
  182. matches, u_track, u_detection = matching.linear_assignment(
  183. dists, thresh=self.tracked_thresh)
  184. for i_tracked, idet in matches:
  185. # i_tracked is the id of the track and idet is the detection
  186. track = track_pool_dict[cls_id][i_tracked]
  187. det = detections[idet]
  188. if track.state == TrackState.Tracked:
  189. # If the track is active, add the detection to the track
  190. track.update(detections[idet], self.frame_id)
  191. activated_tracks_dict[cls_id].append(track)
  192. else:
  193. # We have obtained a detection from a track which is not active,
  194. # hence put the track in refind_stracks list
  195. track.re_activate(det, self.frame_id, new_id=False)
  196. refined_tracks_dict[cls_id].append(track)
  197. # None of the steps below happen if there are no undetected tracks.
  198. """ Step 3: Second association, with IOU"""
  199. if self.use_byte:
  200. inds_low = pred_dets_dict[cls_id][:, 1:2] > self.low_conf_thres
  201. inds_high = pred_dets_dict[cls_id][:, 1:2] < self.conf_thres
  202. inds_second = np.logical_and(inds_low, inds_high).squeeze(-1)
  203. pred_dets_cls_second = pred_dets_dict[cls_id][inds_second]
  204. # association the untrack to the low score detections
  205. if len(pred_dets_cls_second) > 0:
  206. if pred_embs_dict[cls_id] is None:
  207. # in original ByteTrack
  208. detections_second = [
  209. STrack(
  210. STrack.tlbr_to_tlwh(tlbrs[2:6]),
  211. tlbrs[1],
  212. cls_id,
  213. 30,
  214. temp_feat=None)
  215. for tlbrs in pred_dets_cls_second
  216. ]
  217. else:
  218. pred_embs_cls_second = pred_embs_dict[cls_id][
  219. inds_second]
  220. detections_second = [
  221. STrack(
  222. STrack.tlbr_to_tlwh(tlbrs[2:6]), tlbrs[1],
  223. cls_id, 30, temp_feat) for (tlbrs, temp_feat) in
  224. zip(pred_dets_cls_second, pred_embs_cls_second)
  225. ]
  226. else:
  227. detections_second = []
  228. r_tracked_stracks = [
  229. track_pool_dict[cls_id][i] for i in u_track
  230. if track_pool_dict[cls_id][i].state == TrackState.Tracked
  231. ]
  232. dists = matching.iou_distance(r_tracked_stracks,
  233. detections_second)
  234. matches, u_track, u_detection_second = matching.linear_assignment(
  235. dists, thresh=0.4) # not r_tracked_thresh
  236. else:
  237. detections = [detections[i] for i in u_detection]
  238. r_tracked_stracks = []
  239. for i in u_track:
  240. if track_pool_dict[cls_id][i].state == TrackState.Tracked:
  241. r_tracked_stracks.append(track_pool_dict[cls_id][i])
  242. dists = matching.iou_distance(r_tracked_stracks, detections)
  243. matches, u_track, u_detection = matching.linear_assignment(
  244. dists, thresh=self.r_tracked_thresh)
  245. for i_tracked, idet in matches:
  246. track = r_tracked_stracks[i_tracked]
  247. det = detections[
  248. idet] if not self.use_byte else detections_second[idet]
  249. if track.state == TrackState.Tracked:
  250. track.update(det, self.frame_id)
  251. activated_tracks_dict[cls_id].append(track)
  252. else:
  253. track.re_activate(det, self.frame_id, new_id=False)
  254. refined_tracks_dict[cls_id].append(track)
  255. for it in u_track:
  256. track = r_tracked_stracks[it]
  257. if not track.state == TrackState.Lost:
  258. track.mark_lost()
  259. lost_tracks_dict[cls_id].append(track)
  260. '''Deal with unconfirmed tracks, usually tracks with only one beginning frame'''
  261. detections = [detections[i] for i in u_detection]
  262. dists = matching.iou_distance(unconfirmed_dict[cls_id], detections)
  263. matches, u_unconfirmed, u_detection = matching.linear_assignment(
  264. dists, thresh=self.unconfirmed_thresh)
  265. for i_tracked, idet in matches:
  266. unconfirmed_dict[cls_id][i_tracked].update(detections[idet],
  267. self.frame_id)
  268. activated_tracks_dict[cls_id].append(unconfirmed_dict[cls_id][
  269. i_tracked])
  270. for it in u_unconfirmed:
  271. track = unconfirmed_dict[cls_id][it]
  272. track.mark_removed()
  273. removed_tracks_dict[cls_id].append(track)
  274. """ Step 4: Init new stracks"""
  275. for inew in u_detection:
  276. track = detections[inew]
  277. if track.score < self.det_thresh:
  278. continue
  279. track.activate(self.motion, self.frame_id)
  280. activated_tracks_dict[cls_id].append(track)
  281. """ Step 5: Update state"""
  282. for track in self.lost_tracks_dict[cls_id]:
  283. if self.frame_id - track.end_frame > self.max_time_lost:
  284. track.mark_removed()
  285. removed_tracks_dict[cls_id].append(track)
  286. self.tracked_tracks_dict[cls_id] = [
  287. t for t in self.tracked_tracks_dict[cls_id]
  288. if t.state == TrackState.Tracked
  289. ]
  290. self.tracked_tracks_dict[cls_id] = joint_stracks(
  291. self.tracked_tracks_dict[cls_id], activated_tracks_dict[cls_id])
  292. self.tracked_tracks_dict[cls_id] = joint_stracks(
  293. self.tracked_tracks_dict[cls_id], refined_tracks_dict[cls_id])
  294. self.lost_tracks_dict[cls_id] = sub_stracks(
  295. self.lost_tracks_dict[cls_id], self.tracked_tracks_dict[cls_id])
  296. self.lost_tracks_dict[cls_id].extend(lost_tracks_dict[cls_id])
  297. self.lost_tracks_dict[cls_id] = sub_stracks(
  298. self.lost_tracks_dict[cls_id], self.removed_tracks_dict[cls_id])
  299. self.removed_tracks_dict[cls_id].extend(removed_tracks_dict[cls_id])
  300. self.tracked_tracks_dict[cls_id], self.lost_tracks_dict[
  301. cls_id] = remove_duplicate_stracks(
  302. self.tracked_tracks_dict[cls_id],
  303. self.lost_tracks_dict[cls_id])
  304. # get scores of lost tracks
  305. output_tracks_dict[cls_id] = [
  306. track for track in self.tracked_tracks_dict[cls_id]
  307. if track.is_activated
  308. ]
  309. return output_tracks_dict