mot.py 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638
  1. # Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import os
  15. import sys
  16. import cv2
  17. import glob
  18. import numpy as np
  19. from collections import OrderedDict, defaultdict
  20. try:
  21. from collections.abc import Sequence
  22. except Exception:
  23. from collections import Sequence
  24. from .dataset import DetDataset, _make_dataset, _is_valid_file
  25. from ppdet.core.workspace import register, serializable
  26. from ppdet.utils.logger import setup_logger
  27. logger = setup_logger(__name__)
  28. @register
  29. @serializable
  30. class MOTDataSet(DetDataset):
  31. """
  32. Load dataset with MOT format, only support single class MOT.
  33. Args:
  34. dataset_dir (str): root directory for dataset.
  35. image_lists (str|list): mot data image lists, muiti-source mot dataset.
  36. data_fields (list): key name of data dictionary, at least have 'image'.
  37. sample_num (int): number of samples to load, -1 means all.
  38. repeat (int): repeat times for dataset, use in benchmark.
  39. Notes:
  40. MOT datasets root directory following this:
  41. dataset/mot
  42. |——————image_lists
  43. | |——————caltech.train
  44. | |——————caltech.val
  45. | |——————mot16.train
  46. | |——————mot17.train
  47. | ......
  48. |——————Caltech
  49. |——————MOT17
  50. |——————......
  51. All the MOT datasets have the following structure:
  52. Caltech
  53. |——————images
  54. | └——————00001.jpg
  55. | |—————— ...
  56. | └——————0000N.jpg
  57. └——————labels_with_ids
  58. └——————00001.txt
  59. |—————— ...
  60. └——————0000N.txt
  61. or
  62. MOT17
  63. |——————images
  64. | └——————train
  65. | └——————test
  66. └——————labels_with_ids
  67. └——————train
  68. """
  69. def __init__(self,
  70. dataset_dir=None,
  71. image_lists=[],
  72. data_fields=['image'],
  73. sample_num=-1,
  74. repeat=1):
  75. super(MOTDataSet, self).__init__(
  76. dataset_dir=dataset_dir,
  77. data_fields=data_fields,
  78. sample_num=sample_num,
  79. repeat=repeat)
  80. self.dataset_dir = dataset_dir
  81. self.image_lists = image_lists
  82. if isinstance(self.image_lists, str):
  83. self.image_lists = [self.image_lists]
  84. self.roidbs = None
  85. self.cname2cid = None
  86. def get_anno(self):
  87. if self.image_lists == []:
  88. return
  89. # only used to get categories and metric
  90. # only check first data, but the label_list of all data should be same.
  91. first_mot_data = self.image_lists[0].split('.')[0]
  92. anno_file = os.path.join(self.dataset_dir, first_mot_data,
  93. 'label_list.txt')
  94. return anno_file
  95. def parse_dataset(self):
  96. self.img_files = OrderedDict()
  97. self.img_start_index = OrderedDict()
  98. self.label_files = OrderedDict()
  99. self.tid_num = OrderedDict()
  100. self.tid_start_index = OrderedDict()
  101. img_index = 0
  102. for data_name in self.image_lists:
  103. # check every data image list
  104. image_lists_dir = os.path.join(self.dataset_dir, 'image_lists')
  105. assert os.path.isdir(image_lists_dir), \
  106. "The {} is not a directory.".format(image_lists_dir)
  107. list_path = os.path.join(image_lists_dir, data_name)
  108. assert os.path.exists(list_path), \
  109. "The list path {} does not exist.".format(list_path)
  110. # record img_files, filter out empty ones
  111. with open(list_path, 'r') as file:
  112. self.img_files[data_name] = file.readlines()
  113. self.img_files[data_name] = [
  114. os.path.join(self.dataset_dir, x.strip())
  115. for x in self.img_files[data_name]
  116. ]
  117. self.img_files[data_name] = list(
  118. filter(lambda x: len(x) > 0, self.img_files[data_name]))
  119. self.img_start_index[data_name] = img_index
  120. img_index += len(self.img_files[data_name])
  121. # record label_files
  122. self.label_files[data_name] = [
  123. x.replace('images', 'labels_with_ids').replace(
  124. '.png', '.txt').replace('.jpg', '.txt')
  125. for x in self.img_files[data_name]
  126. ]
  127. for data_name, label_paths in self.label_files.items():
  128. max_index = -1
  129. for lp in label_paths:
  130. lb = np.loadtxt(lp)
  131. if len(lb) < 1:
  132. continue
  133. if len(lb.shape) < 2:
  134. img_max = lb[1]
  135. else:
  136. img_max = np.max(lb[:, 1])
  137. if img_max > max_index:
  138. max_index = img_max
  139. self.tid_num[data_name] = int(max_index + 1)
  140. last_index = 0
  141. for i, (k, v) in enumerate(self.tid_num.items()):
  142. self.tid_start_index[k] = last_index
  143. last_index += v
  144. self.num_identities_dict = defaultdict(int)
  145. self.num_identities_dict[0] = int(last_index + 1) # single class
  146. self.num_imgs_each_data = [len(x) for x in self.img_files.values()]
  147. self.total_imgs = sum(self.num_imgs_each_data)
  148. logger.info('MOT dataset summary: ')
  149. logger.info(self.tid_num)
  150. logger.info('Total images: {}'.format(self.total_imgs))
  151. logger.info('Image start index: {}'.format(self.img_start_index))
  152. logger.info('Total identities: {}'.format(self.num_identities_dict[0]))
  153. logger.info('Identity start index: {}'.format(self.tid_start_index))
  154. records = []
  155. cname2cid = mot_label()
  156. for img_index in range(self.total_imgs):
  157. for i, (k, v) in enumerate(self.img_start_index.items()):
  158. if img_index >= v:
  159. data_name = list(self.label_files.keys())[i]
  160. start_index = v
  161. img_file = self.img_files[data_name][img_index - start_index]
  162. lbl_file = self.label_files[data_name][img_index - start_index]
  163. if not os.path.exists(img_file):
  164. logger.warning('Illegal image file: {}, and it will be ignored'.
  165. format(img_file))
  166. continue
  167. if not os.path.isfile(lbl_file):
  168. logger.warning('Illegal label file: {}, and it will be ignored'.
  169. format(lbl_file))
  170. continue
  171. labels = np.loadtxt(lbl_file, dtype=np.float32).reshape(-1, 6)
  172. # each row in labels (N, 6) is [gt_class, gt_identity, cx, cy, w, h]
  173. cx, cy = labels[:, 2], labels[:, 3]
  174. w, h = labels[:, 4], labels[:, 5]
  175. gt_bbox = np.stack((cx, cy, w, h)).T.astype('float32')
  176. gt_class = labels[:, 0:1].astype('int32')
  177. gt_score = np.ones((len(labels), 1)).astype('float32')
  178. gt_ide = labels[:, 1:2].astype('int32')
  179. for i, _ in enumerate(gt_ide):
  180. if gt_ide[i] > -1:
  181. gt_ide[i] += self.tid_start_index[data_name]
  182. mot_rec = {
  183. 'im_file': img_file,
  184. 'im_id': img_index,
  185. } if 'image' in self.data_fields else {}
  186. gt_rec = {
  187. 'gt_class': gt_class,
  188. 'gt_score': gt_score,
  189. 'gt_bbox': gt_bbox,
  190. 'gt_ide': gt_ide,
  191. }
  192. for k, v in gt_rec.items():
  193. if k in self.data_fields:
  194. mot_rec[k] = v
  195. records.append(mot_rec)
  196. if self.sample_num > 0 and img_index >= self.sample_num:
  197. break
  198. assert len(records) > 0, 'not found any mot record in %s' % (
  199. self.image_lists)
  200. self.roidbs, self.cname2cid = records, cname2cid
  201. @register
  202. @serializable
  203. class MCMOTDataSet(DetDataset):
  204. """
  205. Load dataset with MOT format, support multi-class MOT.
  206. Args:
  207. dataset_dir (str): root directory for dataset.
  208. image_lists (list(str)): mcmot data image lists, muiti-source mcmot dataset.
  209. data_fields (list): key name of data dictionary, at least have 'image'.
  210. label_list (str): if use_default_label is False, will load
  211. mapping between category and class index.
  212. sample_num (int): number of samples to load, -1 means all.
  213. Notes:
  214. MCMOT datasets root directory following this:
  215. dataset/mot
  216. |——————image_lists
  217. | |——————visdrone_mcmot.train
  218. | |——————visdrone_mcmot.val
  219. visdrone_mcmot
  220. |——————images
  221. | └——————train
  222. | └——————val
  223. └——————labels_with_ids
  224. └——————train
  225. """
  226. def __init__(self,
  227. dataset_dir=None,
  228. image_lists=[],
  229. data_fields=['image'],
  230. label_list=None,
  231. sample_num=-1):
  232. super(MCMOTDataSet, self).__init__(
  233. dataset_dir=dataset_dir,
  234. data_fields=data_fields,
  235. sample_num=sample_num)
  236. self.dataset_dir = dataset_dir
  237. self.image_lists = image_lists
  238. if isinstance(self.image_lists, str):
  239. self.image_lists = [self.image_lists]
  240. self.label_list = label_list
  241. self.roidbs = None
  242. self.cname2cid = None
  243. def get_anno(self):
  244. if self.image_lists == []:
  245. return
  246. # only used to get categories and metric
  247. # only check first data, but the label_list of all data should be same.
  248. first_mot_data = self.image_lists[0].split('.')[0]
  249. anno_file = os.path.join(self.dataset_dir, first_mot_data,
  250. 'label_list.txt')
  251. return anno_file
  252. def parse_dataset(self):
  253. self.img_files = OrderedDict()
  254. self.img_start_index = OrderedDict()
  255. self.label_files = OrderedDict()
  256. self.tid_num = OrderedDict()
  257. self.tid_start_idx_of_cls_ids = defaultdict(dict) # for MCMOT
  258. img_index = 0
  259. for data_name in self.image_lists:
  260. # check every data image list
  261. image_lists_dir = os.path.join(self.dataset_dir, 'image_lists')
  262. assert os.path.isdir(image_lists_dir), \
  263. "The {} is not a directory.".format(image_lists_dir)
  264. list_path = os.path.join(image_lists_dir, data_name)
  265. assert os.path.exists(list_path), \
  266. "The list path {} does not exist.".format(list_path)
  267. # record img_files, filter out empty ones
  268. with open(list_path, 'r') as file:
  269. self.img_files[data_name] = file.readlines()
  270. self.img_files[data_name] = [
  271. os.path.join(self.dataset_dir, x.strip())
  272. for x in self.img_files[data_name]
  273. ]
  274. self.img_files[data_name] = list(
  275. filter(lambda x: len(x) > 0, self.img_files[data_name]))
  276. self.img_start_index[data_name] = img_index
  277. img_index += len(self.img_files[data_name])
  278. # record label_files
  279. self.label_files[data_name] = [
  280. x.replace('images', 'labels_with_ids').replace(
  281. '.png', '.txt').replace('.jpg', '.txt')
  282. for x in self.img_files[data_name]
  283. ]
  284. for data_name, label_paths in self.label_files.items():
  285. # using max_ids_dict rather than max_index
  286. max_ids_dict = defaultdict(int)
  287. for lp in label_paths:
  288. lb = np.loadtxt(lp)
  289. if len(lb) < 1:
  290. continue
  291. lb = lb.reshape(-1, 6)
  292. for item in lb:
  293. if item[1] > max_ids_dict[int(item[0])]:
  294. # item[0]: cls_id
  295. # item[1]: track id
  296. max_ids_dict[int(item[0])] = int(item[1])
  297. # track id number
  298. self.tid_num[data_name] = max_ids_dict
  299. last_idx_dict = defaultdict(int)
  300. for i, (k, v) in enumerate(self.tid_num.items()): # each sub dataset
  301. for cls_id, id_num in v.items(): # v is a max_ids_dict
  302. self.tid_start_idx_of_cls_ids[k][cls_id] = last_idx_dict[cls_id]
  303. last_idx_dict[cls_id] += id_num
  304. self.num_identities_dict = defaultdict(int)
  305. for k, v in last_idx_dict.items():
  306. self.num_identities_dict[k] = int(v) # total ids of each category
  307. self.num_imgs_each_data = [len(x) for x in self.img_files.values()]
  308. self.total_imgs = sum(self.num_imgs_each_data)
  309. # cname2cid and cid2cname
  310. cname2cid = {}
  311. if self.label_list is not None:
  312. # if use label_list for multi source mix dataset,
  313. # please make sure label_list in the first sub_dataset at least.
  314. sub_dataset = self.image_lists[0].split('.')[0]
  315. label_path = os.path.join(self.dataset_dir, sub_dataset,
  316. self.label_list)
  317. if not os.path.exists(label_path):
  318. logger.info(
  319. "Note: label_list {} does not exists, use VisDrone 10 classes labels as default.".
  320. format(label_path))
  321. cname2cid = visdrone_mcmot_label()
  322. else:
  323. with open(label_path, 'r') as fr:
  324. label_id = 0
  325. for line in fr.readlines():
  326. cname2cid[line.strip()] = label_id
  327. label_id += 1
  328. else:
  329. cname2cid = visdrone_mcmot_label()
  330. cid2cname = dict([(v, k) for (k, v) in cname2cid.items()])
  331. logger.info('MCMOT dataset summary: ')
  332. logger.info(self.tid_num)
  333. logger.info('Total images: {}'.format(self.total_imgs))
  334. logger.info('Image start index: {}'.format(self.img_start_index))
  335. logger.info('Total identities of each category: ')
  336. num_identities_dict = sorted(
  337. self.num_identities_dict.items(), key=lambda x: x[0])
  338. total_IDs_all_cats = 0
  339. for (k, v) in num_identities_dict:
  340. logger.info('Category {} [{}] has {} IDs.'.format(k, cid2cname[k],
  341. v))
  342. total_IDs_all_cats += v
  343. logger.info('Total identities of all categories: {}'.format(
  344. total_IDs_all_cats))
  345. logger.info('Identity start index of each category: ')
  346. for k, v in self.tid_start_idx_of_cls_ids.items():
  347. sorted_v = sorted(v.items(), key=lambda x: x[0])
  348. for (cls_id, start_idx) in sorted_v:
  349. logger.info('Start index of dataset {} category {:d} is {:d}'
  350. .format(k, cls_id, start_idx))
  351. records = []
  352. for img_index in range(self.total_imgs):
  353. for i, (k, v) in enumerate(self.img_start_index.items()):
  354. if img_index >= v:
  355. data_name = list(self.label_files.keys())[i]
  356. start_index = v
  357. img_file = self.img_files[data_name][img_index - start_index]
  358. lbl_file = self.label_files[data_name][img_index - start_index]
  359. if not os.path.exists(img_file):
  360. logger.warning('Illegal image file: {}, and it will be ignored'.
  361. format(img_file))
  362. continue
  363. if not os.path.isfile(lbl_file):
  364. logger.warning('Illegal label file: {}, and it will be ignored'.
  365. format(lbl_file))
  366. continue
  367. labels = np.loadtxt(lbl_file, dtype=np.float32).reshape(-1, 6)
  368. # each row in labels (N, 6) is [gt_class, gt_identity, cx, cy, w, h]
  369. cx, cy = labels[:, 2], labels[:, 3]
  370. w, h = labels[:, 4], labels[:, 5]
  371. gt_bbox = np.stack((cx, cy, w, h)).T.astype('float32')
  372. gt_class = labels[:, 0:1].astype('int32')
  373. gt_score = np.ones((len(labels), 1)).astype('float32')
  374. gt_ide = labels[:, 1:2].astype('int32')
  375. for i, _ in enumerate(gt_ide):
  376. if gt_ide[i] > -1:
  377. cls_id = int(gt_class[i])
  378. start_idx = self.tid_start_idx_of_cls_ids[data_name][cls_id]
  379. gt_ide[i] += start_idx
  380. mot_rec = {
  381. 'im_file': img_file,
  382. 'im_id': img_index,
  383. } if 'image' in self.data_fields else {}
  384. gt_rec = {
  385. 'gt_class': gt_class,
  386. 'gt_score': gt_score,
  387. 'gt_bbox': gt_bbox,
  388. 'gt_ide': gt_ide,
  389. }
  390. for k, v in gt_rec.items():
  391. if k in self.data_fields:
  392. mot_rec[k] = v
  393. records.append(mot_rec)
  394. if self.sample_num > 0 and img_index >= self.sample_num:
  395. break
  396. assert len(records) > 0, 'not found any mot record in %s' % (
  397. self.image_lists)
  398. self.roidbs, self.cname2cid = records, cname2cid
  399. @register
  400. @serializable
  401. class MOTImageFolder(DetDataset):
  402. """
  403. Load MOT dataset with MOT format from image folder or video .
  404. Args:
  405. video_file (str): path of the video file, default ''.
  406. frame_rate (int): frame rate of the video, use cv2 VideoCapture if not set.
  407. dataset_dir (str): root directory for dataset.
  408. keep_ori_im (bool): whether to keep original image, default False.
  409. Set True when used during MOT model inference while saving
  410. images or video, or used in DeepSORT.
  411. """
  412. def __init__(self,
  413. video_file=None,
  414. frame_rate=-1,
  415. dataset_dir=None,
  416. data_root=None,
  417. image_dir=None,
  418. sample_num=-1,
  419. keep_ori_im=False,
  420. anno_path=None,
  421. **kwargs):
  422. super(MOTImageFolder, self).__init__(
  423. dataset_dir, image_dir, sample_num=sample_num)
  424. self.video_file = video_file
  425. self.data_root = data_root
  426. self.keep_ori_im = keep_ori_im
  427. self._imid2path = {}
  428. self.roidbs = None
  429. self.frame_rate = frame_rate
  430. self.anno_path = anno_path
  431. def check_or_download_dataset(self):
  432. return
  433. def parse_dataset(self, ):
  434. if not self.roidbs:
  435. if self.video_file is None:
  436. self.frame_rate = 30 # set as default if infer image folder
  437. self.roidbs = self._load_images()
  438. else:
  439. self.roidbs = self._load_video_images()
  440. def _load_video_images(self):
  441. if self.frame_rate == -1:
  442. # if frame_rate is not set for video, use cv2.VideoCapture
  443. cap = cv2.VideoCapture(self.video_file)
  444. self.frame_rate = int(cap.get(cv2.CAP_PROP_FPS))
  445. extension = self.video_file.split('.')[-1]
  446. output_path = self.video_file.replace('.{}'.format(extension), '')
  447. frames_path = video2frames(self.video_file, output_path,
  448. self.frame_rate)
  449. self.video_frames = sorted(
  450. glob.glob(os.path.join(frames_path, '*.png')))
  451. self.video_length = len(self.video_frames)
  452. logger.info('Length of the video: {:d} frames.'.format(
  453. self.video_length))
  454. ct = 0
  455. records = []
  456. for image in self.video_frames:
  457. assert image != '' and os.path.isfile(image), \
  458. "Image {} not found".format(image)
  459. if self.sample_num > 0 and ct >= self.sample_num:
  460. break
  461. rec = {'im_id': np.array([ct]), 'im_file': image}
  462. if self.keep_ori_im:
  463. rec.update({'keep_ori_im': 1})
  464. self._imid2path[ct] = image
  465. ct += 1
  466. records.append(rec)
  467. assert len(records) > 0, "No image file found"
  468. return records
  469. def _find_images(self):
  470. image_dir = self.image_dir
  471. if not isinstance(image_dir, Sequence):
  472. image_dir = [image_dir]
  473. images = []
  474. for im_dir in image_dir:
  475. if os.path.isdir(im_dir):
  476. im_dir = os.path.join(self.dataset_dir, im_dir)
  477. images.extend(_make_dataset(im_dir))
  478. elif os.path.isfile(im_dir) and _is_valid_file(im_dir):
  479. images.append(im_dir)
  480. return images
  481. def _load_images(self):
  482. images = self._find_images()
  483. ct = 0
  484. records = []
  485. for image in images:
  486. assert image != '' and os.path.isfile(image), \
  487. "Image {} not found".format(image)
  488. if self.sample_num > 0 and ct >= self.sample_num:
  489. break
  490. rec = {'im_id': np.array([ct]), 'im_file': image}
  491. if self.keep_ori_im:
  492. rec.update({'keep_ori_im': 1})
  493. self._imid2path[ct] = image
  494. ct += 1
  495. records.append(rec)
  496. assert len(records) > 0, "No image file found"
  497. return records
  498. def get_imid2path(self):
  499. return self._imid2path
  500. def set_images(self, images):
  501. self.image_dir = images
  502. self.roidbs = self._load_images()
  503. def set_video(self, video_file, frame_rate):
  504. # update video_file and frame_rate by command line of tools/infer_mot.py
  505. self.video_file = video_file
  506. self.frame_rate = frame_rate
  507. assert os.path.isfile(self.video_file) and _is_valid_video(self.video_file), \
  508. "wrong or unsupported file format: {}".format(self.video_file)
  509. self.roidbs = self._load_video_images()
  510. def get_anno(self):
  511. return self.anno_path
  512. def _is_valid_video(f, extensions=('.mp4', '.avi', '.mov', '.rmvb', 'flv')):
  513. return f.lower().endswith(extensions)
  514. def video2frames(video_path, outpath, frame_rate, **kargs):
  515. def _dict2str(kargs):
  516. cmd_str = ''
  517. for k, v in kargs.items():
  518. cmd_str += (' ' + str(k) + ' ' + str(v))
  519. return cmd_str
  520. ffmpeg = ['ffmpeg ', ' -y -loglevel ', ' error ']
  521. vid_name = os.path.basename(video_path).split('.')[0]
  522. out_full_path = os.path.join(outpath, vid_name)
  523. if not os.path.exists(out_full_path):
  524. os.makedirs(out_full_path)
  525. # video file name
  526. outformat = os.path.join(out_full_path, '%08d.png')
  527. cmd = ffmpeg
  528. cmd = ffmpeg + [
  529. ' -i ', video_path, ' -r ', str(frame_rate), ' -f image2 ', outformat
  530. ]
  531. cmd = ''.join(cmd) + _dict2str(kargs)
  532. if os.system(cmd) != 0:
  533. raise RuntimeError('ffmpeg process video: {} error'.format(video_path))
  534. sys.exit(-1)
  535. sys.stdout.flush()
  536. return out_full_path
  537. def mot_label():
  538. labels_map = {'person': 0}
  539. return labels_map
  540. def visdrone_mcmot_label():
  541. labels_map = {
  542. 'pedestrian': 0,
  543. 'people': 1,
  544. 'bicycle': 2,
  545. 'car': 3,
  546. 'van': 4,
  547. 'truck': 5,
  548. 'tricycle': 6,
  549. 'awning-tricycle': 7,
  550. 'bus': 8,
  551. 'motor': 9,
  552. }
  553. return labels_map