video_action_infer.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311
  1. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import os
  15. import yaml
  16. import glob
  17. import cv2
  18. import numpy as np
  19. import math
  20. import paddle
  21. import sys
  22. from collections import Sequence
  23. import paddle.nn.functional as F
# add the deploy path of PaddleDetection to sys.path
  25. parent_path = os.path.abspath(os.path.join(__file__, *(['..'] * 2)))
  26. sys.path.insert(0, parent_path)
  27. from paddle.inference import Config, create_predictor
  28. from python.utils import argsparser, Timer, get_current_memory_mb
  29. from python.benchmark_utils import PaddleInferBenchmark
  30. from python.infer import Detector, print_arguments
  31. from video_action_preprocess import VideoDecoder, Sampler, Scale, CenterCrop, Normalization, Image2Array
  32. def softmax(x):
  33. f_x = np.exp(x) / np.sum(np.exp(x))
  34. return f_x
  35. class VideoActionRecognizer(object):
  36. """
  37. Args:
  38. model_dir (str): root path of model.pdiparams, model.pdmodel and infer_cfg.yml
  39. device (str): Choose the device you want to run, it can be: CPU/GPU/XPU, default is CPU
  40. run_mode (str): mode of running(paddle/trt_fp32/trt_fp16)
  41. batch_size (int): size of pre batch in inference
  42. trt_min_shape (int): min shape for dynamic shape in trt
  43. trt_max_shape (int): max shape for dynamic shape in trt
  44. trt_opt_shape (int): opt shape for dynamic shape in trt
  45. trt_calib_mode (bool): If the model is produced by TRT offline quantitative
  46. calibration, trt_calib_mode need to set True
  47. cpu_threads (int): cpu threads
  48. enable_mkldnn (bool): whether to open MKLDNN
  49. """
  50. def __init__(self,
  51. model_dir,
  52. device='CPU',
  53. run_mode='paddle',
  54. num_seg=8,
  55. seg_len=1,
  56. short_size=256,
  57. target_size=224,
  58. top_k=1,
  59. batch_size=1,
  60. trt_min_shape=1,
  61. trt_max_shape=1280,
  62. trt_opt_shape=640,
  63. trt_calib_mode=False,
  64. cpu_threads=1,
  65. enable_mkldnn=False,
  66. ir_optim=True):
  67. self.num_seg = num_seg
  68. self.seg_len = seg_len
  69. self.short_size = short_size
  70. self.target_size = target_size
  71. self.top_k = top_k
  72. assert batch_size == 1, "VideoActionRecognizer only support batch_size=1 now."
  73. self.model_dir = model_dir
  74. self.device = device
  75. self.run_mode = run_mode
  76. self.batch_size = batch_size
  77. self.trt_min_shape = trt_min_shape
  78. self.trt_max_shape = trt_max_shape
  79. self.trt_opt_shape = trt_opt_shape
  80. self.trt_calib_mode = trt_calib_mode
  81. self.cpu_threads = cpu_threads
  82. self.enable_mkldnn = enable_mkldnn
  83. self.ir_optim = ir_optim
  84. self.recognize_times = Timer()
  85. model_file_path = glob.glob(os.path.join(model_dir, "*.pdmodel"))[0]
  86. params_file_path = glob.glob(os.path.join(model_dir, "*.pdiparams"))[0]
  87. self.config = Config(model_file_path, params_file_path)
  88. if device == "GPU" or device == "gpu":
  89. self.config.enable_use_gpu(8000, 0)
  90. else:
  91. self.config.disable_gpu()
  92. if self.enable_mkldnn:
  93. # cache 10 different shapes for mkldnn to avoid memory leak
  94. self.config.set_mkldnn_cache_capacity(10)
  95. self.config.enable_mkldnn()
  96. self.config.switch_ir_optim(self.ir_optim) # default true
  97. precision_map = {
  98. 'trt_int8': Config.Precision.Int8,
  99. 'trt_fp32': Config.Precision.Float32,
  100. 'trt_fp16': Config.Precision.Half
  101. }
  102. if run_mode in precision_map.keys():
  103. self.config.enable_tensorrt_engine(
  104. max_batch_size=8, precision_mode=precision_map[run_mode])
  105. self.config.enable_memory_optim()
  106. # use zero copy
  107. self.config.switch_use_feed_fetch_ops(False)
  108. self.predictor = create_predictor(self.config)
  109. @classmethod
  110. def init_with_cfg(cls, args, cfg):
  111. return cls(model_dir=cfg['model_dir'],
  112. short_size=cfg['short_size'],
  113. target_size=cfg['target_size'],
  114. batch_size=cfg['batch_size'],
  115. device=args.device,
  116. run_mode=args.run_mode,
  117. trt_min_shape=args.trt_min_shape,
  118. trt_max_shape=args.trt_max_shape,
  119. trt_opt_shape=args.trt_opt_shape,
  120. trt_calib_mode=args.trt_calib_mode,
  121. cpu_threads=args.cpu_threads,
  122. enable_mkldnn=args.enable_mkldnn)
  123. def preprocess_batch(self, file_list):
  124. batched_inputs = []
  125. for file in file_list:
  126. inputs = self.preprocess(file)
  127. batched_inputs.append(inputs)
  128. batched_inputs = [
  129. np.concatenate([item[i] for item in batched_inputs])
  130. for i in range(len(batched_inputs[0]))
  131. ]
  132. self.input_file = file_list
  133. return batched_inputs
  134. def get_timer(self):
  135. return self.recognize_times
  136. def predict(self, input):
  137. '''
  138. Args:
  139. input (str) or (list): video file path or image data list
  140. Returns:
  141. results (dict):
  142. '''
  143. input_names = self.predictor.get_input_names()
  144. input_tensor = self.predictor.get_input_handle(input_names[0])
  145. output_names = self.predictor.get_output_names()
  146. output_tensor = self.predictor.get_output_handle(output_names[0])
  147. # preprocess
  148. self.recognize_times.preprocess_time_s.start()
  149. if type(input) == str:
  150. inputs = self.preprocess_video(input)
  151. else:
  152. inputs = self.preprocess_frames(input)
  153. self.recognize_times.preprocess_time_s.end()
  154. inputs = np.expand_dims(
  155. inputs, axis=0).repeat(
  156. self.batch_size, axis=0).copy()
  157. input_tensor.copy_from_cpu(inputs)
  158. # model prediction
  159. self.recognize_times.inference_time_s.start()
  160. self.predictor.run()
  161. self.recognize_times.inference_time_s.end()
  162. output = output_tensor.copy_to_cpu()
  163. # postprocess
  164. self.recognize_times.postprocess_time_s.start()
  165. classes, scores = self.postprocess(output)
  166. self.recognize_times.postprocess_time_s.end()
  167. return classes, scores
  168. def preprocess_frames(self, frame_list):
  169. """
  170. frame_list: list, frame list
  171. return: list
  172. """
  173. results = {}
  174. results['frames_len'] = len(frame_list)
  175. results["imgs"] = frame_list
  176. img_mean = [0.485, 0.456, 0.406]
  177. img_std = [0.229, 0.224, 0.225]
  178. ops = [
  179. CenterCrop(self.target_size), Image2Array(),
  180. Normalization(img_mean, img_std)
  181. ]
  182. for op in ops:
  183. results = op(results)
  184. res = np.expand_dims(results['imgs'], axis=0).copy()
  185. return [res]
  186. def preprocess_video(self, input_file):
  187. """
  188. input_file: str, file path
  189. return: list
  190. """
  191. assert os.path.isfile(input_file) is not None, "{0} not exists".format(
  192. input_file)
  193. results = {'filename': input_file}
  194. img_mean = [0.485, 0.456, 0.406]
  195. img_std = [0.229, 0.224, 0.225]
  196. ops = [
  197. VideoDecoder(), Sampler(
  198. self.num_seg, self.seg_len, valid_mode=True),
  199. Scale(self.short_size), CenterCrop(self.target_size), Image2Array(),
  200. Normalization(img_mean, img_std)
  201. ]
  202. for op in ops:
  203. results = op(results)
  204. res = np.expand_dims(results['imgs'], axis=0).copy()
  205. return [res]
  206. def postprocess(self, output):
  207. output = output.flatten() # numpy.ndarray
  208. output = softmax(output)
  209. classes = np.argpartition(output, -self.top_k)[-self.top_k:]
  210. classes = classes[np.argsort(-output[classes])]
  211. scores = output[classes]
  212. return classes, scores
  213. def main():
  214. if not FLAGS.run_benchmark:
  215. assert FLAGS.batch_size == 1
  216. assert FLAGS.use_fp16 is False
  217. else:
  218. assert FLAGS.use_gpu is True
  219. recognizer = VideoActionRecognizer(
  220. FLAGS.model_dir,
  221. short_size=FLAGS.short_size,
  222. target_size=FLAGS.target_size,
  223. device=FLAGS.device,
  224. run_mode=FLAGS.run_mode,
  225. batch_size=FLAGS.batch_size,
  226. trt_min_shape=FLAGS.trt_min_shape,
  227. trt_max_shape=FLAGS.trt_max_shape,
  228. trt_opt_shape=FLAGS.trt_opt_shape,
  229. trt_calib_mode=FLAGS.trt_calib_mode,
  230. cpu_threads=FLAGS.cpu_threads,
  231. enable_mkldnn=FLAGS.enable_mkldnn, )
  232. if not FLAGS.run_benchmark:
  233. classes, scores = recognizer.predict(FLAGS.video_file)
  234. print("Current video file: {}".format(FLAGS.video_file))
  235. print("\ttop-1 class: {0}".format(classes[0]))
  236. print("\ttop-1 score: {0}".format(scores[0]))
  237. else:
  238. cm, gm, gu = get_current_memory_mb()
  239. mems = {'cpu_rss_mb': cm, 'gpu_rss_mb': gm, 'gpu_util': gu * 100}
  240. perf_info = recognizer.recognize_times.report()
  241. model_dir = FLAGS.model_dir
  242. mode = FLAGS.run_mode
  243. model_info = {
  244. 'model_name': model_dir.strip('/').split('/')[-1],
  245. 'precision': mode.split('_')[-1]
  246. }
  247. data_info = {
  248. 'batch_size': FLAGS.batch_size,
  249. 'shape': "dynamic_shape",
  250. 'data_num': perf_info['img_num']
  251. }
  252. recognize_log = PaddleInferBenchmark(recognizer.config, model_info,
  253. data_info, perf_info, mems)
  254. recognize_log('Fight')
  255. if __name__ == '__main__':
  256. paddle.enable_static()
  257. parser = argsparser()
  258. FLAGS = parser.parse_args()
  259. print_arguments(FLAGS)
  260. FLAGS.device = FLAGS.device.upper()
  261. assert FLAGS.device in ['CPU', 'GPU', 'XPU'
  262. ], "device should be CPU, GPU or XPU"
  263. main()