123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378 |
- # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- from __future__ import absolute_import
- from __future__ import division
- from __future__ import print_function
- import os
- import sys
- import six
- import glob
- import time
- import yaml
- import argparse
- import cv2
- import numpy as np
- import paddle
- import paddle.version as paddle_version
- from paddle.inference import Config, create_predictor, PrecisionType, get_trt_runtime_version
# Architectures that need a tuned dynamic-shape file (collected on a first run,
# consumed on later runs) before TensorRT dynamic shapes can be enabled.
TUNED_TRT_DYNAMIC_MODELS = {'DETR'}
def check_version(version='2.2'):
    """Ensure the installed PaddlePaddle release is at least *version*.

    Args:
        version (str): minimum required release, e.g. '2.2', or the literal
            'develop' to require a develop build.

    Raises:
        Exception: when the installed release is older than *version*, or
            when 'develop' is required but a release build is installed.
    """
    err = "PaddlePaddle version {} or higher is required, " \
          "or a suitable develop version is satisfied as well. \n" \
          "Please make sure the version is good with your code.".format(version)
    version_installed = [
        paddle_version.major, paddle_version.minor, paddle_version.patch,
        paddle_version.rc
    ]
    # A develop build reports 0.0.0.0 and satisfies every requirement.
    if version_installed == ['0', '0', '0', '0']:
        return
    # Past this point the build is a release, so 'develop' can never be met.
    if version == 'develop':
        raise Exception("PaddlePaddle develop version is required!")
    version_split = version.split('.')
    length = min(len(version_installed), len(version_split))
    for i in range(length):
        # Compare numerically: string comparison would rank '9' above '10'.
        installed, required = int(version_installed[i]), int(version_split[i])
        if installed > required:
            return
        if installed < required:
            raise Exception(err)
def check_trt_version(version='8.2'):
    """Ensure the TensorRT runtime linked into Paddle is at least *version*.

    Args:
        version (str): minimum required TensorRT version, e.g. '8.2'.

    Raises:
        Exception: when the linked TensorRT runtime is older than *version*.
    """
    # Fixed message: original concatenation produced "required,Please" with
    # no separating space.
    err = "TensorRT version {} or higher is required, " \
          "Please make sure the version is good with your code.".format(version)
    version_split = list(map(int, version.split('.')))
    # Tuple of ints, e.g. (8, 2, 4), reported by the Paddle inference runtime.
    version_installed = get_trt_runtime_version()
    length = min(len(version_installed), len(version_split))
    for i in range(length):
        if version_installed[i] > version_split[i]:
            return
        if version_installed[i] < version_split[i]:
            raise Exception(err)
- # preprocess ops
def decode_image(im_file, im_info):
    """Load an image (file path or ndarray) and seed `im_info` with its shape.

    Args:
        im_file: path to an image file, or an already-decoded HWC ndarray.
        im_info (dict): metadata dict; 'im_shape' and 'scale_factor' are set.

    Returns:
        tuple: (image ndarray in RGB/HWC order, updated im_info).
    """
    if isinstance(im_file, str):
        # Read raw bytes and decode; OpenCV yields BGR, the model wants RGB.
        with open(im_file, 'rb') as f:
            raw = np.frombuffer(f.read(), dtype='uint8')
        image = cv2.cvtColor(cv2.imdecode(raw, 1), cv2.COLOR_BGR2RGB)
    else:
        image = im_file
    im_info['im_shape'] = np.array(image.shape[:2], dtype=np.float32)
    im_info['scale_factor'] = np.array([1., 1.], dtype=np.float32)
    return image, im_info
class Resize(object):
    """Resize an image to `target_size`, optionally keeping the aspect ratio.

    Args:
        target_size (int|list): target [h, w]; an int is expanded to a square.
        keep_ratio (bool): when True, scale isotropically so the short side
            reaches the small target without the long side exceeding the
            large target; otherwise stretch each axis independently.
        interp (int): OpenCV interpolation flag.
    """

    def __init__(self, target_size, keep_ratio=True, interp=cv2.INTER_LINEAR):
        if isinstance(target_size, int):
            target_size = [target_size, target_size]
        self.target_size = target_size
        self.keep_ratio = keep_ratio
        self.interp = interp

    def __call__(self, im, im_info):
        """Resize `im` and record the new shape and scale in `im_info`."""
        assert len(self.target_size) == 2
        assert self.target_size[0] > 0 and self.target_size[1] > 0
        # (removed unused local `im_channel` from the original)
        im_scale_y, im_scale_x = self.generate_scale(im)
        im = cv2.resize(
            im,
            None,
            None,
            fx=im_scale_x,
            fy=im_scale_y,
            interpolation=self.interp)
        im_info['im_shape'] = np.array(im.shape[:2]).astype('float32')
        im_info['scale_factor'] = np.array(
            [im_scale_y, im_scale_x]).astype('float32')
        return im, im_info

    def generate_scale(self, im):
        """Return (scale_y, scale_x) for `im` under the configured policy."""
        origin_shape = im.shape[:2]
        # (removed unused local `im_c` from the original)
        if self.keep_ratio:
            im_size_min = np.min(origin_shape)
            im_size_max = np.max(origin_shape)
            target_size_min = np.min(self.target_size)
            target_size_max = np.max(self.target_size)
            # Scale the short side up to the small target, but clamp so the
            # long side never exceeds the large target.
            im_scale = float(target_size_min) / float(im_size_min)
            if np.round(im_scale * im_size_max) > target_size_max:
                im_scale = float(target_size_max) / float(im_size_max)
            im_scale_x = im_scale
            im_scale_y = im_scale
        else:
            resize_h, resize_w = self.target_size
            im_scale_y = resize_h / float(origin_shape[0])
            im_scale_x = resize_w / float(origin_shape[1])
        return im_scale_y, im_scale_x
class Permute(object):
    """Transpose an image from HWC to CHW layout."""

    def __init__(self):
        super(Permute, self).__init__()

    def __call__(self, im, im_info):
        """Return the CHW-ordered image and the untouched `im_info`."""
        return im.transpose((2, 0, 1)), im_info
class NormalizeImage(object):
    """Scale pixels to [0, 1] and/or apply per-channel mean/std normalization.

    Args:
        mean (list): per-channel means, subtracted when norm_type=='mean_std'.
        std (list): per-channel stds, divided when norm_type=='mean_std'.
        is_scale (bool): when True, first divide pixel values by 255.
        norm_type (str): 'mean_std' enables the mean/std step; any other
            value skips it.
    """

    def __init__(self, mean, std, is_scale=True, norm_type='mean_std'):
        self.mean = mean
        self.std = std
        self.is_scale = is_scale
        self.norm_type = norm_type

    def __call__(self, im, im_info):
        """Normalize `im` in float32; `im_info` passes through unchanged."""
        image = im.astype(np.float32, copy=False)
        if self.is_scale:
            image *= 1.0 / 255.0
        if self.norm_type == 'mean_std':
            # Broadcast the per-channel statistics over the HxW plane.
            image -= np.array(self.mean)[np.newaxis, np.newaxis, :]
            image /= np.array(self.std)[np.newaxis, np.newaxis, :]
        return image, im_info
class PadStride(object):
    """Zero-pad a CHW image so H and W become multiples of `stride`.

    A stride of 0 (the default) disables padding entirely.
    """

    def __init__(self, stride=0):
        self.coarsest_stride = stride

    def __call__(self, im, im_info):
        """Return the padded image (or `im` unchanged when disabled)."""
        stride = self.coarsest_stride
        if stride <= 0:
            return im, im_info
        im_c, im_h, im_w = im.shape
        pad_h = int(np.ceil(float(im_h) / stride) * stride)
        pad_w = int(np.ceil(float(im_w) / stride) * stride)
        # Place the image in the top-left corner of a zeroed canvas.
        canvas = np.zeros((im_c, pad_h, pad_w), dtype=np.float32)
        canvas[:, :im_h, :im_w] = im
        return canvas, im_info
def preprocess(im, preprocess_ops):
    """Decode `im` and run it through every preprocess operator in order.

    Args:
        im: image path or decoded ndarray (whatever `decode_image` accepts).
        preprocess_ops: sequence of callables with the (im, im_info) protocol.

    Returns:
        tuple: (processed image, accumulated im_info dict).
    """
    im_info = {
        'scale_factor': np.array(
            [1., 1.], dtype=np.float32),
        'im_shape': None,
    }
    im, im_info = decode_image(im, im_info)
    for op in preprocess_ops:
        im, im_info = op(im, im_info)
    return im, im_info
def parse_args():
    """Build and parse the benchmark's command-line arguments.

    Returns:
        argparse.Namespace: the parsed arguments.
    """
    parser = argparse.ArgumentParser()
    add = parser.add_argument
    add('--model_dir', type=str, help='directory of inference model')
    add('--run_mode', type=str, default='paddle', help='running mode')
    add('--batch_size', type=int, default=1, help='batch size')
    add('--image_dir',
        type=str,
        default='/paddle/data/DOTA_1024_ss/test1024/images',
        help='directory of test images')
    add('--warmup_iter', type=int, default=5, help='num of warmup iters')
    add('--total_iter', type=int, default=2000, help='num of total iters')
    add('--log_iter', type=int, default=50, help='num of log interval')
    add('--tuned_trt_shape_file',
        type=str,
        default='shape_range_info.pbtxt',
        help='dynamic shape range info')
    return parser.parse_args()
def init_predictor(FLAGS):
    """Create a Paddle-Inference predictor for the model in FLAGS.model_dir.

    Reads infer_cfg.yml next to the model files, enables GPU execution and,
    for 'trt_*' run modes, configures a TensorRT subgraph engine. For
    architectures listed in TUNED_TRT_DYNAMIC_MODELS, the first run only
    collects a dynamic-shape range file; a rerun then consumes it.

    Returns:
        tuple: (predictor, yml_conf) — the created predictor and the parsed
        inference-config dict.
    """
    model_dir, run_mode, batch_size = FLAGS.model_dir, FLAGS.run_mode, FLAGS.batch_size
    yaml_file = os.path.join(model_dir, 'infer_cfg.yml')
    with open(yaml_file) as f:
        yml_conf = yaml.safe_load(f)
    config = Config(
        os.path.join(model_dir, 'model.pdmodel'),
        os.path.join(model_dir, 'model.pdiparams'))
    # initial GPU memory(M), device ID
    config.enable_use_gpu(200, 0)
    # optimize graph and fuse op
    config.switch_ir_optim(True)
    # run_mode values other than these keys fall through to plain GPU mode
    precision_map = {
        'trt_int8': Config.Precision.Int8,
        'trt_fp32': Config.Precision.Float32,
        'trt_fp16': Config.Precision.Half
    }
    arch = yml_conf['arch']
    tuned_trt_shape_file = os.path.join(model_dir, FLAGS.tuned_trt_shape_file)
    if run_mode in precision_map.keys():
        # No tuned shape file yet: this run only records shape ranges; the
        # user must rerun afterwards so they can be consumed below.
        if arch in TUNED_TRT_DYNAMIC_MODELS and not os.path.exists(
                tuned_trt_shape_file):
            print(
                'dynamic shape range info is saved in {}. After that, rerun the code'.
                format(tuned_trt_shape_file))
            config.collect_shape_range_info(tuned_trt_shape_file)
        config.enable_tensorrt_engine(
            workspace_size=(1 << 25) * batch_size,
            max_batch_size=batch_size,
            min_subgraph_size=yml_conf['min_subgraph_size'],
            precision_mode=precision_map[run_mode],
            use_static=True,
            use_calib_mode=False)
        if yml_conf['use_dynamic_shape']:
            if arch in TUNED_TRT_DYNAMIC_MODELS and os.path.exists(
                    tuned_trt_shape_file):
                # Use the shape ranges collected on a previous run.
                config.enable_tuned_tensorrt_dynamic_shape(tuned_trt_shape_file,
                                                           True)
            else:
                # Static fallback ranges covering 640–1280 square inputs.
                min_input_shape = {
                    'image': [batch_size, 3, 640, 640],
                    'scale_factor': [batch_size, 2]
                }
                max_input_shape = {
                    'image': [batch_size, 3, 1280, 1280],
                    'scale_factor': [batch_size, 2]
                }
                opt_input_shape = {
                    'image': [batch_size, 3, 1024, 1024],
                    'scale_factor': [batch_size, 2]
                }
                config.set_trt_dynamic_shape_info(
                    min_input_shape, max_input_shape, opt_input_shape)
    # disable print log when predict
    config.disable_glog_info()
    # enable shared memory
    config.enable_memory_optim()
    # disable feed, fetch OP, needed by zero_copy_run
    config.switch_use_feed_fetch_ops(False)
    predictor = create_predictor(config)
    return predictor, yml_conf
def create_preprocess_ops(yml_conf):
    """Instantiate the preprocess operators listed in the model's infer_cfg.

    Each entry in yml_conf['Preprocess'] is a dict with a 'type' key naming
    an operator class defined in this module (e.g. Resize, NormalizeImage)
    plus the keyword arguments for its constructor.

    Returns:
        list: instantiated operator objects, in config order.

    Raises:
        ValueError: when an entry names an unknown operator type.
    """
    preprocess_ops = []
    for op_info in yml_conf['Preprocess']:
        new_op_info = op_info.copy()
        op_type = new_op_info.pop('type')
        # Look the class up by name instead of eval()-ing config-supplied
        # text: identical result for every valid config, but no
        # arbitrary-code execution from an untrusted yml file.
        op_cls = globals().get(op_type)
        if op_cls is None:
            raise ValueError('Unknown preprocess op type: {}'.format(op_type))
        preprocess_ops.append(op_cls(**new_op_info))
    return preprocess_ops
def get_test_images(image_dir):
    """Collect image file paths (jpg/jpeg/png/bmp, any case) under image_dir.

    Returns:
        list: absolute paths, deduplicated; order is unspecified.
    """
    infer_dir = os.path.abspath(image_dir)
    lower_exts = ['jpg', 'jpeg', 'png', 'bmp']
    # Match both lower- and upper-case extensions.
    found = set()
    for ext in lower_exts + [e.upper() for e in lower_exts]:
        found.update(glob.glob('{}/*.{}'.format(infer_dir, ext)))
    return list(found)
def create_inputs(image_files, preprocess_ops):
    """Preprocess a batch of images into the predictor's input dict.

    Args:
        image_files: list of image paths (or ndarrays) forming one batch.
        preprocess_ops: operators applied to each image via `preprocess`.

    Returns:
        dict: float32 batched arrays keyed 'im_shape', 'scale_factor',
        'image' — the names the exported model's feed expects.
    """
    processed = [preprocess(path, preprocess_ops) for path in image_files]
    images = [im for im, _ in processed]
    infos = [info for _, info in processed]
    return {
        'im_shape': np.stack(
            [info['im_shape'] for info in infos], axis=0).astype('float32'),
        'scale_factor': np.stack(
            [info['scale_factor'] for info in infos],
            axis=0).astype('float32'),
        'image': np.stack(images, axis=0).astype('float32'),
    }
def measure_speed(FLAGS):
    """Benchmark inference speed over FLAGS.total_iter images.

    Runs the predictor batch by batch over images from FLAGS.image_dir,
    excluding the first FLAGS.warmup_iter images from the accumulated time,
    and prints a running FPS every FLAGS.log_iter images plus a final
    overall figure.
    """
    predictor, yml_conf = init_predictor(FLAGS)
    input_names = predictor.get_input_names()
    preprocess_ops = create_preprocess_ops(yml_conf)
    image_files = get_test_images(FLAGS.image_dir)
    batch_size = FLAGS.batch_size
    warmup_iter, log_iter, total_iter = FLAGS.warmup_iter, FLAGS.log_iter, FLAGS.total_iter
    total_time = 0
    fps = 0
    # i counts images, advancing one batch per iteration.
    for i in range(0, total_iter, batch_size):
        # make data ready
        inputs = create_inputs(image_files[i:i + batch_size], preprocess_ops)
        for name in input_names:
            input_tensor = predictor.get_input_handle(name)
            input_tensor.copy_from_cpu(inputs[name])
        # drain pending GPU work so the timer covers only predictor.run()
        paddle.device.cuda.synchronize()
        # start running
        start_time = time.perf_counter()
        predictor.run()
        paddle.device.cuda.synchronize()
        if i >= warmup_iter:
            total_time += time.perf_counter() - start_time
            if (i + 1) % log_iter == 0:
                fps = (i + 1 - warmup_iter) / total_time
                print(
                    f'Done image [{i + 1:<3}/ {total_iter}], '
                    f'fps: {fps:.1f} img / s, '
                    f'times per image: {1000 / fps:.1f} ms / img',
                    flush=True)
        if (i + 1) == total_iter:
            fps = (i + 1 - warmup_iter) / total_time
            print(
                f'Overall fps: {fps:.1f} img / s, '
                f'times per image: {1000 / fps:.1f} ms / img',
                flush=True)
            break
if __name__ == '__main__':
    FLAGS = parse_args()
    # TensorRT run modes require a develop Paddle build and TRT >= 8.2;
    # plain paddle mode only needs release 2.4.
    use_trt = 'trt' in FLAGS.run_mode
    check_version('develop' if use_trt else '2.4')
    if use_trt:
        check_trt_version('8.2')
    measure_speed(FLAGS)
|