cuda-python.py 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161
  1. import sys
  2. import requests
  3. import cv2
  4. import random
  5. import time
  6. import numpy as np
  7. import tensorrt as trt
  8. from cuda import cudart
  9. from pathlib import Path
  10. from collections import OrderedDict, namedtuple
  11. def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleup=True, stride=32):
  12. # Resize and pad image while meeting stride-multiple constraints
  13. shape = im.shape[:2] # current shape [height, width]
  14. if isinstance(new_shape, int):
  15. new_shape = (new_shape, new_shape)
  16. # Scale ratio (new / old)
  17. r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
  18. if not scaleup: # only scale down, do not scale up (for better val mAP)
  19. r = min(r, 1.0)
  20. # Compute padding
  21. new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
  22. dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
  23. if auto: # minimum rectangle
  24. dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding
  25. dw /= 2 # divide padding into 2 sides
  26. dh /= 2
  27. if shape[::-1] != new_unpad: # resize
  28. im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
  29. top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
  30. left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
  31. im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
  32. return im, r, (dw, dh)
  33. w = Path(sys.argv[1])
  34. assert w.exists() and w.suffix in ('.engine', '.plan'), 'Wrong engine path'
  35. names = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
  36. 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
  37. 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
  38. 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
  39. 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
  40. 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
  41. 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
  42. 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
  43. 'hair drier', 'toothbrush']
  44. colors = {name: [random.randint(0, 255) for _ in range(3)] for i, name in enumerate(names)}
  45. url = 'https://oneflow-static.oss-cn-beijing.aliyuncs.com/tripleMu/image1.jpg'
  46. file = requests.get(url)
  47. img = cv2.imdecode(np.frombuffer(file.content, np.uint8), 1)
  48. _, stream = cudart.cudaStreamCreate()
  49. mean = np.array([0.485, 0.456, 0.406], dtype=np.float32).reshape(1, 3, 1, 1)
  50. std = np.array([0.229, 0.224, 0.225], dtype=np.float32).reshape(1, 3, 1, 1)
  51. # Infer TensorRT Engine
  52. Binding = namedtuple('Binding', ('name', 'dtype', 'shape', 'data', 'ptr'))
  53. logger = trt.Logger(trt.Logger.ERROR)
  54. trt.init_libnvinfer_plugins(logger, namespace="")
  55. with open(w, 'rb') as f, trt.Runtime(logger) as runtime:
  56. model = runtime.deserialize_cuda_engine(f.read())
  57. bindings = OrderedDict()
  58. fp16 = False # default updated below
  59. for index in range(model.num_bindings):
  60. name = model.get_binding_name(index)
  61. dtype = trt.nptype(model.get_binding_dtype(index))
  62. shape = tuple(model.get_binding_shape(index))
  63. data = np.empty(shape, dtype=np.dtype(dtype))
  64. _, data_ptr = cudart.cudaMallocAsync(data.nbytes, stream)
  65. bindings[name] = Binding(name, dtype, shape, data, data_ptr)
  66. if model.binding_is_input(index) and dtype == np.float16:
  67. fp16 = True
  68. binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
  69. context = model.create_execution_context()
  70. image = img.copy()
  71. image, ratio, dwdh = letterbox(image, auto=False)
  72. image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
  73. image_copy = image.copy()
  74. image = image.transpose((2, 0, 1))
  75. image = np.expand_dims(image, 0)
  76. image = np.ascontiguousarray(image)
  77. im = image.astype(np.float32)
  78. im /= 255
  79. im -= mean
  80. im /= std
  81. _, image_ptr = cudart.cudaMallocAsync(im.nbytes, stream)
  82. cudart.cudaMemcpyAsync(image_ptr, im.ctypes.data, im.nbytes,
  83. cudart.cudaMemcpyKind.cudaMemcpyHostToDevice, stream)
  84. # warmup for 10 times
  85. for _ in range(10):
  86. tmp = np.random.randn(1, 3, 640, 640).astype(np.float32)
  87. _, tmp_ptr = cudart.cudaMallocAsync(tmp.nbytes, stream)
  88. binding_addrs['image'] = tmp_ptr
  89. context.execute_v2(list(binding_addrs.values()))
  90. start = time.perf_counter()
  91. binding_addrs['image'] = image_ptr
  92. context.execute_v2(list(binding_addrs.values()))
  93. print(f'Cost {(time.perf_counter() - start) * 1000}ms')
  94. nums = bindings['num_dets'].data
  95. boxes = bindings['det_boxes'].data
  96. scores = bindings['det_scores'].data
  97. classes = bindings['det_classes'].data
  98. cudart.cudaMemcpyAsync(nums.ctypes.data,
  99. bindings['num_dets'].ptr,
  100. nums.nbytes,
  101. cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost,
  102. stream)
  103. cudart.cudaMemcpyAsync(boxes.ctypes.data,
  104. bindings['det_boxes'].ptr,
  105. boxes.nbytes,
  106. cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost,
  107. stream)
  108. cudart.cudaMemcpyAsync(scores.ctypes.data,
  109. bindings['det_scores'].ptr,
  110. scores.nbytes,
  111. cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost,
  112. stream)
  113. cudart.cudaMemcpyAsync(classes.ctypes.data,
  114. bindings['det_classes'].ptr,
  115. classes.data.nbytes,
  116. cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost,
  117. stream)
  118. cudart.cudaStreamSynchronize(stream)
  119. cudart.cudaStreamDestroy(stream)
  120. for i in binding_addrs.values():
  121. cudart.cudaFree(i)
  122. num = int(nums[0][0])
  123. box_img = boxes[0, :num].round().astype(np.int32)
  124. score_img = scores[0, :num]
  125. clss_img = classes[0, :num]
  126. for i, (box, score, clss) in enumerate(zip(box_img, score_img, clss_img)):
  127. name = names[int(clss)]
  128. color = colors[name]
  129. cv2.rectangle(image_copy, box[:2].tolist(), box[2:].tolist(), color, 2)
  130. cv2.putText(image_copy, name, (int(box[0]), int(box[1]) - 2), cv2.FONT_HERSHEY_SIMPLEX,
  131. 0.75, [225, 255, 255], thickness=2)
  132. cv2.imshow('Result', cv2.cvtColor(image_copy, cv2.COLOR_RGB2BGR))
  133. cv2.waitKey(0)