# vehicle_plateutils.py
  1. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import argparse
  15. import os
  16. import sys
  17. import platform
  18. import cv2
  19. import numpy as np
  20. import paddle
  21. from PIL import Image, ImageDraw, ImageFont
  22. import math
  23. from paddle import inference
  24. import time
  25. import ast
  26. def create_predictor(args, cfg, mode):
  27. if mode == "det":
  28. model_dir = cfg['det_model_dir']
  29. else:
  30. model_dir = cfg['rec_model_dir']
  31. if model_dir is None:
  32. print("not find {} model file path {}".format(mode, model_dir))
  33. sys.exit(0)
  34. model_file_path = model_dir + "/inference.pdmodel"
  35. params_file_path = model_dir + "/inference.pdiparams"
  36. if not os.path.exists(model_file_path):
  37. raise ValueError("not find model file path {}".format(model_file_path))
  38. if not os.path.exists(params_file_path):
  39. raise ValueError("not find params file path {}".format(
  40. params_file_path))
  41. config = inference.Config(model_file_path, params_file_path)
  42. batch_size = 1
  43. if args.device == "GPU":
  44. gpu_id = get_infer_gpuid()
  45. if gpu_id is None:
  46. print(
  47. "GPU is not found in current device by nvidia-smi. Please check your device or ignore it if run on jetson."
  48. )
  49. config.enable_use_gpu(500, 0)
  50. precision_map = {
  51. 'trt_int8': inference.PrecisionType.Int8,
  52. 'trt_fp32': inference.PrecisionType.Float32,
  53. 'trt_fp16': inference.PrecisionType.Half
  54. }
  55. min_subgraph_size = 15
  56. if args.run_mode in precision_map.keys():
  57. config.enable_tensorrt_engine(
  58. workspace_size=(1 << 25) * batch_size,
  59. max_batch_size=batch_size,
  60. min_subgraph_size=min_subgraph_size,
  61. precision_mode=precision_map[args.run_mode])
  62. use_dynamic_shape = True
  63. if mode == "det":
  64. min_input_shape = {
  65. "x": [1, 3, 50, 50],
  66. "conv2d_92.tmp_0": [1, 120, 20, 20],
  67. "conv2d_91.tmp_0": [1, 24, 10, 10],
  68. "conv2d_59.tmp_0": [1, 96, 20, 20],
  69. "nearest_interp_v2_1.tmp_0": [1, 256, 10, 10],
  70. "nearest_interp_v2_2.tmp_0": [1, 256, 20, 20],
  71. "conv2d_124.tmp_0": [1, 256, 20, 20],
  72. "nearest_interp_v2_3.tmp_0": [1, 64, 20, 20],
  73. "nearest_interp_v2_4.tmp_0": [1, 64, 20, 20],
  74. "nearest_interp_v2_5.tmp_0": [1, 64, 20, 20],
  75. "elementwise_add_7": [1, 56, 2, 2],
  76. "nearest_interp_v2_0.tmp_0": [1, 256, 2, 2]
  77. }
  78. max_input_shape = {
  79. "x": [1, 3, 1536, 1536],
  80. "conv2d_92.tmp_0": [1, 120, 400, 400],
  81. "conv2d_91.tmp_0": [1, 24, 200, 200],
  82. "conv2d_59.tmp_0": [1, 96, 400, 400],
  83. "nearest_interp_v2_1.tmp_0": [1, 256, 200, 200],
  84. "conv2d_124.tmp_0": [1, 256, 400, 400],
  85. "nearest_interp_v2_2.tmp_0": [1, 256, 400, 400],
  86. "nearest_interp_v2_3.tmp_0": [1, 64, 400, 400],
  87. "nearest_interp_v2_4.tmp_0": [1, 64, 400, 400],
  88. "nearest_interp_v2_5.tmp_0": [1, 64, 400, 400],
  89. "elementwise_add_7": [1, 56, 400, 400],
  90. "nearest_interp_v2_0.tmp_0": [1, 256, 400, 400]
  91. }
  92. opt_input_shape = {
  93. "x": [1, 3, 640, 640],
  94. "conv2d_92.tmp_0": [1, 120, 160, 160],
  95. "conv2d_91.tmp_0": [1, 24, 80, 80],
  96. "conv2d_59.tmp_0": [1, 96, 160, 160],
  97. "nearest_interp_v2_1.tmp_0": [1, 256, 80, 80],
  98. "nearest_interp_v2_2.tmp_0": [1, 256, 160, 160],
  99. "conv2d_124.tmp_0": [1, 256, 160, 160],
  100. "nearest_interp_v2_3.tmp_0": [1, 64, 160, 160],
  101. "nearest_interp_v2_4.tmp_0": [1, 64, 160, 160],
  102. "nearest_interp_v2_5.tmp_0": [1, 64, 160, 160],
  103. "elementwise_add_7": [1, 56, 40, 40],
  104. "nearest_interp_v2_0.tmp_0": [1, 256, 40, 40]
  105. }
  106. min_pact_shape = {
  107. "nearest_interp_v2_26.tmp_0": [1, 256, 20, 20],
  108. "nearest_interp_v2_27.tmp_0": [1, 64, 20, 20],
  109. "nearest_interp_v2_28.tmp_0": [1, 64, 20, 20],
  110. "nearest_interp_v2_29.tmp_0": [1, 64, 20, 20]
  111. }
  112. max_pact_shape = {
  113. "nearest_interp_v2_26.tmp_0": [1, 256, 400, 400],
  114. "nearest_interp_v2_27.tmp_0": [1, 64, 400, 400],
  115. "nearest_interp_v2_28.tmp_0": [1, 64, 400, 400],
  116. "nearest_interp_v2_29.tmp_0": [1, 64, 400, 400]
  117. }
  118. opt_pact_shape = {
  119. "nearest_interp_v2_26.tmp_0": [1, 256, 160, 160],
  120. "nearest_interp_v2_27.tmp_0": [1, 64, 160, 160],
  121. "nearest_interp_v2_28.tmp_0": [1, 64, 160, 160],
  122. "nearest_interp_v2_29.tmp_0": [1, 64, 160, 160]
  123. }
  124. min_input_shape.update(min_pact_shape)
  125. max_input_shape.update(max_pact_shape)
  126. opt_input_shape.update(opt_pact_shape)
  127. elif mode == "rec":
  128. imgH = int(cfg['rec_image_shape'][-2])
  129. min_input_shape = {"x": [1, 3, imgH, 10]}
  130. max_input_shape = {"x": [batch_size, 3, imgH, 2304]}
  131. opt_input_shape = {"x": [batch_size, 3, imgH, 320]}
  132. config.exp_disable_tensorrt_ops(["transpose2"])
  133. elif mode == "cls":
  134. min_input_shape = {"x": [1, 3, 48, 10]}
  135. max_input_shape = {"x": [batch_size, 3, 48, 1024]}
  136. opt_input_shape = {"x": [batch_size, 3, 48, 320]}
  137. else:
  138. use_dynamic_shape = False
  139. if use_dynamic_shape:
  140. config.set_trt_dynamic_shape_info(
  141. min_input_shape, max_input_shape, opt_input_shape)
  142. else:
  143. config.disable_gpu()
  144. if hasattr(args, "cpu_threads"):
  145. config.set_cpu_math_library_num_threads(args.cpu_threads)
  146. else:
  147. # default cpu threads as 10
  148. config.set_cpu_math_library_num_threads(10)
  149. if args.enable_mkldnn:
  150. # cache 10 different shapes for mkldnn to avoid memory leak
  151. config.set_mkldnn_cache_capacity(10)
  152. config.enable_mkldnn()
  153. if args.run_mode == "fp16":
  154. config.enable_mkldnn_bfloat16()
  155. # enable memory optim
  156. config.enable_memory_optim()
  157. config.disable_glog_info()
  158. config.delete_pass("conv_transpose_eltwiseadd_bn_fuse_pass")
  159. config.delete_pass("matmul_transpose_reshape_fuse_pass")
  160. if mode == 'table':
  161. config.delete_pass("fc_fuse_pass") # not supported for table
  162. config.switch_use_feed_fetch_ops(False)
  163. config.switch_ir_optim(True)
  164. # create predictor
  165. predictor = inference.create_predictor(config)
  166. input_names = predictor.get_input_names()
  167. for name in input_names:
  168. input_tensor = predictor.get_input_handle(name)
  169. output_tensors = get_output_tensors(cfg, mode, predictor)
  170. return predictor, input_tensor, output_tensors, config
  171. def get_output_tensors(cfg, mode, predictor):
  172. output_names = predictor.get_output_names()
  173. output_tensors = []
  174. output_name = 'softmax_0.tmp_0'
  175. if output_name in output_names:
  176. return [predictor.get_output_handle(output_name)]
  177. else:
  178. for output_name in output_names:
  179. output_tensor = predictor.get_output_handle(output_name)
  180. output_tensors.append(output_tensor)
  181. return output_tensors
  182. def get_infer_gpuid():
  183. sysstr = platform.system()
  184. if sysstr == "Windows":
  185. return 0
  186. if not paddle.fluid.core.is_compiled_with_rocm():
  187. cmd = "env | grep CUDA_VISIBLE_DEVICES"
  188. else:
  189. cmd = "env | grep HIP_VISIBLE_DEVICES"
  190. env_cuda = os.popen(cmd).readlines()
  191. if len(env_cuda) == 0:
  192. return 0
  193. else:
  194. gpu_id = env_cuda[0].strip().split("=")[1]
  195. return int(gpu_id[0])
  196. def draw_e2e_res(dt_boxes, strs, img_path):
  197. src_im = cv2.imread(img_path)
  198. for box, str in zip(dt_boxes, strs):
  199. box = box.astype(np.int32).reshape((-1, 1, 2))
  200. cv2.polylines(src_im, [box], True, color=(255, 255, 0), thickness=2)
  201. cv2.putText(
  202. src_im,
  203. str,
  204. org=(int(box[0, 0, 0]), int(box[0, 0, 1])),
  205. fontFace=cv2.FONT_HERSHEY_COMPLEX,
  206. fontScale=0.7,
  207. color=(0, 255, 0),
  208. thickness=1)
  209. return src_im
  210. def draw_text_det_res(dt_boxes, img_path):
  211. src_im = cv2.imread(img_path)
  212. for box in dt_boxes:
  213. box = np.array(box).astype(np.int32).reshape(-1, 2)
  214. cv2.polylines(src_im, [box], True, color=(255, 255, 0), thickness=2)
  215. return src_im
  216. def resize_img(img, input_size=600):
  217. """
  218. resize img and limit the longest side of the image to input_size
  219. """
  220. img = np.array(img)
  221. im_shape = img.shape
  222. im_size_max = np.max(im_shape[0:2])
  223. im_scale = float(input_size) / float(im_size_max)
  224. img = cv2.resize(img, None, None, fx=im_scale, fy=im_scale)
  225. return img
  226. def draw_ocr(image,
  227. boxes,
  228. txts=None,
  229. scores=None,
  230. drop_score=0.5,
  231. font_path="./doc/fonts/simfang.ttf"):
  232. """
  233. Visualize the results of OCR detection and recognition
  234. args:
  235. image(Image|array): RGB image
  236. boxes(list): boxes with shape(N, 4, 2)
  237. txts(list): the texts
  238. scores(list): txxs corresponding scores
  239. drop_score(float): only scores greater than drop_threshold will be visualized
  240. font_path: the path of font which is used to draw text
  241. return(array):
  242. the visualized img
  243. """
  244. if scores is None:
  245. scores = [1] * len(boxes)
  246. box_num = len(boxes)
  247. for i in range(box_num):
  248. if scores is not None and (scores[i] < drop_score or
  249. math.isnan(scores[i])):
  250. continue
  251. box = np.reshape(np.array(boxes[i]), [-1, 1, 2]).astype(np.int64)
  252. image = cv2.polylines(np.array(image), [box], True, (255, 0, 0), 2)
  253. if txts is not None:
  254. img = np.array(resize_img(image, input_size=600))
  255. txt_img = text_visual(
  256. txts,
  257. scores,
  258. img_h=img.shape[0],
  259. img_w=600,
  260. threshold=drop_score,
  261. font_path=font_path)
  262. img = np.concatenate([np.array(img), np.array(txt_img)], axis=1)
  263. return img
  264. return image
  265. def draw_ocr_box_txt(image,
  266. boxes,
  267. txts,
  268. scores=None,
  269. drop_score=0.5,
  270. font_path="./doc/simfang.ttf"):
  271. h, w = image.height, image.width
  272. img_left = image.copy()
  273. img_right = Image.new('RGB', (w, h), (255, 255, 255))
  274. import random
  275. random.seed(0)
  276. draw_left = ImageDraw.Draw(img_left)
  277. draw_right = ImageDraw.Draw(img_right)
  278. for idx, (box, txt) in enumerate(zip(boxes, txts)):
  279. if scores is not None and scores[idx] < drop_score:
  280. continue
  281. color = (random.randint(0, 255), random.randint(0, 255),
  282. random.randint(0, 255))
  283. draw_left.polygon(box, fill=color)
  284. draw_right.polygon(
  285. [
  286. box[0][0], box[0][1], box[1][0], box[1][1], box[2][0],
  287. box[2][1], box[3][0], box[3][1]
  288. ],
  289. outline=color)
  290. box_height = math.sqrt((box[0][0] - box[3][0])**2 + (box[0][1] - box[3][
  291. 1])**2)
  292. box_width = math.sqrt((box[0][0] - box[1][0])**2 + (box[0][1] - box[1][
  293. 1])**2)
  294. if box_height > 2 * box_width:
  295. font_size = max(int(box_width * 0.9), 10)
  296. font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
  297. cur_y = box[0][1]
  298. for c in txt:
  299. char_size = font.getsize(c)
  300. draw_right.text(
  301. (box[0][0] + 3, cur_y), c, fill=(0, 0, 0), font=font)
  302. cur_y += char_size[1]
  303. else:
  304. font_size = max(int(box_height * 0.8), 10)
  305. font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
  306. draw_right.text(
  307. [box[0][0], box[0][1]], txt, fill=(0, 0, 0), font=font)
  308. img_left = Image.blend(image, img_left, 0.5)
  309. img_show = Image.new('RGB', (w * 2, h), (255, 255, 255))
  310. img_show.paste(img_left, (0, 0, w, h))
  311. img_show.paste(img_right, (w, 0, w * 2, h))
  312. return np.array(img_show)
  313. def str_count(s):
  314. """
  315. Count the number of Chinese characters,
  316. a single English character and a single number
  317. equal to half the length of Chinese characters.
  318. args:
  319. s(string): the input of string
  320. return(int):
  321. the number of Chinese characters
  322. """
  323. import string
  324. count_zh = count_pu = 0
  325. s_len = len(s)
  326. en_dg_count = 0
  327. for c in s:
  328. if c in string.ascii_letters or c.isdigit() or c.isspace():
  329. en_dg_count += 1
  330. elif c.isalpha():
  331. count_zh += 1
  332. else:
  333. count_pu += 1
  334. return s_len - math.ceil(en_dg_count / 2)
  335. def text_visual(texts,
  336. scores,
  337. img_h=400,
  338. img_w=600,
  339. threshold=0.,
  340. font_path="./doc/simfang.ttf"):
  341. """
  342. create new blank img and draw txt on it
  343. args:
  344. texts(list): the text will be draw
  345. scores(list|None): corresponding score of each txt
  346. img_h(int): the height of blank img
  347. img_w(int): the width of blank img
  348. font_path: the path of font which is used to draw text
  349. return(array):
  350. """
  351. if scores is not None:
  352. assert len(texts) == len(
  353. scores), "The number of txts and corresponding scores must match"
  354. def create_blank_img():
  355. blank_img = np.ones(shape=[img_h, img_w], dtype=np.int8) * 255
  356. blank_img[:, img_w - 1:] = 0
  357. blank_img = Image.fromarray(blank_img).convert("RGB")
  358. draw_txt = ImageDraw.Draw(blank_img)
  359. return blank_img, draw_txt
  360. blank_img, draw_txt = create_blank_img()
  361. font_size = 20
  362. txt_color = (0, 0, 0)
  363. font = ImageFont.truetype(font_path, font_size, encoding="utf-8")
  364. gap = font_size + 5
  365. txt_img_list = []
  366. count, index = 1, 0
  367. for idx, txt in enumerate(texts):
  368. index += 1
  369. if scores[idx] < threshold or math.isnan(scores[idx]):
  370. index -= 1
  371. continue
  372. first_line = True
  373. while str_count(txt) >= img_w // font_size - 4:
  374. tmp = txt
  375. txt = tmp[:img_w // font_size - 4]
  376. if first_line:
  377. new_txt = str(index) + ': ' + txt
  378. first_line = False
  379. else:
  380. new_txt = ' ' + txt
  381. draw_txt.text((0, gap * count), new_txt, txt_color, font=font)
  382. txt = tmp[img_w // font_size - 4:]
  383. if count >= img_h // gap - 1:
  384. txt_img_list.append(np.array(blank_img))
  385. blank_img, draw_txt = create_blank_img()
  386. count = 0
  387. count += 1
  388. if first_line:
  389. new_txt = str(index) + ': ' + txt + ' ' + '%.3f' % (scores[idx])
  390. else:
  391. new_txt = " " + txt + " " + '%.3f' % (scores[idx])
  392. draw_txt.text((0, gap * count), new_txt, txt_color, font=font)
  393. # whether add new blank img or not
  394. if count >= img_h // gap - 1 and idx + 1 < len(texts):
  395. txt_img_list.append(np.array(blank_img))
  396. blank_img, draw_txt = create_blank_img()
  397. count = 0
  398. count += 1
  399. txt_img_list.append(np.array(blank_img))
  400. if len(txt_img_list) == 1:
  401. blank_img = np.array(txt_img_list[0])
  402. else:
  403. blank_img = np.concatenate(txt_img_list, axis=1)
  404. return np.array(blank_img)
  405. def base64_to_cv2(b64str):
  406. import base64
  407. data = base64.b64decode(b64str.encode('utf8'))
  408. data = np.fromstring(data, np.uint8)
  409. data = cv2.imdecode(data, cv2.IMREAD_COLOR)
  410. return data
  411. def draw_boxes(image, boxes, scores=None, drop_score=0.5):
  412. if scores is None:
  413. scores = [1] * len(boxes)
  414. for (box, score) in zip(boxes, scores):
  415. if score < drop_score:
  416. continue
  417. box = np.reshape(np.array(box), [-1, 1, 2]).astype(np.int64)
  418. image = cv2.polylines(np.array(image), [box], True, (255, 0, 0), 2)
  419. return image
  420. def get_rotate_crop_image(img, points):
  421. '''
  422. img_height, img_width = img.shape[0:2]
  423. left = int(np.min(points[:, 0]))
  424. right = int(np.max(points[:, 0]))
  425. top = int(np.min(points[:, 1]))
  426. bottom = int(np.max(points[:, 1]))
  427. img_crop = img[top:bottom, left:right, :].copy()
  428. points[:, 0] = points[:, 0] - left
  429. points[:, 1] = points[:, 1] - top
  430. '''
  431. assert len(points) == 4, "shape of points must be 4*2"
  432. img_crop_width = int(
  433. max(
  434. np.linalg.norm(points[0] - points[1]),
  435. np.linalg.norm(points[2] - points[3])))
  436. img_crop_height = int(
  437. max(
  438. np.linalg.norm(points[0] - points[3]),
  439. np.linalg.norm(points[1] - points[2])))
  440. pts_std = np.float32([[0, 0], [img_crop_width, 0],
  441. [img_crop_width, img_crop_height],
  442. [0, img_crop_height]])
  443. M = cv2.getPerspectiveTransform(points, pts_std)
  444. dst_img = cv2.warpPerspective(
  445. img,
  446. M, (img_crop_width, img_crop_height),
  447. borderMode=cv2.BORDER_REPLICATE,
  448. flags=cv2.INTER_CUBIC)
  449. dst_img_height, dst_img_width = dst_img.shape[0:2]
  450. if dst_img_height * 1.0 / dst_img_width >= 1.5:
  451. dst_img = np.rot90(dst_img)
  452. return dst_img
  453. def check_gpu(use_gpu):
  454. if use_gpu and not paddle.is_compiled_with_cuda():
  455. use_gpu = False
  456. return use_gpu
# Running this module as a script does nothing; it only provides helpers.
if __name__ == '__main__':
    pass