# ccpd2ocr_all.py
# Builds det.txt / rec.txt label files and cropped plate images from
# CCPD2019 and CCPD2020 image file names.
import json
import os

import cv2
import numpy as np
from tqdm import tqdm
  6. provinces = [
  7. "皖", "沪", "津", "渝", "冀", "晋", "蒙", "辽", "吉", "黑", "苏", "浙", "京", "闽", "赣",
  8. "鲁", "豫", "鄂", "湘", "粤", "桂", "琼", "川", "贵", "云", "藏", "陕", "甘", "青", "宁",
  9. "新", "警", "学", "O"
  10. ]
  11. alphabets = [
  12. 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q',
  13. 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'O'
  14. ]
  15. ads = [
  16. 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q',
  17. 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '0', '1', '2', '3', '4', '5',
  18. '6', '7', '8', '9', 'O'
  19. ]
  20. def make_label_2020(img_dir, save_gt_folder, phase):
  21. crop_img_save_dir = os.path.join(save_gt_folder, phase, 'crop_imgs')
  22. os.makedirs(crop_img_save_dir, exist_ok=True)
  23. f_det = open(
  24. os.path.join(save_gt_folder, phase, 'det.txt'), 'w', encoding='utf-8')
  25. f_rec = open(
  26. os.path.join(save_gt_folder, phase, 'rec.txt'), 'w', encoding='utf-8')
  27. i = 0
  28. for filename in tqdm(os.listdir(os.path.join(img_dir, phase))):
  29. str_list = filename.split('-')
  30. if len(str_list) < 5:
  31. continue
  32. coord_list = str_list[3].split('_')
  33. txt_list = str_list[4].split('_')
  34. boxes = []
  35. for coord in coord_list:
  36. boxes.append([int(x) for x in coord.split("&")])
  37. boxes = [boxes[2], boxes[3], boxes[0], boxes[1]]
  38. lp_number = provinces[int(txt_list[0])] + alphabets[int(txt_list[
  39. 1])] + ''.join([ads[int(x)] for x in txt_list[2:]])
  40. # det
  41. det_info = [{'points': boxes, 'transcription': lp_number}]
  42. f_det.write('{}\t{}\n'.format(
  43. os.path.join("CCPD2020/ccpd_green", phase, filename),
  44. json.dumps(
  45. det_info, ensure_ascii=False)))
  46. # rec
  47. boxes = np.float32(boxes)
  48. img = cv2.imread(os.path.join(img_dir, phase, filename))
  49. # crop_img = img[int(boxes[:,1].min()):int(boxes[:,1].max()),int(boxes[:,0].min()):int(boxes[:,0].max())]
  50. crop_img = get_rotate_crop_image(img, boxes)
  51. crop_img_save_filename = '{}_{}.jpg'.format(i, '_'.join(txt_list))
  52. crop_img_save_path = os.path.join(crop_img_save_dir,
  53. crop_img_save_filename)
  54. cv2.imwrite(crop_img_save_path, crop_img)
  55. f_rec.write('{}/{}/crop_imgs/{}\t{}\n'.format(
  56. "CCPD2020/PPOCR", phase, crop_img_save_filename, lp_number))
  57. i += 1
  58. f_det.close()
  59. f_rec.close()
  60. def make_label_2019(list_dir, save_gt_folder, phase):
  61. crop_img_save_dir = os.path.join(save_gt_folder, phase, 'crop_imgs')
  62. os.makedirs(crop_img_save_dir, exist_ok=True)
  63. f_det = open(
  64. os.path.join(save_gt_folder, phase, 'det.txt'), 'w', encoding='utf-8')
  65. f_rec = open(
  66. os.path.join(save_gt_folder, phase, 'rec.txt'), 'w', encoding='utf-8')
  67. with open(os.path.join(list_dir, phase + ".txt"), 'r') as rf:
  68. imglist = rf.readlines()
  69. i = 0
  70. for idx, filename in enumerate(imglist):
  71. if idx % 1000 == 0:
  72. print("{}/{}".format(idx, len(imglist)))
  73. filename = filename.strip()
  74. str_list = filename.split('-')
  75. if len(str_list) < 5:
  76. continue
  77. coord_list = str_list[3].split('_')
  78. txt_list = str_list[4].split('_')
  79. boxes = []
  80. for coord in coord_list:
  81. boxes.append([int(x) for x in coord.split("&")])
  82. boxes = [boxes[2], boxes[3], boxes[0], boxes[1]]
  83. lp_number = provinces[int(txt_list[0])] + alphabets[int(txt_list[
  84. 1])] + ''.join([ads[int(x)] for x in txt_list[2:]])
  85. # det
  86. det_info = [{'points': boxes, 'transcription': lp_number}]
  87. f_det.write('{}\t{}\n'.format(
  88. os.path.join("CCPD2019", filename),
  89. json.dumps(
  90. det_info, ensure_ascii=False)))
  91. # rec
  92. boxes = np.float32(boxes)
  93. imgpath = os.path.join(list_dir[:-7], filename)
  94. img = cv2.imread(imgpath)
  95. # crop_img = img[int(boxes[:,1].min()):int(boxes[:,1].max()),int(boxes[:,0].min()):int(boxes[:,0].max())]
  96. crop_img = get_rotate_crop_image(img, boxes)
  97. crop_img_save_filename = '{}_{}.jpg'.format(i, '_'.join(txt_list))
  98. crop_img_save_path = os.path.join(crop_img_save_dir,
  99. crop_img_save_filename)
  100. cv2.imwrite(crop_img_save_path, crop_img)
  101. f_rec.write('{}/{}/crop_imgs/{}\t{}\n'.format(
  102. "CCPD2019/PPOCR", phase, crop_img_save_filename, lp_number))
  103. i += 1
  104. f_det.close()
  105. f_rec.close()
  106. def get_rotate_crop_image(img, points):
  107. '''
  108. img_height, img_width = img.shape[0:2]
  109. left = int(np.min(points[:, 0]))
  110. right = int(np.max(points[:, 0]))
  111. top = int(np.min(points[:, 1]))
  112. bottom = int(np.max(points[:, 1]))
  113. img_crop = img[top:bottom, left:right, :].copy()
  114. points[:, 0] = points[:, 0] - left
  115. points[:, 1] = points[:, 1] - top
  116. '''
  117. assert len(points) == 4, "shape of points must be 4*2"
  118. img_crop_width = int(
  119. max(
  120. np.linalg.norm(points[0] - points[1]),
  121. np.linalg.norm(points[2] - points[3])))
  122. img_crop_height = int(
  123. max(
  124. np.linalg.norm(points[0] - points[3]),
  125. np.linalg.norm(points[1] - points[2])))
  126. pts_std = np.float32([[0, 0], [img_crop_width, 0],
  127. [img_crop_width, img_crop_height],
  128. [0, img_crop_height]])
  129. M = cv2.getPerspectiveTransform(points, pts_std)
  130. dst_img = cv2.warpPerspective(
  131. img,
  132. M, (img_crop_width, img_crop_height),
  133. borderMode=cv2.BORDER_REPLICATE,
  134. flags=cv2.INTER_CUBIC)
  135. dst_img_height, dst_img_width = dst_img.shape[0:2]
  136. if dst_img_height * 1.0 / dst_img_width >= 1.5:
  137. dst_img = np.rot90(dst_img)
  138. return dst_img
  139. img_dir = './CCPD2020/ccpd_green'
  140. save_gt_folder = './CCPD2020/PPOCR'
  141. # phase = 'train' # change to val and test to make val dataset and test dataset
  142. for phase in ['train', 'val', 'test']:
  143. make_label_2020(img_dir, save_gt_folder, phase)
  144. list_dir = './CCPD2019/splits/'
  145. save_gt_folder = './CCPD2019/PPOCR'
  146. for phase in ['train', 'val', 'test']:
  147. make_label_2019(list_dir, save_gt_folder, phase)