merge_and_divide.py 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171
  1. import os
  2. import json
  3. import argparse
  4. import numpy as np
  5. import glob
  6. import cv2
  7. from sklearn.model_selection import train_test_split
  8. from labelme import utils
  9. from tqdm import tqdm
  10. np.random.seed(41)
  11. # 0为背景
  12. classname_to_id = {
  13. "front": 1, # 从1开始标注
  14. "back": 2,
  15. }
  16. class Lableme2CoCo:
  17. def __init__(self):
  18. self.images = []
  19. self.annotations = []
  20. self.categories = []
  21. self.img_id = 0
  22. self.ann_id = 0
  23. def save_coco_json(self, instance, save_path):
  24. json.dump(instance, open(save_path, 'w', encoding='utf-8'), ensure_ascii=False, indent=1) # indent=2 更加美观显示
  25. # 由json文件构建COCO
  26. def to_coco(self, json_path_list):
  27. self._init_categories()
  28. for json_path in json_path_list:
  29. obj = self.read_jsonfile(json_path)
  30. self.images.append(self._image(obj, json_path))
  31. shapes = obj['shapes']
  32. for shape in shapes:
  33. annotation = self._annotation(shape)
  34. self.annotations.append(annotation)
  35. self.ann_id += 1
  36. self.img_id += 1
  37. instance = {}
  38. instance['info'] = 'spytensor created'
  39. instance['license'] = ['license']
  40. instance['images'] = self.images
  41. instance['annotations'] = self.annotations
  42. instance['categories'] = self.categories
  43. return instance
  44. # 构建类别
  45. def _init_categories(self):
  46. for k, v in classname_to_id.items():
  47. category = {}
  48. category['id'] = v
  49. category['name'] = k
  50. self.categories.append(category)
  51. # 构建COCO的image字段
  52. def _image(self, obj, path):
  53. image = {}
  54. # img_x = utils.img_b64_to_arr(obj['imageData'])
  55. # h, w = img_x.shape[:-1]
  56. image['height'] = obj['imageHeight']
  57. image['width'] = obj['imageWidth']
  58. # image['height'] = h
  59. # image['width'] = w
  60. image['id'] = self.img_id
  61. image['file_name'] = os.path.basename(path).replace(".json", ".png")
  62. return image
  63. # 构建COCO的annotation字段
  64. def _annotation(self, shape):
  65. # print('shape', shape)
  66. label = shape['label']
  67. points = shape['points']
  68. annotation = {}
  69. annotation['id'] = self.ann_id
  70. annotation['image_id'] = self.img_id
  71. annotation['category_id'] = int(classname_to_id[label])
  72. annotation['segmentation'] = [np.asarray(points).flatten().tolist()]
  73. annotation['bbox'] = self._get_box(points)
  74. annotation['iscrowd'] = 0
  75. annotation['area'] = 1.0
  76. return annotation
  77. # 读取json文件,返回一个json对象
  78. def read_jsonfile(self, path):
  79. with open(path, "r", encoding='utf-8') as f:
  80. return json.load(f)
  81. # COCO的格式: [x1,y1,w,h] 对应COCO的bbox格式
  82. def _get_box(self, points):
  83. min_x = min_y = np.inf
  84. max_x = max_y = 0
  85. for x, y in points:
  86. min_x = min(min_x, x)
  87. min_y = min(min_y, y)
  88. max_x = max(max_x, x)
  89. max_y = max(max_y, y)
  90. return [min_x, min_y, max_x - min_x, max_y - min_y]
  91. if __name__ == '__main__':
  92. parser = argparse.ArgumentParser()
  93. parser.add_argument('--anno_dirs', type=str, nargs="+", default=['./'])
  94. parser.add_argument('--train_ratio', type=float, default=0.9)
  95. args = parser.parse_args()
  96. labelme_folds = args.anno_dirs
  97. json_list_path = []
  98. train_path = []
  99. val_path = []
  100. # 每个文件夹按照比例划分,遍历完,最后合并
  101. for labelme_path in labelme_folds:
  102. list_path = glob.glob(labelme_path + "/*.json")
  103. json_list_path.extend(list_path)
  104. train_path1, val_path1 = train_test_split(list_path, test_size=1-args.train_ratio, train_size=args.train_ratio)
  105. train_path.extend(train_path1)
  106. val_path.extend(val_path1)
  107. saved_coco_path = "./"
  108. print('reading...')
  109. # 创建文件
  110. if not os.path.exists("%scoco/train/" % saved_coco_path):
  111. os.makedirs("%scoco/train/" % saved_coco_path)
  112. if not os.path.exists("%scoco/train/images/" % saved_coco_path):
  113. os.makedirs("%scoco/train/images/" % saved_coco_path)
  114. if not os.path.exists("%scoco/eval/" % saved_coco_path):
  115. os.makedirs("%scoco/eval/" % saved_coco_path)
  116. if not os.path.exists("%scoco/eval/images/" % saved_coco_path):
  117. os.makedirs("%scoco/eval/images/" % saved_coco_path)
  118. print('total images: ', len(json_list_path))
  119. # 数据划分,这里没有区分val2017和tran2017目录,所有图片都放在images目录下
  120. print("train_n:", len(train_path), 'val_n:', len(val_path))
  121. # 把训练集转化为COCO的json格式
  122. l2c_train = Lableme2CoCo()
  123. train_instance = l2c_train.to_coco(train_path)
  124. l2c_train.save_coco_json(train_instance, '%scoco/train/annotations.json' % saved_coco_path)
  125. print("train images: %d" % len(train_path))
  126. for file in tqdm(train_path):
  127. img_name = file.replace('json', 'png')
  128. temp_img = cv2.imread(img_name)
  129. try:
  130. img_name = str(img_name).split('\\')[-1]
  131. cv2.imwrite("{}coco/train/images/{}".format(saved_coco_path, img_name.replace('jpg', 'jpg')), temp_img)
  132. except Exception as e:
  133. print(e)
  134. print('Wrong Image:', img_name)
  135. continue
  136. # print(img_name + '-->', img_name.replace('jpg', 'jpg'))
  137. print("eval images: %d" % len(val_path))
  138. for file in tqdm(val_path):
  139. img_name = file.replace('json', 'png')
  140. temp_img = cv2.imread(img_name)
  141. try:
  142. img_name = str(img_name).split('\\')[-1]
  143. cv2.imwrite("{}coco/eval/images/{}".format(saved_coco_path, img_name.replace('jpg', 'jpg')), temp_img)
  144. except Exception as e:
  145. print(e)
  146. print('Wrong Image:', img_name)
  147. continue
  148. # 把验证集转化为COCO的json格式
  149. l2c_val = Lableme2CoCo()
  150. val_instance = l2c_val.to_coco(val_path)
  151. l2c_val.save_coco_json(val_instance, '%scoco/eval/annotations.json' % saved_coco_path)
  152. with open('./coco/about.txt', 'w') as f:
  153. f.write(str(args.anno_dirs))
  154. f.close()