# labelme_to_coco.py -- convert labelme annotation files into a COCO-style dataset.
  1. import argparse
  2. import os
  3. import json
  4. import numpy as np
  5. import glob
  6. import cv2
  7. from sklearn.model_selection import train_test_split
  8. from labelme import utils
  9. np.random.seed(41)
  10. # 0为背景
  11. classname_to_id = {
  12. "front": 1, # 从1开始标注
  13. "back": 2,
  14. }
  15. class Lableme2CoCo:
  16. def __init__(self):
  17. self.images = []
  18. self.annotations = []
  19. self.categories = []
  20. self.img_id = 0
  21. self.ann_id = 0
  22. def save_coco_json(self, instance, save_path):
  23. json.dump(instance, open(save_path, 'w', encoding='utf-8'), ensure_ascii=False, indent=1) # indent=2 更加美观显示
  24. # 由json文件构建COCO
  25. def to_coco(self, json_path_list):
  26. self._init_categories()
  27. for json_path in json_path_list:
  28. print(json_path)
  29. obj = self.read_jsonfile(json_path)
  30. self.images.append(self._image(obj, json_path))
  31. shapes = obj['shapes']
  32. for shape in shapes:
  33. annotation = self._annotation(shape)
  34. self.annotations.append(annotation)
  35. self.ann_id += 1
  36. self.img_id += 1
  37. instance = {}
  38. instance['info'] = 'spytensor created'
  39. instance['license'] = ['license']
  40. instance['images'] = self.images
  41. instance['annotations'] = self.annotations
  42. instance['categories'] = self.categories
  43. return instance
  44. # 构建类别
  45. def _init_categories(self):
  46. for k, v in classname_to_id.items():
  47. category = {}
  48. category['id'] = v
  49. category['name'] = k
  50. self.categories.append(category)
  51. # 构建COCO的image字段
  52. def _image(self, obj, path):
  53. image = {}
  54. img_x = utils.img_b64_to_arr(obj['imageData'])
  55. h, w = img_x.shape[:-1]
  56. image['height'] = h
  57. image['width'] = w
  58. image['id'] = self.img_id
  59. image['file_name'] = os.path.basename(path).replace(".files", ".jpg")
  60. return image
  61. # 构建COCO的annotation字段
  62. def _annotation(self, shape):
  63. # print('shape', shape)
  64. label = shape['label']
  65. points = shape['points']
  66. annotation = {}
  67. annotation['id'] = self.ann_id
  68. annotation['image_id'] = self.img_id
  69. annotation['category_id'] = int(classname_to_id[label])
  70. annotation['segmentation'] = [np.asarray(points).flatten().tolist()]
  71. annotation['bbox'] = self._get_box(points)
  72. annotation['iscrowd'] = 0
  73. annotation['area'] = 1.0
  74. return annotation
  75. # 读取json文件,返回一个json对象
  76. def read_jsonfile(self, path):
  77. with open(path, "r", encoding='utf-8') as f:
  78. return json.loads(f.read())
  79. # COCO的格式: [x1,y1,w,h] 对应COCO的bbox格式
  80. def _get_box(self, points):
  81. min_x = min_y = np.inf
  82. max_x = max_y = 0
  83. for x, y in points:
  84. min_x = min(min_x, x)
  85. min_y = min(min_y, y)
  86. max_x = max(max_x, x)
  87. max_y = max(max_y, y)
  88. return [min_x, min_y, max_x - min_x, max_y - min_y]
  89. if __name__ == '__main__':
  90. parser = argparse.ArgumentParser()
  91. parser.add_argument('--labelme_dir', type=str, default='./images')
  92. parser.add_argument('--save_coco_dir', type=str, default='./coco_dataset')
  93. args = parser.parse_args()
  94. labelme_path = args.labelme_dir
  95. saved_coco_path = args.save_coco_dir
  96. print('reading...')
  97. # 创建文件
  98. if not os.path.exists("%scoco/train/" % saved_coco_path):
  99. os.makedirs("%scoco/train/" % saved_coco_path)
  100. if not os.path.exists("%scoco/train/images/" % saved_coco_path):
  101. os.makedirs("%scoco/train/images/" % saved_coco_path)
  102. if not os.path.exists("%scoco/eval/" % saved_coco_path):
  103. os.makedirs("%scoco/eval/" % saved_coco_path)
  104. if not os.path.exists("%scoco/eval/images/" % saved_coco_path):
  105. os.makedirs("%scoco/eval/images/" % saved_coco_path)
  106. # 获取images目录下所有的joson文件列表
  107. print(labelme_path + "/*.files")
  108. json_list_path = glob.glob(labelme_path + "/*.files")
  109. print('json_list_path: ', len(json_list_path))
  110. # 数据划分,这里没有区分val2017和tran2017目录,所有图片都放在images目录下
  111. train_path, val_path = train_test_split(json_list_path, test_size=0.1, train_size=0.9)
  112. print("train_n:", len(train_path), 'val_n:', len(val_path))
  113. # 把训练集转化为COCO的json格式
  114. l2c_train = Lableme2CoCo()
  115. train_instance = l2c_train.to_coco(train_path)
  116. l2c_train.save_coco_json(train_instance, '%scoco/train/annotations.files' % saved_coco_path)
  117. for file in train_path:
  118. # shutil.copy(file.replace("files", "jpg"), "%scoco/images/train2017/" % saved_coco_path)
  119. img_name = file.replace('files', 'jpg')
  120. temp_img = cv2.imread(img_name)
  121. try:
  122. img_name = str(img_name).split('\\')[-1]
  123. cv2.imwrite("{}coco/train/images/{}".format(saved_coco_path, img_name.replace('jpg', 'jpg')),temp_img)
  124. except Exception as e:
  125. print(e)
  126. print('Wrong Image:', img_name)
  127. continue
  128. print(img_name + '-->', img_name.replace('jpg', 'jpg'))
  129. for file in val_path:
  130. # shutil.copy(file.replace("files", "jpg"), "%scoco/images/val2017/" % saved_coco_path)
  131. img_name = file.replace('files', 'jpg')
  132. temp_img = cv2.imread(img_name)
  133. try:
  134. img_name = str(img_name).split('\\')[-1]
  135. cv2.imwrite("{}coco/eval/images/{}".format(saved_coco_path, img_name.replace('jpg', 'jpg')), temp_img)
  136. except Exception as e:
  137. print(e)
  138. print('Wrong Image:', img_name)
  139. continue
  140. print(img_name + '-->', img_name.replace('jpg', 'jpg'))
  141. # 把验证集转化为COCO的json格式
  142. l2c_val = Lableme2CoCo()
  143. val_instance = l2c_val.to_coco(val_path)
  144. l2c_val.save_coco_json(val_instance, '%scoco/eval/annotations.files' % saved_coco_path)