Browse Source

[feat] 添加修改标签功能

WangChao 4 months ago
parent
commit
b09e48c1bd
1 changed file with 38 additions and 20 deletions
  1. 38 20
      data_collection/clean_label.py

+ 38 - 20
data_collection/clean_label.py

@@ -23,33 +23,51 @@ def clean_label_files(labels_dir, output_labels_dir, log_file):
                 continue
 
             try:
-                with open(file_path, 'r') as f:  # 打开旧的txt文件
+                with open(file_path, 'r') as f, open(log_file, 'a+') as log:  # 打开旧的txt文件
                     lines = f.readlines()  # 得到文件内容
-                new_lines = []
-                file_needs_logging = False
-                for line in lines:  # 遍历处理文件内容
-                    parts = line.strip().split()
-                    if parts:
-                        first_num = int(parts[0])
-                        other_nums = [float(num) for num in parts[1:]]
-                        if first_num not in [4, 20] and all(
-                                0 <= num <= 1 for num in other_nums):  # 只有当一行内容的第一个数字不是4或20,其他数字都在[0,1]之间时这一行内容才能被保留
-                            new_lines.append(line)
-                        elif not all(0 <= num <= 1 for num in other_nums):  # 如果不满足上诉条件且有数字不在【0,1】的,要记录文件名
-                            file_needs_logging = True
-                if file_needs_logging:
-                    processed_files.append(label_file)
-                with open(output_file_path, 'w') as f:  # 将文本内容写入新文件夹中
-                    f.writelines(new_lines)
+                    new_lines = []
+                    file_needs_logging = False
+
+                    for line_number, line in enumerate(lines, start=1):
+                        parts = line.strip().split()
+                        if parts:
+                            first_num = int(parts[0])
+                            other_nums = [float(num) for num in parts[1:]]
+
+                            if not all(0 <= num <= 1 for num in other_nums):  # label 数据有误
+                                log.write(f"{label_file}:\n")
+                                log.write(f"\terror line: {line_number}\n")
+                            elif first_num in [4, 20]:  # 4 和 20 是暂时不需要的数据
+                                log.write(f"{label_file}:\n")
+                                log.write(f"\tdelete line: {line_number}\n")
+                            elif first_num in [5, 9, 13, 21, 25]:  # 修改为标题
+                                log.write(f"{label_file}:\n")
+                                log.write(f"\tmodify line {line_number}: {first_num} -> 26\n")
+                                new_content = '26 '
+                                new_content += ' '.join(map(str, other_nums))
+                                new_content += '\n'
+                                new_lines.append(new_content)
+                            else:
+                                new_lines.append(line)
+
+                            # if first_num not in [4, 20] and all(
+                            #         0 <= num <= 1 for num in other_nums):  # 只有当一行内容的第一个数字不是4或20,其他数字都在[0,1]之间时这一行内容才能被保留
+                            #     new_lines.append(line)
+                            # elif not all(0 <= num <= 1 for num in other_nums):  # 如果不满足上诉条件且有数字不在【0,1】的,要记录文件名
+                            #     file_needs_logging = True
+                            # =====
+
+                    with open(output_file_path, 'w') as out:  # 将文本内容写入新文件夹中
+                        out.writelines(new_lines)
 
             except Exception as e:
                 print(f"发生错误: {e}")
 
             pbar.update(1)  # 更新进度条
 
-    with open(log_file, 'w') as log:
-        for file_name in processed_files:
-            log.write(f"{file_name}\n")
+    # with open(log_file, 'w') as log:
+    #     for file_name in processed_files:
+    #         log.write(f"{file_name}\n")
 
 
 if __name__ == '__main__':