import argparse
import json
import os
import random
import shutil

try:
    from tqdm import tqdm
except ImportError:  # tqdm only drives the progress bar, so degrade gracefully
    tqdm = None

# Class ids whose annotations are removed (marked as not needed for now).
_DROPPED_CLASS_IDS = frozenset({4, 20})
# Class ids that are rewritten to the "title" class id.
_TITLE_SOURCE_CLASS_IDS = frozenset({5, 9, 13, 21, 25})
_TITLE_CLASS_ID = 26


def _filter_label_lines(lines, label_file):
    """Apply the cleaning rules to the lines of one label file.

    Each non-blank line is expected to be ``<class_id> <value> <value> ...``
    with an integer class id followed by floats.

    Rules, checked in this order:

    * a line that cannot be parsed, or that has a value outside ``[0, 1]``,
      is dropped and logged as an error line;
    * a line whose class id is in ``_DROPPED_CLASS_IDS`` is dropped and
      logged as deleted;
    * a line whose class id is in ``_TITLE_SOURCE_CLASS_IDS`` is rewritten
      with class id ``_TITLE_CLASS_ID`` and logged as modified;
    * any other line is kept verbatim.

    Blank lines are dropped without a log entry.

    Args:
        lines: Raw lines of the label file, line endings included.
        label_file: File name used as the header of every log entry.

    Returns:
        A ``(kept_lines, log_entries)`` tuple of string lists.
    """
    kept_lines = []
    log_entries = []
    for line_number, line in enumerate(lines, start=1):
        parts = line.split()
        if not parts:
            continue

        try:
            class_id = int(parts[0])
            values = [float(token) for token in parts[1:]]
        except ValueError:
            # Treat an unparsable line like any other invalid line instead of
            # letting it abort the whole file.
            values = None

        if values is None or not all(0 <= value <= 1 for value in values):
            log_entries.append(f"{label_file}:\n")
            log_entries.append(f"\terror line: {line_number}\n")
        elif class_id in _DROPPED_CLASS_IDS:
            log_entries.append(f"{label_file}:\n")
            log_entries.append(f"\tdelete line: {line_number}\n")
        elif class_id in _TITLE_SOURCE_CLASS_IDS:
            log_entries.append(f"{label_file}:\n")
            log_entries.append(
                f"\tmodify line {line_number}: {class_id} -> {_TITLE_CLASS_ID}\n"
            )
            kept_lines.append(
                f"{_TITLE_CLASS_ID} " + " ".join(map(str, values)) + "\n"
            )
        else:
            kept_lines.append(line)
    return kept_lines, log_entries


def clean_label_files(labels_dir, output_labels_dir, log_file):
    """Write cleaned copies of every ``.txt`` label file in ``labels_dir``.

    Files are processed in sorted name order so the log is reproducible.
    For each file the cleaned lines are written to a file of the same name in
    ``output_labels_dir`` (created if missing), and one log entry per dropped
    or modified line is appended to ``log_file``. Entries without a ``.txt``
    extension (case-insensitive) are ignored.

    A file that cannot be read or written is reported on stdout and skipped;
    the remaining files are still processed.

    Args:
        labels_dir: Directory containing the original label files.
        output_labels_dir: Directory that receives the cleaned label files.
        log_file: Path of the log file; new entries are appended (UTF-8).

    Raises:
        OSError: If ``labels_dir`` cannot be listed, or the output directory
            or the log file cannot be created/opened.
    """
    os.makedirs(output_labels_dir, exist_ok=True)

    # Filter up front so the progress total matches the work actually done.
    label_files = sorted(
        name
        for name in os.listdir(labels_dir)
        if os.path.splitext(name)[1].lower() == ".txt"
    )

    progress = label_files
    if tqdm is not None:
        progress = tqdm(label_files, desc="Processing files", unit="file")

    # Open the log once rather than once per label file.
    with open(log_file, "a", encoding="utf-8") as log:
        for label_file in progress:
            source_path = os.path.join(labels_dir, label_file)
            target_path = os.path.join(output_labels_dir, label_file)
            try:
                with open(source_path, "r", encoding="utf-8") as src:
                    lines = src.readlines()
                kept_lines, log_entries = _filter_label_lines(lines, label_file)
                with open(target_path, "w", encoding="utf-8") as out:
                    out.writelines(kept_lines)
            except (OSError, UnicodeError) as e:
                # Best effort: report the failing file and keep going.
                print(f"发生错误: {label_file}: {e}")
                continue
            log.writelines(log_entries)
def main(argv=None):
    """Parse the command line and clean the label files of one dataset.

    Reads labels from ``<dataset_dir>/labels``, writes the cleaned copies to
    ``<dataset_dir>/processed_labels`` and appends the log to
    ``<dataset_dir>/<log_file>``.

    Args:
        argv: Argument list to parse; ``None`` makes argparse use
            ``sys.argv[1:]``.
    """
    # The first positional parameter of ArgumentParser is ``prog``, so the
    # text has to be passed as ``description`` to avoid replacing the program
    # name in the usage line.
    parser = argparse.ArgumentParser(description="删除特定标注信息")
    parser.add_argument("dataset_dir", help="图片文件夹路径")
    parser.add_argument(
        "--log_file",
        default="processed_files.log",
        help="记录被处理文件的日志文件路径",
    )
    args = parser.parse_args(argv)

    labels_dir = os.path.join(args.dataset_dir, 'labels')
    output_labels_dir = os.path.join(args.dataset_dir, 'processed_labels')
    log_file_path = os.path.join(args.dataset_dir, args.log_file)
    clean_label_files(labels_dir, output_labels_dir, log_file_path)


if __name__ == '__main__':
    main()