|
@@ -23,33 +23,51 @@ def clean_label_files(labels_dir, output_labels_dir, log_file):
|
|
|
continue
|
|
|
|
|
|
try:
|
|
|
- with open(file_path, 'r') as f: # 打开旧的txt文件
|
|
|
+ with open(file_path, 'r') as f, open(log_file, 'a+') as log: # 打开旧的txt文件
|
|
|
lines = f.readlines() # 得到文件内容
|
|
|
- new_lines = []
|
|
|
- file_needs_logging = False
|
|
|
- for line in lines: # 遍历处理文件内容
|
|
|
- parts = line.strip().split()
|
|
|
- if parts:
|
|
|
- first_num = int(parts[0])
|
|
|
- other_nums = [float(num) for num in parts[1:]]
|
|
|
- if first_num not in [4, 20] and all(
|
|
|
- 0 <= num <= 1 for num in other_nums): # 只有当一行内容的第一个数字不是4或20,其他数字都在[0,1]之间时这一行内容才能被保留
|
|
|
- new_lines.append(line)
|
|
|
- elif not all(0 <= num <= 1 for num in other_nums): # 如果不满足上诉条件且有数字不在【0,1】的,要记录文件名
|
|
|
- file_needs_logging = True
|
|
|
- if file_needs_logging:
|
|
|
- processed_files.append(label_file)
|
|
|
- with open(output_file_path, 'w') as f: # 将文本内容写入新文件夹中
|
|
|
- f.writelines(new_lines)
|
|
|
+ new_lines = []
|
|
|
+ file_needs_logging = False
|
|
|
+
|
|
|
+ for line_number, line in enumerate(lines, start=1):
|
|
|
+ parts = line.strip().split()
|
|
|
+ if parts:
|
|
|
+ first_num = int(parts[0])
|
|
|
+ other_nums = [float(num) for num in parts[1:]]
|
|
|
+
|
|
|
+ if not all(0 <= num <= 1 for num in other_nums): # label 数据有误
|
|
|
+ log.write(f"{label_file}:\n")
|
|
|
+ log.write(f"\terror line: {line_number}\n")
|
|
|
+ elif first_num in [4, 20]: # 4 和 20 是暂时不需要的数据
|
|
|
+ log.write(f"{label_file}:\n")
|
|
|
+ log.write(f"\tdelete line: {line_number}\n")
|
|
|
+ elif first_num in [5, 9, 13, 21, 25]: # 修改为标题
|
|
|
+ log.write(f"{label_file}:\n")
|
|
|
+ log.write(f"\tmodify line {line_number}: {first_num} -> 26\n")
|
|
|
+ new_content = '26 '
|
|
|
+ new_content += ' '.join(map(str, other_nums))
|
|
|
+ new_content += '\n'
|
|
|
+ new_lines.append(new_content)
|
|
|
+ else:
|
|
|
+ new_lines.append(line)
|
|
|
+
|
|
|
+ # if first_num not in [4, 20] and all(
|
|
|
+ # 0 <= num <= 1 for num in other_nums): # 只有当一行内容的第一个数字不是4或20,其他数字都在[0,1]之间时这一行内容才能被保留
|
|
|
+ # new_lines.append(line)
|
|
|
+ # elif not all(0 <= num <= 1 for num in other_nums): # 如果不满足上诉条件且有数字不在【0,1】的,要记录文件名
|
|
|
+ # file_needs_logging = True
|
|
|
+ # =====
|
|
|
+
|
|
|
+ with open(output_file_path, 'w') as out: # 将文本内容写入新文件夹中
|
|
|
+ out.writelines(new_lines)
|
|
|
|
|
|
except Exception as e:
|
|
|
print(f"发生错误: {e}")
|
|
|
|
|
|
pbar.update(1) # 更新进度条
|
|
|
|
|
|
- with open(log_file, 'w') as log:
|
|
|
- for file_name in processed_files:
|
|
|
- log.write(f"{file_name}\n")
|
|
|
+ # with open(log_file, 'w') as log:
|
|
|
+ # for file_name in processed_files:
|
|
|
+ # log.write(f"{file_name}\n")
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|