Ver Fonte

[feat] 删除重复文件脚本

WangChao há 6 meses atrás
pai
commit
88ae67adfd
1 ficheiros alterados com 43 adições e 0 exclusões
  1. 43 0
      data_collection/check_repeat.py

+ 43 - 0
data_collection/check_repeat.py

@@ -0,0 +1,43 @@
+import os
+import shutil
+import argparse
+
+
def find_and_delete_duplicate_files(path):
    """Delete files under ``path`` whose base name was already seen.

    The tree rooted at ``path`` is walked top-down and every file is grouped
    by its base name only -- file contents are NOT compared. For each name
    that occurs more than once, the first path encountered is kept and every
    later one is removed with ``os.remove``.

    Directory and file listings are sorted before use, so which copy counts
    as "first" is reproducible instead of depending on the order in which the
    file system happens to list entries. Files in a directory are always
    visited before files in its subdirectories.

    A failed deletion is reported on stdout and does not stop the remaining
    deletions.

    Args:
        path: Root directory to scan. If it cannot be listed, ``os.walk``
            yields nothing and no file is touched.

    Returns:
        None.
    """
    # Maps each base file name to every path where it was found, in walk order.
    paths_by_name = {}

    # Walk the directory and all of its subdirectories.
    for root, dirs, files in os.walk(path):
        # Sorting ``dirs`` in place fixes the order in which os.walk descends.
        dirs.sort()
        for name in sorted(files):
            paths_by_name.setdefault(name, []).append(os.path.join(root, name))

    # Keep the first occurrence of every name and delete the rest.
    for found in paths_by_name.values():
        for duplicate in found[1:]:
            print(f"Deleting duplicate file: {duplicate}")
            try:
                os.remove(duplicate)
            except OSError as e:
                print(f"Error deleting file {duplicate}: {e}")
+
+
def main():
    """Parse the command line and delete duplicate files under the given path.

    Takes one positional argument, ``input``, and passes it unchanged to
    ``find_and_delete_duplicate_files`` as the root directory to scan.
    """
    # The text has to be given as ``description``: the first positional
    # parameter of ArgumentParser is ``prog``, so passing it positionally
    # replaced the program name in the usage line and left the help output
    # without a description.
    parse = argparse.ArgumentParser(description="查找重复文件并删除\n")
    parse.add_argument("input", help="输入路径")

    args = parse.parse_args()
    find_and_delete_duplicate_files(args.input)


if __name__ == "__main__":
    main()