Browse Source

[feat] 脚本适配优化

WangChao 6 tháng trước
mục cha
commit
8df4f6ea98
2 tập tin đã thay đổi với 37 bổ sung và 26 xóa
  1. 2 2
      Images_rename/images_rename.py
  2. 35 24
      pdf_rename_to_images/pdf_to_image.py

+ 2 - 2
Images_rename/images_rename.py

@@ -24,8 +24,8 @@ def rename(img_dir, save_dir):
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
-    parser.add_argument('--img_dir', type=str, default=r'C:\Users\KDAN\Desktop\workspace\rec_data(tw_idcard)\tw_idcard_rec_1223\id')
-    parser.add_argument('--save_dir', type=str, default=r'C:\Users\KDAN\Desktop\workspace\rec_data(tw_idcard)\tw_idcard_rec_1223\id')
+    parser.add_argument('--img_dir', type=str)
+    parser.add_argument('--save_dir', type=str)
     args = parser.parse_args()
     if not os.path.exists(args.save_dir):
         os.makedirs(args.save_dir)

+ 35 - 24
pdf_rename_to_images/pdf_to_image.py

@@ -26,30 +26,41 @@ def findfiles(path):
 
 def pyMuPDF_fitz(pdfPath, imagePath):
     global count
-    pdfDoc = fitz.open(pdfPath)
-    cnt = 1
-    for pg in range(pdfDoc.page_count):
-        page = pdfDoc[pg]
-        info = page.bound()
-        radio = 0.0
-        if info.width > info.height:
-            radio = 800.0 / info.width
-        else:
-            radio = 800.0 / info.height
-        rotate = int(0)
-        zoom_x = 1.33333
-        zoom_y = 1.333333
-        mat = fitz.Matrix(zoom_x, zoom_y).prerotate(rotate)
-        try:
-            pix = page.get_pixmap(matrix=mat, alpha=False)
-        except Exception as e:
-            print("imagePath=" + pdfPath + " ---------------- ", e.__class__.__name__, e)
-            continue
-        if not os.path.exists(imagePath):
-            os.makedirs(imagePath)
-        pix.save(imagePath + '/' + str(pdfPath).split('\\')[-1][0:-4] + '-' + str(uuid.uuid1())[0:4] + '-' + str(cnt) + '.jpg')
-        count += 1
-        cnt += 1
+
+    # Render each page of the PDF at pdfPath to a JPEG file inside imagePath.
+    # Errors are printed rather than raised; `count` tallies every saved page.
+    pdfDoc = None  # remains None if fitz.open() fails, so cleanup can skip close()
+    try:
+        pdfDoc = fitz.open(pdfPath)
+        file_name = os.path.splitext(os.path.basename(pdfPath))[0]
+        cnt = 1
+        for pg in range(pdfDoc.page_count):
+            page = pdfDoc[pg]
+            rotate = int(0)
+            zoom_x = 1.33333
+            zoom_y = 1.333333
+            mat = fitz.Matrix(zoom_x, zoom_y).prerotate(rotate)
+            try:
+                pix = page.get_pixmap(matrix=mat, alpha=False)
+            except Exception as e:
+                print("imagePath=" + pdfPath + " ---------------- ", e.__class__.__name__, e)
+                continue
+            # exist_ok replaces the separate existence check before creating the directory.
+            os.makedirs(imagePath, exist_ok=True)
+
+            # File name: <pdf stem>-<first 4 chars of a uuid1>-<1-based page counter>.jpg
+            save_path = os.path.join(imagePath, file_name + '-' + str(uuid.uuid1())[0:4] + '-' + str(cnt) + '.jpg')
+            pix.save(save_path)
+            count += 1
+            cnt += 1
+
+    except Exception as e:
+        print(f"发生错误: {e}")
+    finally:
+        # Close only a document that was actually opened; an unconditional
+        # close would raise NameError whenever fitz.open() itself failed.
+        if pdfDoc is not None:
+            pdfDoc.close()
 
 
 if __name__ == "__main__":