//
//  KMPDFConvert.swift
//  PDF Reader Pro
//
//  Created by tangchao on 2022/12/7.
//

import Cocoa
import PDFKit
import ComPDFKit_Conversion

/// Key into `KMPDFConvert.options` holding the image DPI (read as an `Int`).
let KMPDFConvertOptionsKeyImageDPI = "KMPDFConvertOptionsKeyImageDPI"
/// Key for an "include annotations in images" option. Not read anywhere in this file.
let KMPDFConvertOptionsKeyImageWithAnnotation = "KMPDFConvertOptionsKeyImageWithAnnotation"

/// Target formats a PDF can be converted to.
enum KMPDFConvertType: Int {
    case word = 0
    case excel = 1
    case ppt = 2
    case rtf = 3
    case csv = 4
    case html = 5
    case text = 6
    case jpeg = 7
    case jpg = 8
    case png = 9
    case gif = 10
    case tiff = 11
    case tga = 12
    case bmp = 13
    case jp2 = 14
    case json = 15

    /// Alias used for "image" conversions; maps to `.jpeg`.
    static let image: KMPDFConvertType = .jpeg
}

/// Completion callback: `finished` is `true` on success, `error` carries the failure (if any).
typealias KMPDFConvertCallback = (_ finished: Bool, _ error: Error?) -> ()
/// Progress callback receiving the page index reported by the converter delegate.
typealias KMPDFConvertProgress = (Int) -> ()

// MARK: - Base conversion operation

/// An `Operation` that converts a PDF file into another document or image format.
///
/// `start()` resolves `outputFilePath` from `outputFolderPath`, `outputFileName` and the
/// extension for `convertType`, then calls `startConvert()`. Subclasses override
/// `startConvert()` to configure a format-specific converter. Completion is reported
/// through `convertSuccessful(isSuccessful:errorInfo:)`, which also flips `isFinished`.
class KMPDFConvert: Operation {
    var type: Int = 0
    var filePath: String = ""
    var password: String = ""
    var outputFileName: String = ""
    var outputFolderPath: String = ""
    var pages: [Int]!
    var convertType: KMPDFConvertType = .word
    var options: [String:Any]!
    var outputFilePath: String = ""
    var isSuccessful: Bool = false
    var isAllInOneSheet: Bool = false
    var isExtractTable: Bool = false
    var isExtractText: Bool = false
    /**
     0: extract each table into its own worksheet
     1: extract tables page by page into separate worksheets
     2: extract all tables into a single worksheet
     */
    var extractTableIndex: Int = 0
    var errorInfo: Error!

    // Whether OCR is used.
    var isAllowOCR = false
    var ocrLanguage: COCRLanguage?
    var isContainOCRBgImage = true
    var isContainAnnotations = true
    var isContainImages = true

    fileprivate var pathExtension: String = ""
    fileprivate var fpPDFConverter: CPDFConverterFP!
    fileprivate var converter: CPDFConverter!
    private var isCompletion: Bool = false

    var callback: KMPDFConvertCallback!
    var progress: KMPDFConvertProgress?

    var excelWorksheetOption: CPDFConvertExcelWorksheetOptions?
    var excelContentOption: CPDFConvertExcelContentOptions?

    /// Returns the output file extension for `type`, treating table extraction as disabled.
    public class func pathExtension(_ type: KMPDFConvertType) -> String {
        return self.pathExtension(type, nil)
    }

    /// Returns the output file extension for `type`.
    ///
    /// - Parameters:
    ///   - type: The target conversion type.
    ///   - isExtractTable: Only consulted for `.csv`; when `true` the result is `"zip"`
    ///     instead of `"csv"`.
    /// - Returns: The extension without a leading dot.
    public class func pathExtension(_ type: KMPDFConvertType, _ isExtractTable: Bool?) -> String {
        switch type {
        case .word: return "docx"
        case .excel: return "xlsx"
        case .ppt: return "pptx"
        case .rtf: return "rtf"
        case .csv: return isExtractTable == true ? "zip" : "csv"
        case .html: return "html"
        case .text: return "txt"
        case .jpeg: return "jpeg"
        case .jpg: return "jpg"
        case .png: return "png"
        case .gif: return "gif"
        case .tiff: return "tiff"
        case .tga: return "tga"
        case .bmp: return "bmp"
        case .jp2: return "jp2"
        case .json: return "json"
        }
    }

    /// `true` when `convertType` is one of the image formats, whose output is a folder.
    private var isImageConversion: Bool {
        switch convertType {
        case .jpeg, .jpg, .png, .gif, .tga, .bmp, .jp2, .tiff:
            return true
        case .word, .excel, .ppt, .rtf, .csv, .html, .text, .json:
            return false
        }
    }

    /// Flips `isFinished` to `true` with the KVO notifications `Operation` relies on.
    private func markFinished() {
        willChangeValue(forKey: "isFinished")
        isCompletion = true
        didChangeValue(forKey: "isFinished")
    }

    /// Resolves the output path and kicks off the conversion.
    ///
    /// If the operation was cancelled before starting, it is marked finished without
    /// invoking `callback`, so that an owning queue can release it.
    override func start() {
        if isCancelled {
            // A custom start() must still move a cancelled operation to the finished
            // state, otherwise it never completes.
            markFinished()
            return
        }

        let pathExtension = KMPDFConvert.pathExtension(self.convertType, self.isExtractTable)

        if isImageConversion {
            // Image output goes into a folder named after the output file.
            let folderPath = getUniqueFilePath(filePath: "\(outputFolderPath)/\(outputFileName)")
            // Create the folder at the de-duplicated path (the one stored in
            // outputFilePath). Best effort: a failure is left for the converter to report.
            try? FileManager.default.createDirectory(atPath: folderPath, withIntermediateDirectories: false)
            outputFilePath = folderPath
        } else if !pathExtension.isEmpty {
            outputFilePath = getUniqueFilePath(filePath: "\(outputFolderPath)/\(outputFileName).\(pathExtension)")
        } else {
            outputFolderPath.append("/")
            outputFolderPath.append(outputFileName)
            outputFilePath = outputFolderPath
        }

        self.pathExtension = pathExtension
        self.startConvert()
    }

    /// Returns `filePath` if nothing exists there, otherwise the first free variant
    /// obtained by inserting `(1)`, `(2)`, … before the extension (or at the end for
    /// directories and extension-less files).
    ///
    /// - Parameter filePath: The desired file or directory path.
    /// - Returns: A path at which no item currently exists.
    func getUniqueFilePath(filePath: String) -> String {
        let fileManager = FileManager.default
        var isDirectory: ObjCBool = false
        guard fileManager.fileExists(atPath: filePath, isDirectory: &isDirectory) else {
            return filePath
        }

        let stem: String
        let suffix: String
        if isDirectory.boolValue {
            stem = filePath
            suffix = ""
        } else {
            let fileURL = URL(fileURLWithPath: filePath)
            let fileExtension = fileURL.pathExtension
            stem = fileURL.deletingPathExtension().path
            // Avoid producing a dangling "." for files without an extension.
            suffix = fileExtension.isEmpty ? "" : ".\(fileExtension)"
        }

        var index = 1
        var candidate = "\(stem)(\(index))\(suffix)"
        while fileManager.fileExists(atPath: candidate) {
            index += 1
            candidate = "\(stem)(\(index))\(suffix)"
        }
        return candidate
    }

    /// Runs the conversion for `convertType` with default options.
    ///
    /// Word, Excel, PPT, RTF, HTML, text and table-extracting CSV use the dedicated
    /// `CPDFConverter` subclasses; every other type falls back to `CPDFConverterFP`.
    /// OCR settings are not applied on this path; the format-specific subclasses in
    /// this file apply them in their own `startConvert()` overrides.
    func startConvert() {
        if pathExtension.isEmpty {
            convertSuccessful(isSuccessful: false, errorInfo: nil)
            return
        }

        fpPDFConverter = CPDFConverterFP()
        fpPDFConverter.setDelegate(self)

        // Missing or non-Int DPI values fall back to 0 instead of crashing.
        let dpi = (self.options?[KMPDFConvertOptionsKeyImageDPI] as? Int) ?? 0
        let legacyOptions: [String:Any] = [
            CPDFConvertOptionsKey.imageDPI.rawValue: dpi,
            CPDFConvertOptionsKey.allInOneSheet.rawValue: isAllInOneSheet
        ]

        let sourceURL = URL(fileURLWithPath: filePath)

        switch convertType {
        case .word:
            self.converter = CPDFConverterWord(url: sourceURL, password: self.password)
            self.converter.delegate = self
            let wordOptions = CPDFConvertWordOptions()
            wordOptions.layoutOptions = self.isAllInOneSheet ? .retainPageLayout : .retainFlowingText
            wordOptions.isContainAnnotations = true
            wordOptions.isContainImages = true
            self.converter.convert(toFilePath: self.outputFilePath, pageIndexs: self.pages, options: wordOptions)

        case .excel:
            self.converter = CPDFConverterExcel(url: sourceURL, password: self.password)
            self.converter.delegate = self
            let excelOptions = CPDFConvertExcelOptions()
            excelOptions.isContainAnnotations = true
            excelOptions.isContainImages = true
            excelOptions.contentOptions = self.excelContentOption ?? .allContent
            excelOptions.worksheetOptions = self.excelWorksheetOption ?? .forEachPage
            self.converter.convert(toFilePath: self.outputFilePath, pageIndexs: self.pages, options: excelOptions)

        case .ppt:
            self.converter = CPDFConverterPPT(url: sourceURL, password: self.password)
            self.converter.delegate = self
            let pptOptions = CPDFConvertPPTOptions()
            pptOptions.isContainAnnotations = true
            pptOptions.isContainImages = true
            self.converter.convert(toFilePath: self.outputFilePath, pageIndexs: self.pages, options: pptOptions)

        case .csv where isExtractTable:
            self.converter = CPDFConverterCsv(url: sourceURL, password: self.password)
            self.converter.delegate = self
            let csvOptions = CPDFConvertCsvOptions()
            self.converter.convert(toFilePath: self.outputFilePath, pageIndexs: self.pages, options: csvOptions)

        case .rtf:
            self.converter = CPDFConverterRtf(url: sourceURL, password: self.password)
            self.converter.delegate = self
            let rtfOptions = CPDFConvertRtfOptions()
            rtfOptions.isContainAnnotations = true
            rtfOptions.isContainImages = true
            self.converter.convert(toFilePath: self.outputFilePath, pageIndexs: self.pages, options: rtfOptions)

        case .html:
            self.converter = CPDFConverterHtml(url: sourceURL, password: self.password)
            self.converter.delegate = self
            let htmlOptions = CPDFConvertHtmlOptions()
            htmlOptions.isContainAnnotations = true
            htmlOptions.isContainImages = true
            self.converter.convert(toFilePath: self.outputFilePath, pageIndexs: self.pages, options: htmlOptions)

        case .text:
            self.converter = CPDFConverterTxt(url: sourceURL, password: self.password)
            self.converter.delegate = self
            let textOptions = CPDFConvertTxtOptions()
            self.converter.convert(toFilePath: self.outputFilePath, pageIndexs: self.pages, options: textOptions)

        case .csv, .jpeg, .jpg, .png, .gif, .tiff, .tga, .bmp, .jp2, .json:
            fpPDFConverter.convertPDF(atPath: filePath,
                                      pdfPassword: self.password,
                                      pdfPageIndexs: pages,
                                      destDocType: pathExtension,
                                      destDocPath: outputFilePath,
                                      moreOptions: legacyOptions)
        }
    }

    /// Records the outcome, detaches the converter delegate, invokes `callback` on the
    /// main queue and marks the operation finished.
    ///
    /// - Parameters:
    ///   - isSuccessful: Whether the conversion succeeded.
    ///   - errorInfo: The error reported by the converter, or `nil`.
    func convertSuccessful(isSuccessful: Bool, errorInfo: Error!) {
        self.isSuccessful = isSuccessful
        self.errorInfo = errorInfo

        if self.converter != nil && self.converter.delegate != nil {
            self.converter.delegate = nil
        }

        DispatchQueue.main.async { [self] in
            guard let callbackBlock = callback else {
                return
            }
            callbackBlock(isSuccessful, errorInfo)
        }

        markFinished()
    }

    override var isFinished: Bool {
        return self.isCompletion
    }
}

// MARK: - CPDFConverterDelegate

extension KMPDFConvert: CPDFConverterDelegate {
    func converter(_ converter: CPDFConverter!, didStartConvert error: Error!) {
    }

    /// Treats a `nil` error as success and forwards the outcome.
    func converter(_ converter: CPDFConverter!, didEndConvert error: Error!) {
        convertSuccessful(isSuccessful: error == nil, errorInfo: error)
    }

    /// Forwards the reported page index to `progress`.
    func converter(_ converter: CPDFConverter!, pageIndex index: UInt, pageCount count: UInt) {
        guard let callback = progress else {
            return
        }
        callback(Int(index))
    }
}

// MARK: - CPDFConverterFPDelegate

extension KMPDFConvert: CPDFConverterFPDelegate {
    /// Treats a `nil` error as success and forwards the outcome.
    func fppdfConverter(_ converter: Any!, didEndConversion error: Error!) {
        convertSuccessful(isSuccessful: error == nil, errorInfo: error)
    }

    /// Forwards the written page index (`wordPageIndexA`) to `progress`.
    func fppdfConverter(_ converter: Any!, convertPDFPageIndex pdfPageIndexA: UInt, writeWordPageIndex wordPageIndexA: UInt, finshedWordPageCount wordPageCountA: UInt) {
        guard let callback = progress else {
            return
        }
        callback(Int(wordPageIndexA))
    }
}

// MARK: - PDF to Word

class KMPDFConvertWord: KMPDFConvert {
    // Fixed page layout | flowing text layout [flowing text by default]
    var layoutOptions: CPDFConvertLayoutOptions = .retainFlowingText

    override init() {
        super.init()

        self.convertType = .word
    }

    /// Converts to Word using `layoutOptions` and the OCR/annotation settings.
    override func startConvert() {
        if self.pathExtension.isEmpty {
            self.convertSuccessful(isSuccessful: false, errorInfo: nil)
            return
        }

        self.converter = CPDFConverterWord(url: URL(fileURLWithPath: self.filePath), password: self.password)
        self.converter.delegate = self

        let options = CPDFConvertWordOptions()
        options.layoutOptions = self.layoutOptions
        options.isContainAnnotations = self.isContainAnnotations
        options.isAllowOCR = self.isAllowOCR
        if self.isAllowOCR {
            options.isContainOCRBgImage = self.isContainOCRBgImage
            options.isAILayoutAnalysis = true
            options.language = self.ocrLanguage ?? .english
        } else {
            options.isContainImages = true
            options.isContainOCRBgImage = false
            options.isAILayoutAnalysis = false
        }

        self.converter.convert(toFilePath: self.outputFilePath, pageIndexs: self.pages, options: options)
    }
}

// MARK: - PDF to Image

class KMPDFConvertImage: KMPDFConvert {
    var imageType: CPDFConvertImgType = .JPEG
    var imageDpi: Int = 150

    /// Converts to images: `.jpeg` and `.png` use `CPDFConverterImg`, every other
    /// type goes through `CPDFConverterFP` with the DPI option.
    override func startConvert() {
        if self.pathExtension.isEmpty {
            self.convertSuccessful(isSuccessful: false, errorInfo: nil)
            return
        }

        if self.convertType == .jpeg || self.convertType == .png {
            self.converter = CPDFConverterImg(url: URL(fileURLWithPath: self.filePath), password: self.password)
            self.converter.delegate = self

            let options = CPDFConvertImgOptions()
            options.type = self.imageType
            options.imageDpi = Int32(self.imageDpi)
            options.isContainAnnotations = true

            self.converter.convert(toFilePath: self.outputFilePath, pageIndexs: self.pages, options: options)
            return
        }

        self.fpPDFConverter = CPDFConverterFP()
        self.fpPDFConverter.setDelegate(self)

        let options: [String : Any] = [CPDFConvertOptionsKey.imageDPI.rawValue : self.imageDpi]
        self.fpPDFConverter.convertPDF(atPath: self.filePath,
                                       pdfPassword: self.password,
                                       pdfPageIndexs: self.pages,
                                       destDocType: self.pathExtension,
                                       destDocPath: self.outputFilePath,
                                       moreOptions: options)
    }
}

// MARK: - PDF to PPT

class KMPDFConvertPPT: KMPDFConvert {
    override init() {
        super.init()

        self.convertType = .ppt
    }

    /// Converts to PowerPoint using the OCR/annotation settings.
    override func startConvert() {
        if self.pathExtension.isEmpty {
            self.convertSuccessful(isSuccessful: false, errorInfo: nil)
            return
        }

        let options = CPDFConvertPPTOptions()
        options.isContainAnnotations = self.isContainAnnotations
        options.isAllowOCR = self.isAllowOCR
        if self.isAllowOCR {
            options.isContainOCRBgImage = self.isContainOCRBgImage
            options.isAILayoutAnalysis = true
            options.language = self.ocrLanguage ?? .english
        } else {
            options.isContainImages = true
            options.isContainOCRBgImage = false
            options.isAILayoutAnalysis = false
        }

        self.converter = CPDFConverterPPT(url: URL(fileURLWithPath: self.filePath), password: self.password)
        self.converter.delegate = self
        self.converter.convert(toFilePath: self.outputFilePath, pageIndexs: self.pages, options: options)
    }
}

// MARK: - PDF to RTF

class KMPDFConvertRTF: KMPDFConvert {
    override init() {
        super.init()

        self.convertType = .rtf
    }

    /// Converts to RTF using the OCR/annotation settings.
    override func startConvert() {
        if self.pathExtension.isEmpty {
            self.convertSuccessful(isSuccessful: false, errorInfo: nil)
            return
        }

        let options = CPDFConvertRtfOptions()
        options.isContainAnnotations = self.isContainAnnotations
        options.isAllowOCR = self.isAllowOCR
        if self.isAllowOCR {
            options.isContainOCRBgImage = self.isContainOCRBgImage
            options.language = self.ocrLanguage ?? .english
        } else {
            options.isContainImages = true
            options.isContainOCRBgImage = false
        }

        self.converter = CPDFConverterRtf(url: URL(fileURLWithPath: self.filePath), password: self.password)
        self.converter.delegate = self
        self.converter.convert(toFilePath: self.outputFilePath, pageIndexs: self.pages, options: options)
    }
}

// MARK: - PDF to HTML

class KMPDFConvertHTML: KMPDFConvert {
    var paneOptions: CPDFConvertHtmlPageAndNavigationPaneOptions = .singlePage

    override init() {
        super.init()

        self.convertType = .html
    }

    /// Converts to HTML using `paneOptions` and the OCR/annotation/image settings.
    override func startConvert() {
        if self.pathExtension.isEmpty {
            self.convertSuccessful(isSuccessful: false, errorInfo: nil)
            return
        }

        let options = CPDFConvertHtmlOptions()
        options.isContainAnnotations = self.isContainAnnotations
        options.isAllowOCR = self.isAllowOCR
        options.paneOptions = self.paneOptions
        if self.isAllowOCR {
            options.isContainOCRBgImage = self.isContainOCRBgImage
            options.language = self.ocrLanguage ?? .english
        } else {
            options.isContainImages = self.isContainImages
            options.isContainOCRBgImage = false
        }

        self.converter = CPDFConverterHtml(url: URL(fileURLWithPath: self.filePath), password: self.password)
        self.converter.delegate = self
        self.converter.convert(toFilePath: self.outputFilePath, pageIndexs: self.pages, options: options)
    }
}

// MARK: - PDF to Text

class KMPDFConvertText: KMPDFConvert {
    override init() {
        super.init()

        self.convertType = .text
    }

    /// Converts to plain text, applying the OCR language when OCR is enabled.
    override func startConvert() {
        if self.pathExtension.isEmpty {
            self.convertSuccessful(isSuccessful: false, errorInfo: nil)
            return
        }

        let options = CPDFConvertTxtOptions()
        options.isAllowOCR = self.isAllowOCR
        if self.isAllowOCR {
            options.language = self.ocrLanguage ?? .english
        }

        self.converter = CPDFConverterTxt(url: URL(fileURLWithPath: self.filePath), password: self.password)
        self.converter.delegate = self
        self.converter.convert(toFilePath: self.outputFilePath, pageIndexs: self.pages, options: options)
    }
}

// MARK: - PDF to CSV

class KMPDFConvertCSV: KMPDFConvert {
    override init() {
        super.init()

        self.convertType = .csv
    }

    /// Converts to CSV: table extraction uses `CPDFConverterCsv` (with no options),
    /// otherwise `CPDFConverterFP` is used with the all-in-one-sheet option.
    override func startConvert() {
        if self.pathExtension.isEmpty {
            self.convertSuccessful(isSuccessful: false, errorInfo: nil)
            return
        }

        if self.convertType == .csv && self.isExtractTable {
            self.converter = CPDFConverterCsv(url: URL(fileURLWithPath: self.filePath), password: self.password)
            self.converter.delegate = self
            self.converter.convert(toFilePath: self.outputFilePath, pageIndexs: self.pages, options: nil)
            return
        }

        self.fpPDFConverter = CPDFConverterFP()
        self.fpPDFConverter.setDelegate(self)

        let options: [String : Any] = [CPDFConvertOptionsKey.allInOneSheet.rawValue : self.isAllInOneSheet]
        self.fpPDFConverter.convertPDF(atPath: self.filePath,
                                       pdfPassword: self.password,
                                       pdfPageIndexs: self.pages,
                                       destDocType: self.pathExtension,
                                       destDocPath: self.outputFilePath,
                                       moreOptions: options)
    }
}

// MARK: - PDF to Excel

class KMPDFConvertExcel: KMPDFConvert {
    override init() {
        super.init()

        self.convertType = .excel
    }

    /// Converts to Excel. Content and worksheet options are derived from
    /// `isExtractText`, `isExtractTable`, `extractTableIndex` and `isAllInOneSheet`.
    override func startConvert() {
        if self.pathExtension.isEmpty {
            self.convertSuccessful(isSuccessful: false, errorInfo: nil)
            return
        }

        self.converter = CPDFConverterExcel(url: URL(fileURLWithPath: self.filePath), password: self.password)
        self.converter.delegate = self

        let options = CPDFConvertExcelOptions()
        options.isContainAnnotations = self.isContainAnnotations
        options.isAllowOCR = self.isAllowOCR
        if self.isAllowOCR {
            options.isAILayoutAnalysis = true
            options.language = self.ocrLanguage ?? .english
        } else {
            options.isContainImages = true
            options.isAILayoutAnalysis = false
        }

        if self.isExtractText {
            options.contentOptions = .onlyText
        } else if self.isExtractTable {
            options.contentOptions = .onlyTable
            // Any other index leaves worksheetOptions at the SDK's default.
            switch self.extractTableIndex {
            case 0:
                options.worksheetOptions = .forEachTable
            case 1:
                options.worksheetOptions = .forEachPage
            case 2:
                options.worksheetOptions = .forTheDocument
            default:
                break
            }
        } else {
            options.contentOptions = .allContent
            options.worksheetOptions = self.isAllInOneSheet ? .forTheDocument : .forEachPage
        }

        self.converter.convert(toFilePath: self.outputFilePath, pageIndexs: self.pages, options: options)
    }
}

// MARK: - PDF to Json

class KMPDFConvertJson: KMPDFConvert {
    override init() {
        super.init()

        self.convertType = .json
    }

    /// Converts to JSON. `CPDFConverterJsonTable` is used when `isAllInOneSheet` is
    /// set, `CPDFConverterJson` otherwise.
    override func startConvert() {
        let sourceURL = URL(fileURLWithPath: self.filePath)
        if self.isAllInOneSheet {
            self.converter = CPDFConverterJsonTable(url: sourceURL, password: self.password)
        } else {
            self.converter = CPDFConverterJson(url: sourceURL, password: self.password)
        }
        self.converter.delegate = self

        let options = CPDFConvertJsonOptions()
        options.isAllowOCR = self.isAllowOCR
        if self.isAllowOCR {
            options.language = self.ocrLanguage ?? .english
        }

        self.converter.convert(toFilePath: self.outputFilePath, pageIndexs: self.pages, options: options)
    }
}