123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581 |
- //
- // KMPDFConvert.swift
- // PDF Master
- //
- // Created by tangchao on 2022/12/7.
- //
- import Cocoa
- import PDFKit
- import ComPDFKit_Conversion
/// Key in `KMPDFConvert.options` under which the image DPI is stored.
let KMPDFConvertOptionsKeyImageDPI: String = "KMPDFConvertOptionsKeyImageDPI"
/// Options key named for the "image with annotation" setting; it is declared here but not read in this file.
let KMPDFConvertOptionsKeyImageWithAnnotation: String = "KMPDFConvertOptionsKeyImageWithAnnotation"
-
/// Target formats a PDF can be converted to.
///
/// Raw values are consecutive integers: document formats occupy 0...6 and
/// image formats occupy 7...14.
enum KMPDFConvertType: Int {
    // Document formats (raw values 0 through 6).
    case word = 0, excel, ppt, rtf, csv, html, text
    // Image formats (raw values 7 through 14).
    case jpeg = 7, jpg, png, gif, tiff, tga, bmp, jp2

    /// Alias for the `.jpeg` case.
    static let image = KMPDFConvertType.jpeg
}
/// Completion handler: receives whether the conversion finished successfully and the error, if any.
typealias KMPDFConvertCallback = (_ finished: Bool, _ error: Error?) -> Void
/// Progress handler: receives the page index reported by the converter.
typealias KMPDFConvertProgress = (Int) -> Void
/// Base operation that converts a PDF file into another document or image format.
///
/// `start()` derives `outputFilePath` from `outputFolderPath`, `outputFileName`
/// and `convertType`, then calls `startConvert()`, which subclasses override to
/// drive a format-specific converter. Completion is reported through
/// `convertSuccessful(isSuccessful:errorInfo:)`, which invokes `callback` on the
/// main queue and flips `isFinished`.
class KMPDFConvert: Operation {
    var type: Int = 0
    /// Path of the source PDF.
    var filePath: String = ""
    /// Password handed to the converter when opening the PDF.
    var password: String = ""
    /// Output file name without extension.
    var outputFileName: String = ""
    /// Folder the output is written into.
    var outputFolderPath: String = ""
    /// Page indexes passed through to the converter.
    var pages: [Int]!
    var convertType: KMPDFConvertType = .word
    /// Extra options; `KMPDFConvertOptionsKeyImageDPI` is read by `startConvert()`.
    var options: [String: Any]!
    /// Final output path, computed by `start()`.
    var outputFilePath: String = ""
    var isSuccessful: Bool = false
    var isAllInOneSheet: Bool = false
    var isExtractTable: Bool = false
    var isExtractText: Bool = false
    /**
     0: each table is extracted to its own worksheet
     1: tables are extracted to one worksheet per page
     2: all tables are extracted into a single worksheet
     */
    var extractTableIndex: Int = 0
    var errorInfo: Error!

    // Whether to use OCR.
    var isAllowOCR = false
    var ocrLanguage: COCRLanguage?
    var isContainOCRBgImage = true
    var isContainAnnotations = true
    var isContainImages = true

    fileprivate var pathExtension: String = ""
    fileprivate var fpPDFConverter: CPDFConverterFP!
    fileprivate var converter: CPDFConverter!
    private var isCompletion: Bool = false

    var callback: KMPDFConvertCallback!
    var progress: KMPDFConvertProgress?

    /// Returns the output file extension for `type`, treating table extraction as disabled.
    public class func pathExtension(_ type: KMPDFConvertType) -> String {
        return self.pathExtension(type, nil)
    }

    /// Returns the output file extension for `type`.
    ///
    /// - Parameters:
    ///   - type: Target conversion format.
    ///   - isExtractTable: When `true` and `type` is `.csv`, the result is `"zip"`
    ///     instead of `"csv"`. Ignored for every other type.
    /// - Returns: The extension without a leading dot.
    public class func pathExtension(_ type: KMPDFConvertType, _ isExtractTable: Bool?) -> String {
        switch type {
        case .word: return "docx"
        case .excel: return "xlsx"
        case .ppt: return "pptx"
        case .rtf: return "rtf"
        case .csv: return isExtractTable == true ? "zip" : "csv"
        case .html: return "html"
        case .text: return "txt"
        case .jpeg: return "jpeg"
        case .jpg: return "jpg"
        case .png: return "png"
        case .gif: return "gif"
        case .tga: return "tga"
        case .bmp: return "bmp"
        case .jp2: return "jp2"
        case .tiff: return "tiff"
        }
    }

    /// Computes `outputFilePath` for the current `convertType` and starts the conversion.
    override func start() {
        if isCancelled {
            // An operation that overrides start() must still report isFinished
            // when it was cancelled before starting; otherwise an OperationQueue
            // never considers it complete. No callback is delivered in this case.
            markFinished()
            return
        }

        let fileExtension = KMPDFConvert.pathExtension(convertType, isExtractTable)

        switch convertType {
        case .jpeg, .png:
            // These image types are written as a single zip archive.
            outputFilePath = "\(outputFolderPath)/\(outputFileName).zip"
        case .jpg, .gif, .tga, .bmp, .jp2, .tiff:
            // The remaining image types are written into a folder named after the output file.
            // Best effort: errors (for example an already existing folder) are ignored.
            let folderPath = "\(outputFolderPath)/\(outputFileName)"
            try? FileManager.default.createDirectory(atPath: folderPath, withIntermediateDirectories: false)
            outputFilePath = folderPath
        case .word, .excel, .ppt, .rtf, .csv, .html, .text:
            if fileExtension.isEmpty {
                // Built without mutating `outputFolderPath`, which is an input property.
                outputFilePath = "\(outputFolderPath)/\(outputFileName)"
            } else {
                outputFilePath = "\(outputFolderPath)/\(outputFileName).\(fileExtension)"
            }
        }

        pathExtension = fileExtension

        startConvert()
    }

    /// Returns `filePath` if nothing exists there, otherwise the first variant with
    /// a `(n)` suffix (n = 1, 2, ...) that does not exist yet.
    ///
    /// For directories the suffix is appended to the whole path. For files it is
    /// inserted before the extension; a file without an extension gets no trailing dot.
    func getUniqueFilePath(filePath: String) -> String {
        let fileManager = FileManager.default
        var isDirectory: ObjCBool = false
        guard fileManager.fileExists(atPath: filePath, isDirectory: &isDirectory) else {
            return filePath
        }

        let stem: String
        let suffix: String
        if isDirectory.boolValue {
            stem = filePath
            suffix = ""
        } else {
            let fileURL = URL(fileURLWithPath: filePath)
            let fileExtension = fileURL.pathExtension
            stem = fileURL.deletingPathExtension().path
            suffix = fileExtension.isEmpty ? "" : ".\(fileExtension)"
        }

        var index = 0
        var candidate = filePath
        repeat {
            index += 1
            candidate = "\(stem)(\(index))\(suffix)"
        } while fileManager.fileExists(atPath: candidate)
        return candidate
    }

    /// Runs the conversion through `CPDFConverterFP`.
    ///
    /// Subclasses override this to use a format-specific converter. Reports
    /// failure immediately when no output extension was resolved.
    func startConvert() {
        guard !pathExtension.isEmpty else {
            convertSuccessful(isSuccessful: false, errorInfo: nil)
            return
        }

        fpPDFConverter = CPDFConverterFP()
        fpPDFConverter.setDelegate(self)

        // A missing or non-Int DPI entry falls back to 0 instead of crashing on a forced cast.
        let dpi = (options?[KMPDFConvertOptionsKeyImageDPI] as? Int) ?? 0
        let moreOptions: [String: Any] = [
            CPDFConvertOptionsKey.imageDPI.rawValue: dpi,
            CPDFConvertOptionsKey.allInOneSheet.rawValue: isAllInOneSheet
        ]
        fpPDFConverter.convertPDF(atPath: filePath,
                                  pdfPassword: password,
                                  pdfPageIndexs: pages,
                                  destDocType: pathExtension,
                                  destDocPath: outputFilePath,
                                  moreOptions: moreOptions)
    }

    /// Records the outcome, schedules `callback` on the main queue and marks the operation finished.
    ///
    /// - Parameters:
    ///   - isSuccessful: Whether the conversion succeeded.
    ///   - errorInfo: The error reported by the converter, or `nil`.
    func convertSuccessful(isSuccessful: Bool, errorInfo: Error!) {
        self.isSuccessful = isSuccessful
        self.errorInfo = errorInfo

        // Strong capture keeps the operation alive until the callback has been delivered.
        DispatchQueue.main.async { [self] in
            guard let callbackBlock = callback else {
                return
            }

            callbackBlock(isSuccessful, errorInfo)
        }
        markFinished()
    }

    /// Flips `isFinished` to `true` with the matching KVO notifications.
    private func markFinished() {
        willChangeValue(forKey: "isFinished")
        isCompletion = true
        didChangeValue(forKey: "isFinished")
    }

    override var isFinished: Bool {
        return isCompletion
    }
}
/// `CPDFConverterDelegate` callbacks, forwarded to the operation's completion and progress handlers.
extension KMPDFConvert: CPDFConverterDelegate {
    /// Called when conversion starts; intentionally does nothing.
    func converter(_ converter: CPDFConverter!, didStartConvert error: Error!) {
    }

    /// Called when conversion ends; the conversion succeeded exactly when `error` is nil.
    func converter(_ converter: CPDFConverter!, didEndConvert error: Error!) {
        let succeeded = (error == nil)
        convertSuccessful(isSuccessful: succeeded, errorInfo: error)
    }

    /// Forwards the reported page index to `progress`, if one is set.
    func converter(_ converter: CPDFConverter!, pageIndex index: UInt, pageCount count: UInt) {
        progress?(Int(index))
    }
}
/// `CPDFConverterFPDelegate` callbacks, forwarded to the operation's completion and progress handlers.
extension KMPDFConvert: CPDFConverterFPDelegate {
    /// Called when conversion ends; the conversion succeeded exactly when `error` is nil.
    func fppdfConverter(_ converter: Any!, didEndConversion error: Error!) {
        let succeeded = (error == nil)
        convertSuccessful(isSuccessful: succeeded, errorInfo: error)
    }

    /// Forwards the written page index (`wordPageIndexA`) to `progress`, if one is set.
    func fppdfConverter(_ converter: Any!, convertPDFPageIndex pdfPageIndexA: UInt, writeWordPageIndex wordPageIndexA: UInt, finshedWordPageCount wordPageCountA: UInt) {
        progress?(Int(wordPageIndexA))
    }
}
// MARK: - PDF to Word
/// Converts a PDF to Word through `CPDFConverterWord`.
class KMPDFConvertWord: KMPDFConvert {
    // Box (frame) layout | flowing layout [flowing by default]
    var layoutOptions: CPDFConvertLayoutOptions = .retainFlowingText

    override init() {
        super.init()

        convertType = .word
    }

    /// Configures Word options from the operation's properties and starts the converter.
    override func startConvert() {
        guard !pathExtension.isEmpty else {
            convertSuccessful(isSuccessful: false, errorInfo: nil)
            return
        }

        converter = CPDFConverterWord(url: URL(fileURLWithPath: filePath), password: password)
        converter.delegate = self

        let wordOptions = CPDFConvertWordOptions()
        wordOptions.layoutOptions = layoutOptions
        wordOptions.isContainAnnotations = isContainAnnotations
        wordOptions.isAllowOCR = isAllowOCR
        if !isAllowOCR {
            // Without OCR: images are always kept and no OCR background image is used.
            wordOptions.isContainImages = true
            wordOptions.isContainOCRBgImage = false
        } else {
            wordOptions.isContainOCRBgImage = isContainOCRBgImage
            // OCR language falls back to English when none was chosen.
            wordOptions.language = ocrLanguage ?? .english
        }

        converter.convert(toFilePath: outputFilePath, pageIndexs: pages, options: wordOptions)
    }
}
// MARK: - PDF to Image
/// Converts a PDF to images: `CPDFConverterImg` for `.jpeg`/`.png`, `CPDFConverterFP` for every other type.
class KMPDFConvertImage: KMPDFConvert {
    var imageType: CPDFConvertImgType = .JPEG
    var imageDpi: Int = 150

    /// Picks the converter based on `convertType` and starts the conversion.
    override func startConvert() {
        guard !pathExtension.isEmpty else {
            convertSuccessful(isSuccessful: false, errorInfo: nil)
            return
        }

        switch convertType {
        case .jpeg, .png:
            converter = CPDFConverterImg(url: URL(fileURLWithPath: filePath), password: password)
            converter.delegate = self

            let imageOptions = CPDFConvertImgOptions()
            imageOptions.type = imageType
            imageOptions.imageDpi = Int32(imageDpi)
            // Annotations are always included on this path.
            imageOptions.isContainAnnotations = true

            converter.convert(toFilePath: outputFilePath, pageIndexs: pages, options: imageOptions)
        default:
            fpPDFConverter = CPDFConverterFP()
            fpPDFConverter.setDelegate(self)

            let moreOptions: [String: Any] = [CPDFConvertOptionsKey.imageDPI.rawValue: imageDpi]
            fpPDFConverter.convertPDF(atPath: filePath,
                                      pdfPassword: password,
                                      pdfPageIndexs: pages,
                                      destDocType: pathExtension,
                                      destDocPath: outputFilePath,
                                      moreOptions: moreOptions)
        }
    }
}
// MARK: - PDF to PPT
/// Converts a PDF to PowerPoint through `CPDFConverterPPT`.
class KMPDFConvertPPT: KMPDFConvert {
    override init() {
        super.init()

        convertType = .ppt
    }

    /// Configures PPT options from the operation's properties and starts the converter.
    override func startConvert() {
        guard !pathExtension.isEmpty else {
            convertSuccessful(isSuccessful: false, errorInfo: nil)
            return
        }

        let pptOptions = CPDFConvertPPTOptions()
        pptOptions.isContainAnnotations = isContainAnnotations
        pptOptions.isAllowOCR = isAllowOCR
        if !isAllowOCR {
            // Without OCR: images are always kept and no OCR background image is used.
            pptOptions.isContainImages = true
            pptOptions.isContainOCRBgImage = false
        } else {
            pptOptions.isContainOCRBgImage = isContainOCRBgImage
            // OCR language falls back to English when none was chosen.
            pptOptions.language = ocrLanguage ?? .english
        }

        converter = CPDFConverterPPT(url: URL(fileURLWithPath: filePath), password: password)
        converter.delegate = self
        converter.convert(toFilePath: outputFilePath, pageIndexs: pages, options: pptOptions)
    }
}
// MARK: - PDF to RTF
/// Converts a PDF to RTF through `CPDFConverterRtf`.
class KMPDFConvertRTF: KMPDFConvert {
    override init() {
        super.init()

        convertType = .rtf
    }

    /// Configures RTF options from the operation's properties and starts the converter.
    override func startConvert() {
        guard !pathExtension.isEmpty else {
            convertSuccessful(isSuccessful: false, errorInfo: nil)
            return
        }

        let rtfOptions = CPDFConvertRtfOptions()
        rtfOptions.isContainAnnotations = isContainAnnotations
        rtfOptions.isAllowOCR = isAllowOCR
        if !isAllowOCR {
            // Without OCR: images are always kept and no OCR background image is used.
            rtfOptions.isContainImages = true
            rtfOptions.isContainOCRBgImage = false
        } else {
            rtfOptions.isContainOCRBgImage = isContainOCRBgImage
            // OCR language falls back to English when none was chosen.
            rtfOptions.language = ocrLanguage ?? .english
        }

        converter = CPDFConverterRtf(url: URL(fileURLWithPath: filePath), password: password)
        converter.delegate = self
        converter.convert(toFilePath: outputFilePath, pageIndexs: pages, options: rtfOptions)
    }
}
// MARK: - PDF to HTML
/// Converts a PDF to HTML through `CPDFConverterHtml`.
class KMPDFConvertHTML: KMPDFConvert {
    override init() {
        super.init()

        convertType = .html
    }

    /// Configures HTML options from the operation's properties and starts the converter.
    override func startConvert() {
        guard !pathExtension.isEmpty else {
            convertSuccessful(isSuccessful: false, errorInfo: nil)
            return
        }

        let htmlOptions = CPDFConvertHtmlOptions()
        htmlOptions.isContainAnnotations = isContainAnnotations
        htmlOptions.isAllowOCR = isAllowOCR
        if !isAllowOCR {
            // Without OCR: image inclusion follows `isContainImages`; no OCR background image is used.
            htmlOptions.isContainImages = isContainImages
            htmlOptions.isContainOCRBgImage = false
        } else {
            htmlOptions.isContainOCRBgImage = isContainOCRBgImage
            // OCR language falls back to English when none was chosen.
            htmlOptions.language = ocrLanguage ?? .english
        }

        converter = CPDFConverterHtml(url: URL(fileURLWithPath: filePath), password: password)
        converter.delegate = self
        converter.convert(toFilePath: outputFilePath, pageIndexs: pages, options: htmlOptions)
    }
}
// MARK: - PDF to Text
/// Converts a PDF to plain text through `CPDFConverterTxt`.
class KMPDFConvertText: KMPDFConvert {
    override init() {
        super.init()

        convertType = .text
    }

    /// Configures text options from the operation's properties and starts the converter.
    override func startConvert() {
        guard !pathExtension.isEmpty else {
            convertSuccessful(isSuccessful: false, errorInfo: nil)
            return
        }

        let textOptions = CPDFConvertTxtOptions()
        textOptions.isAllowOCR = isAllowOCR
        if isAllowOCR {
            // The language is only set when OCR is enabled; it falls back to English.
            textOptions.language = ocrLanguage ?? .english
        }

        converter = CPDFConverterTxt(url: URL(fileURLWithPath: filePath), password: password)
        converter.delegate = self
        converter.convert(toFilePath: outputFilePath, pageIndexs: pages, options: textOptions)
    }
}
// MARK: - PDF to CSV
/// Converts a PDF to CSV: `CPDFConverterCsv` when extracting tables, `CPDFConverterFP` otherwise.
class KMPDFConvertCSV: KMPDFConvert {
    override init() {
        super.init()

        convertType = .csv
    }

    /// Picks the converter based on `isExtractTable` and starts the conversion.
    override func startConvert() {
        guard !pathExtension.isEmpty else {
            convertSuccessful(isSuccessful: false, errorInfo: nil)
            return
        }

        guard convertType == .csv, isExtractTable else {
            // Not extracting tables: use CPDFConverterFP with the all-in-one-sheet option.
            fpPDFConverter = CPDFConverterFP()
            fpPDFConverter.setDelegate(self)

            let moreOptions: [String: Any] = [CPDFConvertOptionsKey.allInOneSheet.rawValue: isAllInOneSheet]
            fpPDFConverter.convertPDF(atPath: filePath,
                                      pdfPassword: password,
                                      pdfPageIndexs: pages,
                                      destDocType: pathExtension,
                                      destDocPath: outputFilePath,
                                      moreOptions: moreOptions)
            return
        }

        // Table extraction: CPDFConverterCsv is invoked without an options object.
        converter = CPDFConverterCsv(url: URL(fileURLWithPath: filePath), password: password)
        converter.delegate = self
        converter.convert(toFilePath: outputFilePath, pageIndexs: pages, options: nil)
    }
}
// MARK: - PDF to Excel
/// Converts a PDF to Excel through `CPDFConverterExcel`.
class KMPDFConvertExcel: KMPDFConvert {
    override init() {
        super.init()

        convertType = .excel
    }

    /// Configures Excel options (OCR, content and worksheet layout) and starts the converter.
    override func startConvert() {
        guard !pathExtension.isEmpty else {
            convertSuccessful(isSuccessful: false, errorInfo: nil)
            return
        }

        converter = CPDFConverterExcel(url: URL(fileURLWithPath: filePath), password: password)
        converter.delegate = self

        let excelOptions = CPDFConvertExcelOptions()
        excelOptions.isContainAnnotations = isContainAnnotations
        excelOptions.isAllowOCR = isAllowOCR
        if !isAllowOCR {
            // Without OCR: images are always kept and no OCR background image is used.
            excelOptions.isContainImages = true
            excelOptions.isContainOCRBgImage = false
        } else {
            excelOptions.isContainOCRBgImage = isContainOCRBgImage
            // OCR language falls back to English when none was chosen.
            excelOptions.language = ocrLanguage ?? .english
        }

        // Text extraction takes precedence over table extraction.
        if isExtractText {
            excelOptions.contentOptions = .onlyText
        } else if isExtractTable {
            excelOptions.contentOptions = .onlyTable
            switch extractTableIndex {
            case 0:
                excelOptions.worksheetOptions = .forEachTable
            case 1:
                excelOptions.worksheetOptions = .forEachPage
            case 2:
                excelOptions.worksheetOptions = .forTheDocument
            default:
                // Any other index leaves worksheetOptions untouched.
                break
            }
        } else {
            excelOptions.contentOptions = .allContent
            excelOptions.worksheetOptions = isAllInOneSheet ? .forTheDocument : .forEachPage
        }

        converter.convert(toFilePath: outputFilePath, pageIndexs: pages, options: excelOptions)
    }
}
|