KMPDFConvert.swift 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586
  1. //
  2. // KMPDFConvert.swift
  3. // PDF Master
  4. //
  5. // Created by tangchao on 2022/12/7.
  6. //
  7. import Cocoa
  8. import PDFKit
  9. import ComPDFKit_Conversion
  /// Key into `KMPDFConvert.options`; the base converter reads its value as the image DPI.
  let KMPDFConvertOptionsKeyImageDPI: String = "KMPDFConvertOptionsKeyImageDPI"

  /// Key into `KMPDFConvert.options`.
  /// NOTE(review): presumably toggles annotation rendering for image export; it is not read
  /// anywhere in the visible part of this file — confirm against callers.
  let KMPDFConvertOptionsKeyImageWithAnnotation: String = "KMPDFConvertOptionsKeyImageWithAnnotation"
  /// Target formats a PDF can be converted to.
  ///
  /// Raw values are spelled out explicitly so they stay stable if cases are reordered.
  enum KMPDFConvertType: Int {
      // Document / text formats
      case word = 0, excel = 1, ppt = 2, rtf = 3, csv = 4, html = 5, text = 6

      // Image formats
      case jpeg = 7, jpg = 8, png = 9, gif = 10, tiff = 11, tga = 12, bmp = 13, jp2 = 14

      /// Alias for the generic "image" target; identical to `.jpeg`.
      static let image: KMPDFConvertType = .jpeg
  }
  /// Completion handler for a conversion: whether it finished successfully, plus the error (if any).
  typealias KMPDFConvertCallback = (_ finished: Bool, _ error: Error?) -> Void

  /// Progress handler for a conversion; receives a page index reported by the converter.
  typealias KMPDFConvertProgress = (Int) -> Void
  /// Base operation that converts a PDF file into another document or image format.
  ///
  /// `start()` derives `outputFilePath` from `outputFolderPath`, `outputFileName` and
  /// `convertType`, then calls `startConvert()`. Subclasses override `startConvert()` to pick a
  /// format-specific converter. Completion is reported through `convertSuccessful(isSuccessful:errorInfo:)`,
  /// which invokes `callback` on the main queue and marks the operation finished.
  class KMPDFConvert: Operation {
      /// Not read anywhere in the visible part of this file.
      /// NOTE(review): purpose unclear — confirm against callers.
      var type: Int = 0
      /// Path of the source PDF.
      var filePath: String = ""
      /// Password handed to the converter when opening the PDF.
      var password: String = ""
      /// Output file name without extension (for some image types it names the output folder).
      var outputFileName: String = ""
      /// Folder in which the output file or folder is created.
      var outputFolderPath: String = ""
      /// Page indexes handed to the converter.
      var pages: [Int]!
      /// Target format; drives the extension and the output path layout.
      var convertType: KMPDFConvertType = .word
      /// Extra options; the base converter reads `KMPDFConvertOptionsKeyImageDPI` from it.
      var options: [String:Any]!
      /// Full output path, computed by `start()`.
      var outputFilePath: String = ""
      /// Result of the last conversion, set by `convertSuccessful(isSuccessful:errorInfo:)`.
      var isSuccessful: Bool = false
      var isAllInOneSheet: Bool = false
      var isExtractTable: Bool = false
      var isExtractText: Bool = false
      /**
       0: extract each table into its own worksheet
       1: extract tables page by page into separate worksheets
       2: extract all tables into a single worksheet
       */
      var extractTableIndex: Int = 0
      /// Error of the last conversion, set by `convertSuccessful(isSuccessful:errorInfo:)`.
      var errorInfo: Error!
      // Whether to use OCR
      var isAllowOCR = false
      var ocrLanguage: COCRLanguage?
      var isContainOCRBgImage = true
      var isContainAnnotations = true
      /// NOTE(review): not read by any converter in the visible part of this file.
      var isContainImages = true

      /// Extension / destination document type resolved in `start()`.
      fileprivate var pathExtension: String = ""
      fileprivate var fpPDFConverter: CPDFConverterFP!
      fileprivate var converter: CPDFConverter!

      /// Backing store for `isFinished`.
      private var isCompletion: Bool = false

      /// Invoked on the main queue when the conversion ends.
      var callback: KMPDFConvertCallback!
      /// Invoked with the page index reported by the converter delegates.
      var progress: KMPDFConvertProgress?

      /// Returns the file extension for `type`, assuming table extraction is off.
      public class func pathExtension(_ type: KMPDFConvertType) -> String {
          return self.pathExtension(type, nil)
      }

      /// Returns the file extension for `type`.
      ///
      /// - Parameters:
      ///   - type: Target format.
      ///   - isExtractTable: When `true` and `type` is `.csv`, the extension is `"zip"` instead of `"csv"`.
      /// - Returns: The extension without a leading dot.
      public class func pathExtension(_ type: KMPDFConvertType, _ isExtractTable: Bool?) -> String {
          // Exhaustive switch: adding a new case to the enum becomes a compile error here
          // instead of silently producing an empty extension.
          switch type {
          case .word: return "docx"
          case .excel: return "xlsx"
          case .ppt: return "pptx"
          case .rtf: return "rtf"
          case .csv: return isExtractTable == true ? "zip" : "csv"
          case .html: return "html"
          case .text: return "txt"
          case .jpeg: return "jpeg"
          case .jpg: return "jpg"
          case .png: return "png"
          case .gif: return "gif"
          case .tga: return "tga"
          case .bmp: return "bmp"
          case .jp2: return "jp2"
          case .tiff: return "tiff"
          }
      }

      /// Computes `outputFilePath` for the current `convertType` and kicks off the conversion.
      override func start() {
          if isCancelled {
              // A custom start() must still move the operation to the finished state when it
              // is cancelled before running; otherwise an OperationQueue never releases it.
              // The callback is intentionally not invoked, matching the previous behaviour.
              markFinished()
              return
          }

          let resolvedExtension = KMPDFConvert.pathExtension(convertType, isExtractTable)

          switch convertType {
          case .jpeg, .png:
              // These image types are delivered as a single zip archive.
              outputFilePath = "\(outputFolderPath)/\(outputFileName).zip"

          case .jpg, .gif, .tga, .bmp, .jp2, .tiff:
              // The remaining image types are written into a folder named after the output.
              let folderPath = "\(outputFolderPath)/\(outputFileName)"
              // Best effort: creation fails when the folder already exists, which is fine here.
              try? FileManager.default.createDirectory(atPath: folderPath, withIntermediateDirectories: false)
              outputFilePath = folderPath

          case .word, .excel, .ppt, .rtf, .csv, .html, .text:
              if resolvedExtension.isEmpty {
                  // Kept for parity with the previous implementation (which also rewrote
                  // `outputFolderPath` here); `startConvert()` reports failure for an empty extension.
                  outputFolderPath.append("/")
                  outputFolderPath.append(outputFileName)
                  outputFilePath = outputFolderPath
              } else {
                  outputFilePath = "\(outputFolderPath)/\(outputFileName).\(resolvedExtension)"
              }
          }

          pathExtension = resolvedExtension
          startConvert()
      }

      /// Returns `filePath` if nothing exists there, otherwise the first free path obtained by
      /// inserting `(1)`, `(2)`, … after the name (before the extension for regular files).
      func getUniqueFilePath(filePath: String) -> String {
          let fileManager = FileManager.default
          var isDirectory: ObjCBool = false
          guard fileManager.fileExists(atPath: filePath, isDirectory: &isDirectory) else {
              return filePath
          }

          let stem: String
          let suffix: String
          if isDirectory.boolValue {
              stem = filePath
              suffix = ""
          } else {
              let fileURL = URL(fileURLWithPath: filePath)
              stem = fileURL.deletingPathExtension().path
              // Only re-attach an extension when there is one, so "name" becomes "name(1)"
              // rather than "name(1).".
              suffix = fileURL.pathExtension.isEmpty ? "" : ".\(fileURL.pathExtension)"
          }

          var index = 0
          var candidate = filePath
          repeat {
              index += 1
              candidate = "\(stem)(\(index))\(suffix)"
          } while fileManager.fileExists(atPath: candidate)
          return candidate
      }

      /// Runs the conversion through `CPDFConverterFP`. Subclasses override this to use a
      /// format-specific converter. Reports failure immediately when no extension was resolved.
      func startConvert() {
          guard !pathExtension.isEmpty else {
              convertSuccessful(isSuccessful: false, errorInfo: nil)
              return
          }

          fpPDFConverter = CPDFConverterFP()
          fpPDFConverter.setDelegate(self)

          // Fall back to 0 when `options` is nil, lacks the key, or holds a non-Int value
          // (the previous forced cast crashed in the latter two cases).
          let dpi = (options?[KMPDFConvertOptionsKeyImageDPI] as? Int) ?? 0
          let moreOptions: [String:Any] = [
              CPDFConvertOptionsKey.imageDPI.rawValue: dpi,
              CPDFConvertOptionsKey.allInOneSheet.rawValue: isAllInOneSheet
          ]
          fpPDFConverter.convertPDF(atPath: filePath, pdfPassword: password, pdfPageIndexs: pages, destDocType: pathExtension, destDocPath: outputFilePath, moreOptions: moreOptions)
      }

      /// Records the outcome, schedules `callback` on the main queue and marks the operation finished.
      ///
      /// - Parameters:
      ///   - isSuccessful: Whether the conversion succeeded.
      ///   - errorInfo: The error reported by the converter, if any.
      func convertSuccessful(isSuccessful: Bool, errorInfo: Error!) {
          self.isSuccessful = isSuccessful
          self.errorInfo = errorInfo
          DispatchQueue.main.async { [self] in
              callback?(isSuccessful, errorInfo)
          }
          markFinished()
      }

      /// Flips `isFinished` to `true` with the KVO notifications `Operation` relies on.
      private func markFinished() {
          willChangeValue(forKey: "isFinished")
          isCompletion = true
          didChangeValue(forKey: "isFinished")
      }

      override var isFinished: Bool {
          return isCompletion
      }
  }
  /// Bridges `CPDFConverter` delegate events to the operation's completion and progress handlers.
  extension KMPDFConvert: CPDFConverterDelegate {
      /// Start notification; nothing to do.
      func converter(_ converter: CPDFConverter!, didStartConvert error: Error!) {
      }

      /// End notification: the conversion succeeded exactly when no error is reported.
      func converter(_ converter: CPDFConverter!, didEndConvert error: Error!) {
          convertSuccessful(isSuccessful: error == nil, errorInfo: error)
      }

      /// Forwards the reported page index to `progress`, if one is set.
      func converter(_ converter: CPDFConverter!, pageIndex index: UInt, pageCount count: UInt) {
          if let report = progress {
              report(Int(index))
          }
      }
  }
  /// Bridges `CPDFConverterFP` delegate events to the operation's completion and progress handlers.
  extension KMPDFConvert: CPDFConverterFPDelegate {
      /// End notification: the conversion succeeded exactly when no error is reported.
      func fppdfConverter(_ converter: Any!, didEndConversion error: Error!) {
          convertSuccessful(isSuccessful: error == nil, errorInfo: error)
      }

      /// Forwards the written-page index (`wordPageIndexA`) to `progress`, if one is set.
      func fppdfConverter(_ converter: Any!, convertPDFPageIndex pdfPageIndexA: UInt, writeWordPageIndex wordPageIndexA: UInt, finshedWordPageCount wordPageCountA: UInt) {
          if let report = progress {
              report(Int(wordPageIndexA))
          }
      }
  }
  // MARK: - PDF to Word
  /// Converts a PDF to Word, using `CPDFConverterWord` for the flowing layout and
  /// `CPDFConverterFP` for any other layout.
  class KMPDFConvertWord: KMPDFConvert {
      // Box layout | flowing layout [flowing layout is the default]
      var layoutOptions: LayoutOptions = .RetainFlowingText

      override init() {
          super.init()
          convertType = .word
      }

      override func startConvert() {
          guard !pathExtension.isEmpty else {
              convertSuccessful(isSuccessful: false, errorInfo: nil)
              return
          }

          guard layoutOptions == .RetainFlowingText else {
              // Box layout
              fpPDFConverter = CPDFConverterFP()
              fpPDFConverter.setDelegate(self)
              let boxOptions: [String : Any] = [CPDFConvertOptionsKey.allInOneSheet.rawValue : false]
              fpPDFConverter.convertPDF(atPath: filePath, pdfPassword: password, pdfPageIndexs: pages, destDocType: pathExtension, destDocPath: outputFilePath, moreOptions: boxOptions)
              return
          }

          // Flowing layout
          converter = CPDFConverterWord(url: URL(fileURLWithPath: filePath), password: password)
          converter.delegate = self

          let wordOptions = CPDFConvertWordOptions()
          wordOptions.layoutOptions = layoutOptions
          wordOptions.isContainAnnotations = isContainAnnotations
          wordOptions.isAllowOCR = isAllowOCR
          if isAllowOCR {
              wordOptions.isContainOCRBgImage = isContainOCRBgImage
              // English is the fallback when no OCR language was chosen.
              wordOptions.language = ocrLanguage ?? .english
          } else {
              wordOptions.isContainImages = true
              wordOptions.isContainOCRBgImage = false
          }
          converter.convert(toFilePath: outputFilePath, pageIndexs: pages, options: wordOptions)
      }
  }
  // MARK: - PDF to Image
  /// Converts a PDF to images: `CPDFConverterImg` for `.jpeg` / `.png`, `CPDFConverterFP`
  /// for every other convert type.
  class KMPDFConvertImage: KMPDFConvert {
      /// Image type passed to `CPDFConvertImgOptions`.
      var imageType: CPDFConvertImgType = .JPEG
      /// Resolution passed to the converter.
      var imageDpi: Int = 150

      override func startConvert() {
          guard !pathExtension.isEmpty else {
              convertSuccessful(isSuccessful: false, errorInfo: nil)
              return
          }

          switch convertType {
          case .jpeg, .png:
              converter = CPDFConverterImg(url: URL(fileURLWithPath: filePath), password: password)
              converter.delegate = self
              let imageOptions = CPDFConvertImgOptions()
              imageOptions.type = imageType
              imageOptions.imageDpi = Int32(imageDpi)
              imageOptions.isContainAnnotations = true
              converter.convert(toFilePath: outputFilePath, pageIndexs: pages, options: imageOptions)

          default:
              fpPDFConverter = CPDFConverterFP()
              fpPDFConverter.setDelegate(self)
              let fallbackOptions: [String : Any] = [CPDFConvertOptionsKey.imageDPI.rawValue : imageDpi]
              fpPDFConverter.convertPDF(atPath: filePath, pdfPassword: password, pdfPageIndexs: pages, destDocType: pathExtension, destDocPath: outputFilePath, moreOptions: fallbackOptions)
          }
      }
  }
  // MARK: - PDF to PPT
  /// Converts a PDF to PowerPoint through `CPDFConverterPPT`.
  class KMPDFConvertPPT: KMPDFConvert {
      override init() {
          super.init()
          convertType = .ppt
      }

      override func startConvert() {
          guard !pathExtension.isEmpty else {
              convertSuccessful(isSuccessful: false, errorInfo: nil)
              return
          }

          let pptOptions = CPDFConvertPPTOptions()
          pptOptions.isContainAnnotations = isContainAnnotations
          pptOptions.isAllowOCR = isAllowOCR
          if isAllowOCR {
              pptOptions.isContainOCRBgImage = isContainOCRBgImage
              // English is the fallback when no OCR language was chosen.
              pptOptions.language = ocrLanguage ?? .english
          } else {
              pptOptions.isContainImages = true
              pptOptions.isContainOCRBgImage = false
          }

          converter = CPDFConverterPPT(url: URL(fileURLWithPath: filePath), password: password)
          converter.delegate = self
          converter.convert(toFilePath: outputFilePath, pageIndexs: pages, options: pptOptions)
      }
  }
  // MARK: - PDF to RTF
  /// Converts a PDF to RTF through `CPDFConverterRtf`.
  class KMPDFConvertRTF: KMPDFConvert {
      override init() {
          super.init()
          convertType = .rtf
      }

      override func startConvert() {
          guard !pathExtension.isEmpty else {
              convertSuccessful(isSuccessful: false, errorInfo: nil)
              return
          }

          let rtfOptions = CPDFConvertRtfOptions()
          rtfOptions.isContainAnnotations = isContainAnnotations
          rtfOptions.isAllowOCR = isAllowOCR
          if isAllowOCR {
              rtfOptions.isContainOCRBgImage = isContainOCRBgImage
              // English is the fallback when no OCR language was chosen.
              rtfOptions.language = ocrLanguage ?? .english
          } else {
              rtfOptions.isContainImages = true
              rtfOptions.isContainOCRBgImage = false
          }

          converter = CPDFConverterRtf(url: URL(fileURLWithPath: filePath), password: password)
          converter.delegate = self
          converter.convert(toFilePath: outputFilePath, pageIndexs: pages, options: rtfOptions)
      }
  }
  // MARK: - PDF to HTML
  /// Converts a PDF to HTML through `CPDFConverterHtml`.
  class KMPDFConvertHTML: KMPDFConvert {
      override init() {
          super.init()
          convertType = .html
      }

      override func startConvert() {
          guard !pathExtension.isEmpty else {
              convertSuccessful(isSuccessful: false, errorInfo: nil)
              return
          }

          let htmlOptions = CPDFConvertHtmlOptions()
          htmlOptions.isAllowOCR = isAllowOCR
          if isAllowOCR {
              // The language is only set when OCR is on; English is the fallback.
              htmlOptions.language = ocrLanguage ?? .english
          }

          converter = CPDFConverterHtml(url: URL(fileURLWithPath: filePath), password: password)
          converter.delegate = self
          converter.convert(toFilePath: outputFilePath, pageIndexs: pages, options: htmlOptions)
      }
  }
  // MARK: - PDF to Text
  /// Converts a PDF to plain text through `CPDFConverterTxt`.
  class KMPDFConvertText: KMPDFConvert {
      override init() {
          super.init()
          convertType = .text
      }

      override func startConvert() {
          guard !pathExtension.isEmpty else {
              convertSuccessful(isSuccessful: false, errorInfo: nil)
              return
          }

          let textOptions = CPDFConvertTxtOptions()
          textOptions.isAllowOCR = isAllowOCR
          if isAllowOCR {
              // The language is only set when OCR is on; English is the fallback.
              textOptions.language = ocrLanguage ?? .english
          }

          converter = CPDFConverterTxt(url: URL(fileURLWithPath: filePath), password: password)
          converter.delegate = self
          converter.convert(toFilePath: outputFilePath, pageIndexs: pages, options: textOptions)
      }
  }
  // MARK: - PDF to CSV
  /// Converts a PDF to CSV: `CPDFConverterCsv` when table extraction is requested,
  /// `CPDFConverterFP` otherwise.
  class KMPDFConvertCSV: KMPDFConvert {
      override init() {
          super.init()
          convertType = .csv
      }

      override func startConvert() {
          guard !pathExtension.isEmpty else {
              convertSuccessful(isSuccessful: false, errorInfo: nil)
              return
          }

          let usesTableExtraction = convertType == .csv && isExtractTable
          guard usesTableExtraction else {
              fpPDFConverter = CPDFConverterFP()
              fpPDFConverter.setDelegate(self)
              let sheetOptions: [String : Any] = [CPDFConvertOptionsKey.allInOneSheet.rawValue : isAllInOneSheet]
              fpPDFConverter.convertPDF(atPath: filePath, pdfPassword: password, pdfPageIndexs: pages, destDocType: pathExtension, destDocPath: outputFilePath, moreOptions: sheetOptions)
              return
          }

          converter = CPDFConverterCsv(url: URL(fileURLWithPath: filePath), password: password)
          converter.delegate = self
          converter.convert(toFilePath: outputFilePath, pageIndexs: pages, options: nil)
      }
  }
  // MARK: - PDF to Excel
  /// Converts a PDF to Excel through `CPDFConverterExcel`, mapping the extraction flags of the
  /// base class onto the converter's content and worksheet options.
  class KMPDFConvertExcel: KMPDFConvert {
      override init() {
          super.init()
          convertType = .excel
      }

      override func startConvert() {
          guard !pathExtension.isEmpty else {
              convertSuccessful(isSuccessful: false, errorInfo: nil)
              return
          }

          converter = CPDFConverterExcel(url: URL(fileURLWithPath: filePath), password: password)
          converter.delegate = self

          let excelOptions = CPDFConvertExcelOptions()
          excelOptions.isContainAnnotations = isContainAnnotations
          excelOptions.isAllowOCR = isAllowOCR
          if isAllowOCR {
              excelOptions.isContainOCRBgImage = isContainOCRBgImage
              // English is the fallback when no OCR language was chosen.
              excelOptions.language = ocrLanguage ?? .english
          } else {
              excelOptions.isContainImages = true
              excelOptions.isContainOCRBgImage = false
          }

          // Text extraction takes precedence over table extraction.
          if isExtractText {
              excelOptions.contentOptions = .onlyText
          } else if isExtractTable {
              excelOptions.contentOptions = .onlyTable
              switch extractTableIndex {
              case 0:
                  excelOptions.worksheetOptions = .forEachTable
              case 1:
                  excelOptions.worksheetOptions = .forEachPage
              case 2:
                  excelOptions.worksheetOptions = .forTheDocument
              default:
                  // Any other index leaves the converter's worksheet option untouched.
                  break
              }
          } else {
              excelOptions.contentOptions = .allContent
              excelOptions.worksheetOptions = isAllInOneSheet ? .forTheDocument : .forEachPage
          }

          converter.convert(toFilePath: outputFilePath, pageIndexs: pages, options: excelOptions)
      }
  }