KMPDFConvert.swift 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588
  1. //
  2. // KMPDFConvert.swift
  3. // PDF Master
  4. //
  5. // Created by tangchao on 2022/12/7.
  6. //
  7. import Cocoa
  8. import PDFKit
  9. import ComPDFKit_Conversion
  /// Key in `KMPDFConvert.options` under which the image DPI (read as an `Int`) is stored.
  let KMPDFConvertOptionsKeyImageDPI: String = "KMPDFConvertOptionsKeyImageDPI"
  /// Options key for the "image with annotation" setting; it is only declared here, not read in this file.
  let KMPDFConvertOptionsKeyImageWithAnnotation: String = "KMPDFConvertOptionsKeyImageWithAnnotation"
  /// Target formats a PDF can be converted to.
  ///
  /// Raw values are spelled out explicitly so they stay stable if cases are reordered.
  enum KMPDFConvertType: Int {
      // Document formats
      case word = 0, excel = 1, ppt = 2, rtf = 3
      case csv = 4, html = 5, text = 6

      // Image formats
      case jpeg = 7, jpg = 8, png = 9, gif = 10
      case tiff = 11, tga = 12, bmp = 13, jp2 = 14

      /// Generic "image" alias; resolves to `.jpeg`.
      static let image: KMPDFConvertType = .jpeg
  }
  /// Completion handler: `finished` is the success flag, `error` the failure reason (if any).
  typealias KMPDFConvertCallback = (_ finished: Bool, _ error: Error?) -> Void
  /// Progress handler receiving an integer page index reported by the converter.
  typealias KMPDFConvertProgress = (Int) -> Void
  /// Base `Operation` that converts a PDF file into another format.
  ///
  /// `start()` resolves the output path from `outputFolderPath`, `outputFileName`
  /// and `convertType`, then calls `startConvert()`. Subclasses override
  /// `startConvert()` to drive a format-specific converter. When a converter
  /// reports completion through the delegate extensions, `convertSuccessful`
  /// stores the result, invokes `callback` on the main queue and marks the
  /// operation finished.
  class KMPDFConvert: Operation {
      var type: Int = 0
      /// Path of the source PDF.
      var filePath: String = ""
      /// Password handed to the converter for the source PDF.
      var password: String = ""
      /// Output file name without extension.
      var outputFileName: String = ""
      /// Folder in which the output file (or image folder) is created.
      var outputFolderPath: String = ""
      /// Page indexes passed through to the converter.
      var pages: [Int]!
      var convertType: KMPDFConvertType = .word
      /// Extra options; `KMPDFConvertOptionsKeyImageDPI` is read from here.
      var options: [String:Any]!
      /// Full output path, computed by `start()`.
      var outputFilePath: String = ""
      var isSuccessful: Bool = false
      var isAllInOneSheet: Bool = false
      var isExtractTable: Bool = false
      var isExtractText: Bool = false
      /**
       0: each table is extracted into its own worksheet
       1: tables are extracted page by page into separate worksheets
       2: all tables are extracted into a single worksheet
       */
      var extractTableIndex: Int = 0
      var errorInfo: Error!

      // Whether OCR is used
      var isAllowOCR = false
      var ocrLanguage: COCRLanguage?
      var isContainOCRBgImage = true
      var isContainAnnotations = true
      var isContainImages = true

      // These three are fileprivate so the subclasses in this file can use them.
      fileprivate var pathExtension: String = ""
      fileprivate var fpPDFConverter: CPDFConverterFP!
      fileprivate var converter: CPDFConverter!

      /// Backing storage for `isFinished`.
      private var isCompletion: Bool = false

      var callback: KMPDFConvertCallback!
      var progress: KMPDFConvertProgress?

      /// Returns the output file extension for `type`, treating table extraction as off.
      public class func pathExtension(_ type: KMPDFConvertType) -> String {
          return self.pathExtension(type, nil)
      }

      /// Returns the output file extension for `type`.
      ///
      /// - Parameters:
      ///   - type: Target format.
      ///   - isExtractTable: When `true` and `type` is `.csv`, the extension is `"zip"`.
      /// - Returns: The extension without a leading dot.
      public class func pathExtension(_ type: KMPDFConvertType, _ isExtractTable: Bool?) -> String {
          // Exhaustive switch: adding a new case without an extension is a compile error.
          switch type {
          case .word: return "docx"
          case .excel: return "xlsx"
          case .ppt: return "pptx"
          case .rtf: return "rtf"
          case .csv: return isExtractTable == true ? "zip" : "csv"
          case .html: return "html"
          case .text: return "txt"
          case .jpeg: return "jpeg"
          case .jpg: return "jpg"
          case .png: return "png"
          case .gif: return "gif"
          case .tiff: return "tiff"
          case .tga: return "tga"
          case .bmp: return "bmp"
          case .jp2: return "jp2"
          }
      }

      /// Computes `outputFilePath` and kicks off the conversion.
      override func start() {
          if isCancelled {
              // An overridden start() must still report completion for a cancelled
              // operation, otherwise an OperationQueue never removes it.
              markFinished()
              return
          }

          let resolvedExtension = KMPDFConvert.pathExtension(convertType, isExtractTable)
          let basePath = outputFolderPath + "/" + outputFileName

          switch convertType {
          case .jpeg, .jpg, .png, .gif, .tga, .bmp, .jp2, .tiff:
              // Image conversions target a folder named after the output file.
              // Best effort: creation fails harmlessly when the folder already exists.
              try? FileManager.default.createDirectory(atPath: basePath, withIntermediateDirectories: false)
              outputFilePath = basePath
          case .word, .excel, .ppt, .rtf, .csv, .html, .text:
              // Build the path locally so `outputFolderPath` is never mutated.
              outputFilePath = resolvedExtension.isEmpty ? basePath : basePath + "." + resolvedExtension
          }

          pathExtension = resolvedExtension
          startConvert()
      }

      /// Returns `filePath` if nothing exists there, otherwise the first free
      /// variant with a `(n)` suffix.
      ///
      /// For directories the suffix is appended to the whole path. For files it
      /// is inserted before the extension; a file without an extension gets no
      /// trailing dot.
      func getUniqueFilePath(filePath: String) -> String {
          let fileManager = FileManager.default
          var isDirectory: ObjCBool = false
          guard fileManager.fileExists(atPath: filePath, isDirectory: &isDirectory) else {
              return filePath
          }

          let stem: String
          let suffix: String
          if isDirectory.boolValue {
              stem = filePath
              suffix = ""
          } else {
              let fileURL = URL(fileURLWithPath: filePath)
              stem = fileURL.deletingPathExtension().path
              suffix = fileURL.pathExtension.isEmpty ? "" : "." + fileURL.pathExtension
          }

          var index = 1
          var candidate = "\(stem)(\(index))\(suffix)"
          while fileManager.fileExists(atPath: candidate) {
              index += 1
              candidate = "\(stem)(\(index))\(suffix)"
          }
          return candidate
      }

      /// Default conversion, run through `CPDFConverterFP`. Subclasses override this.
      ///
      /// Reports failure immediately when no path extension was resolved.
      func startConvert() {
          guard !pathExtension.isEmpty else {
              convertSuccessful(isSuccessful: false, errorInfo: nil)
              return
          }

          fpPDFConverter = CPDFConverterFP()
          fpPDFConverter.setDelegate(self)

          // Falls back to 0 when `options` is nil, the key is absent, or the value is not an Int.
          let dpi = (self.options?[KMPDFConvertOptionsKeyImageDPI] as? Int) ?? 0
          let moreOptions: [String:Any] = [
              CPDFConvertOptionsKey.imageDPI.rawValue: dpi,
              CPDFConvertOptionsKey.allInOneSheet.rawValue: isAllInOneSheet
          ]
          fpPDFConverter.convertPDF(atPath: filePath, pdfPassword: password, pdfPageIndexs: pages, destDocType: pathExtension, destDocPath: outputFilePath, moreOptions: moreOptions)
      }

      /// Records the outcome, schedules `callback` on the main queue and marks
      /// the operation finished.
      ///
      /// - Parameters:
      ///   - isSuccessful: Whether the conversion succeeded.
      ///   - errorInfo: The error reported by the converter, if any.
      func convertSuccessful(isSuccessful: Bool, errorInfo: Error!) {
          self.isSuccessful = isSuccessful
          self.errorInfo = errorInfo

          // Strong capture keeps the operation alive until the callback has run.
          DispatchQueue.main.async { [self] in
              self.callback?(isSuccessful, errorInfo)
          }

          markFinished()
      }

      /// Flips the finished flag with the KVO notifications `Operation` observers expect.
      private func markFinished() {
          willChangeValue(forKey: "isFinished")
          isCompletion = true
          didChangeValue(forKey: "isFinished")
      }

      override var isFinished: Bool {
          return isCompletion
      }
  }
  /// Forwards `CPDFConverter` delegate events to the operation's completion and progress hooks.
  extension KMPDFConvert: CPDFConverterDelegate {
      /// Nothing is done when the conversion starts.
      func converter(_ converter: CPDFConverter!, didStartConvert error: Error!) {}

      /// Finishes the operation; a nil `error` counts as success.
      func converter(_ converter: CPDFConverter!, didEndConvert error: Error!) {
          convertSuccessful(isSuccessful: error == nil, errorInfo: error)
      }

      /// Passes the reported page index to `progress`, if one is set.
      func converter(_ converter: CPDFConverter!, pageIndex index: UInt, pageCount count: UInt) {
          progress?(Int(index))
      }
  }
  /// Forwards `CPDFConverterFP` delegate events to the operation's completion and progress hooks.
  extension KMPDFConvert: CPDFConverterFPDelegate {
      /// Finishes the operation; a nil `error` counts as success.
      func fppdfConverter(_ converter: Any!, didEndConversion error: Error!) {
          convertSuccessful(isSuccessful: error == nil, errorInfo: error)
      }

      /// Passes the written page index (`wordPageIndexA`) to `progress`, if one is set.
      func fppdfConverter(_ converter: Any!, convertPDFPageIndex pdfPageIndexA: UInt, writeWordPageIndex wordPageIndexA: UInt, finshedWordPageCount wordPageCountA: UInt) {
          progress?(Int(wordPageIndexA))
      }
  }
  // MARK: - PDF to Word
  /// Converts a PDF to Word, via `CPDFConverterWord` for flowing layout or `CPDFConverterFP` otherwise.
  class KMPDFConvertWord: KMPDFConvert {
      // Box layout | flowing layout [flowing layout is the default]
      var layoutOptions: LayoutOptions = .RetainFlowingText

      override init() {
          super.init()
          convertType = .word
      }

      override func startConvert() {
          guard !pathExtension.isEmpty else {
              convertSuccessful(isSuccessful: false, errorInfo: nil)
              return
          }

          guard layoutOptions == .RetainFlowingText else {
              // Box layout
              fpPDFConverter = CPDFConverterFP()
              fpPDFConverter.setDelegate(self)
              let moreOptions: [String : Any] = [CPDFConvertOptionsKey.allInOneSheet.rawValue : false]
              fpPDFConverter.convertPDF(atPath: filePath, pdfPassword: password, pdfPageIndexs: pages, destDocType: pathExtension, destDocPath: outputFilePath, moreOptions: moreOptions)
              return
          }

          // Flowing layout
          converter = CPDFConverterWord(url: URL(fileURLWithPath: filePath), password: password)
          converter.delegate = self

          let wordOptions = CPDFConvertWordOptions()
          wordOptions.layoutOptions = layoutOptions
          wordOptions.isContainAnnotations = isContainAnnotations
          wordOptions.isAllowOCR = isAllowOCR
          if isAllowOCR {
              wordOptions.isContainOCRBgImage = isContainOCRBgImage
              // English is used when no OCR language was chosen.
              wordOptions.language = ocrLanguage ?? .english
          } else {
              wordOptions.isContainImages = true
              wordOptions.isContainOCRBgImage = false
          }

          converter.convert(toFilePath: outputFilePath, pageIndexs: pages, options: wordOptions)
      }
  }
  // MARK: - PDF to Image
  /// Converts a PDF to images through `CPDFConverterFP`.
  class KMPDFConvertImage: KMPDFConvert {
      // NOTE(review): `imageType` and `imageDpi` are not read by `startConvert()`
      // in this class; the DPI is taken from `options` instead. Confirm against callers.
      var imageType: CPDFConvertImgType = .JPEG
      var imageDpi: Int = 150

      /// Starts the image conversion, or reports failure when no path extension was resolved.
      override func startConvert() {
          guard !pathExtension.isEmpty else {
              convertSuccessful(isSuccessful: false, errorInfo: nil)
              return
          }

          fpPDFConverter = CPDFConverterFP()
          fpPDFConverter.setDelegate(self)

          // Conditional cast instead of `as!`: a missing or non-Int DPI entry falls
          // back to 0 (the same value used when `options` is nil) instead of crashing.
          let dpi = (self.options?[KMPDFConvertOptionsKeyImageDPI] as? Int) ?? 0
          let moreOptions: [String : Any] = [
              CPDFConvertOptionsKey.imageDPI.rawValue : dpi,
              CPDFConvertOptionsKey.allInOneSheet.rawValue : isAllInOneSheet
          ]
          fpPDFConverter.convertPDF(atPath: filePath, pdfPassword: password, pdfPageIndexs: pages, destDocType: pathExtension, destDocPath: outputFilePath, moreOptions: moreOptions)
      }
  }
  // MARK: - PDF to PPT
  /// Converts a PDF to PowerPoint through `CPDFConverterPPT`.
  class KMPDFConvertPPT: KMPDFConvert {
      override init() {
          super.init()
          convertType = .ppt
      }

      override func startConvert() {
          guard !pathExtension.isEmpty else {
              convertSuccessful(isSuccessful: false, errorInfo: nil)
              return
          }

          let pptOptions = CPDFConvertPPTOptions()
          pptOptions.isContainAnnotations = isContainAnnotations
          pptOptions.isAllowOCR = isAllowOCR
          if isAllowOCR {
              pptOptions.isContainOCRBgImage = isContainOCRBgImage
              // English is used when no OCR language was chosen.
              pptOptions.language = ocrLanguage ?? .english
          } else {
              pptOptions.isContainImages = true
              pptOptions.isContainOCRBgImage = false
          }

          converter = CPDFConverterPPT(url: URL(fileURLWithPath: filePath), password: password)
          converter.delegate = self
          converter.convert(toFilePath: outputFilePath, pageIndexs: pages, options: pptOptions)
      }
  }
  // MARK: - PDF to RTF
  /// Converts a PDF to RTF through `CPDFConverterRtf`.
  class KMPDFConvertRTF: KMPDFConvert {
      override init() {
          super.init()
          convertType = .rtf
      }

      override func startConvert() {
          guard !pathExtension.isEmpty else {
              convertSuccessful(isSuccessful: false, errorInfo: nil)
              return
          }

          let rtfOptions = CPDFConvertRtfOptions()
          rtfOptions.isContainAnnotations = isContainAnnotations
          rtfOptions.isAllowOCR = isAllowOCR
          if isAllowOCR {
              rtfOptions.isContainOCRBgImage = isContainOCRBgImage
              // English is used when no OCR language was chosen.
              rtfOptions.language = ocrLanguage ?? .english
          } else {
              rtfOptions.isContainImages = true
              rtfOptions.isContainOCRBgImage = false
          }

          converter = CPDFConverterRtf(url: URL(fileURLWithPath: filePath), password: password)
          converter.delegate = self
          converter.convert(toFilePath: outputFilePath, pageIndexs: pages, options: rtfOptions)
      }
  }
  // MARK: - PDF to HTML
  /// Converts a PDF to HTML through `CPDFConverterHtml`.
  class KMPDFConvertHTML: KMPDFConvert {
      override init() {
          super.init()
          convertType = .html
      }

      override func startConvert() {
          guard !pathExtension.isEmpty else {
              convertSuccessful(isSuccessful: false, errorInfo: nil)
              return
          }

          let htmlOptions = CPDFConvertHtmlOptions()
          htmlOptions.isAllowOCR = isAllowOCR
          if isAllowOCR {
              // The language is only set when OCR is on; English is the fallback.
              htmlOptions.language = ocrLanguage ?? .english
          }

          converter = CPDFConverterHtml(url: URL(fileURLWithPath: filePath), password: password)
          converter.delegate = self
          converter.convert(toFilePath: outputFilePath, pageIndexs: pages, options: htmlOptions)
      }
  }
  // MARK: - PDF to Text
  /// Converts a PDF to plain text through `CPDFConverterTxt`.
  class KMPDFConvertText: KMPDFConvert {
      override init() {
          super.init()
          convertType = .text
      }

      override func startConvert() {
          guard !pathExtension.isEmpty else {
              convertSuccessful(isSuccessful: false, errorInfo: nil)
              return
          }

          let txtOptions = CPDFConvertTxtOptions()
          txtOptions.isAllowOCR = isAllowOCR
          if isAllowOCR {
              // The language is only set when OCR is on; English is the fallback.
              txtOptions.language = ocrLanguage ?? .english
          }

          converter = CPDFConverterTxt(url: URL(fileURLWithPath: filePath), password: password)
          converter.delegate = self
          converter.convert(toFilePath: outputFilePath, pageIndexs: pages, options: txtOptions)
      }
  }
  // MARK: - PDF to CSV
  /// Converts a PDF to CSV, via `CPDFConverterCsv` when extracting tables and `CPDFConverterFP` otherwise.
  class KMPDFConvertCSV: KMPDFConvert {
      override init() {
          super.init()
          convertType = .csv
      }

      override func startConvert() {
          guard !pathExtension.isEmpty else {
              convertSuccessful(isSuccessful: false, errorInfo: nil)
              return
          }

          let usesTableExtraction = convertType == .csv && isExtractTable
          guard usesTableExtraction else {
              fpPDFConverter = CPDFConverterFP()
              fpPDFConverter.setDelegate(self)
              let moreOptions: [String : Any] = [CPDFConvertOptionsKey.allInOneSheet.rawValue : isAllInOneSheet]
              fpPDFConverter.convertPDF(atPath: filePath, pdfPassword: password, pdfPageIndexs: pages, destDocType: pathExtension, destDocPath: outputFilePath, moreOptions: moreOptions)
              return
          }

          // Table extraction passes no options object to the CSV converter.
          converter = CPDFConverterCsv(url: URL(fileURLWithPath: filePath), password: password)
          converter.delegate = self
          converter.convert(toFilePath: outputFilePath, pageIndexs: pages, options: nil)
      }
  }
  // MARK: - PDF to Excel
  /// Converts a PDF to Excel through `CPDFConverterExcel`.
  class KMPDFConvertExcel: KMPDFConvert {
      override init() {
          super.init()
          convertType = .excel
      }

      override func startConvert() {
          guard !pathExtension.isEmpty else {
              convertSuccessful(isSuccessful: false, errorInfo: nil)
              return
          }

          let excelOptions = CPDFConvertExcelOptions()
          excelOptions.isContainAnnotations = isContainAnnotations
          excelOptions.isAllowOCR = isAllowOCR
          if isAllowOCR {
              excelOptions.isContainOCRBgImage = isContainOCRBgImage
              // English is used when no OCR language was chosen.
              excelOptions.language = ocrLanguage ?? .english
          } else {
              excelOptions.isContainImages = true
              excelOptions.isContainOCRBgImage = false
          }

          // Text-only extraction wins over table extraction when both flags are set.
          if isExtractText {
              excelOptions.contentOptions = .onlyText
          } else if isExtractTable {
              excelOptions.contentOptions = .onlyTable
              switch extractTableIndex {
              case 0:
                  excelOptions.worksheetOptions = .forEachTable
              case 1:
                  excelOptions.worksheetOptions = .forEachPage
              case 2:
                  excelOptions.worksheetOptions = .forTheDocument
              default:
                  // Any other index leaves `worksheetOptions` at its existing value.
                  break
              }
          } else {
              excelOptions.contentOptions = .allContent
              excelOptions.worksheetOptions = isAllInOneSheet ? .forTheDocument : .forEachPage
          }

          converter = CPDFConverterExcel(url: URL(fileURLWithPath: filePath), password: password)
          converter.delegate = self
          converter.convert(toFilePath: outputFilePath, pageIndexs: pages, options: excelOptions)
      }
  }