KMPDFConvert.swift 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581
  1. //
  2. // KMPDFConvert.swift
  3. // PDF Master
  4. //
  5. // Created by tangchao on 2022/12/7.
  6. //
  7. import Cocoa
  8. import PDFKit
  9. import ComPDFKit_Conversion
  /// Key into `KMPDFConvert.options` whose value is the image DPI.
  let KMPDFConvertOptionsKeyImageDPI: String = "KMPDFConvertOptionsKeyImageDPI"
  /// Options key named for an "image with annotation" setting; not read by the code in this file.
  let KMPDFConvertOptionsKeyImageWithAnnotation: String = "KMPDFConvertOptionsKeyImageWithAnnotation"
  /// Target formats a PDF can be converted to.
  ///
  /// Raw values start at 0 and increase by one in declaration order
  /// (`word` = 0 ... `jp2` = 14), so the order of the cases must not change.
  enum KMPDFConvertType: Int {
      // Document formats.
      case word = 0
      case excel
      case ppt
      case rtf
      case csv
      case html
      case text
      // Image formats.
      case jpeg
      case jpg
      case png
      case gif
      case tiff
      case tga
      case bmp
      case jp2

      /// Alias for the generic "image" type; equal to `.jpeg`.
      static let image = KMPDFConvertType.jpeg
  }
  /// Completion handler: receives whether the conversion succeeded and the error, if any.
  typealias KMPDFConvertCallback = (_ finished: Bool, _ error: Error?) -> Void
  /// Progress handler: receives the page index reported by the converter.
  typealias KMPDFConvertProgress = (Int) -> Void
  /// Operation that converts a PDF file into another document or image format.
  ///
  /// `start()` derives `outputFilePath` from `outputFolderPath`, `outputFileName`
  /// and the extension for `convertType`, then calls `startConvert()`. The base
  /// `startConvert()` drives `CPDFConverterFP`; subclasses override it to use a
  /// format-specific converter. The outcome is reported through `callback`
  /// (dispatched to the main queue) and by flipping `isFinished`.
  class KMPDFConvert: Operation {
      var type: Int = 0
      /// Path of the source PDF.
      var filePath: String = ""
      /// Password passed to the converter when opening the PDF.
      var password: String = ""
      /// Output file name without extension.
      var outputFileName: String = ""
      /// Folder the output is written into.
      var outputFolderPath: String = ""
      /// Page indexes handed to the converter.
      var pages: [Int]!
      var convertType: KMPDFConvertType = .word
      /// Extra options; the base converter reads `KMPDFConvertOptionsKeyImageDPI` from it.
      var options: [String:Any]!
      /// Full output path, computed by `start()`.
      var outputFilePath: String = ""
      /// Result of the last conversion, set by `convertSuccessful(isSuccessful:errorInfo:)`.
      var isSuccessful: Bool = false
      var isAllInOneSheet: Bool = false
      var isExtractTable: Bool = false
      var isExtractText: Bool = false
      /**
       0: extract each table to its own worksheet
       1: extract tables page by page to separate worksheets
       2: extract all tables into a single worksheet
       */
      var extractTableIndex: Int = 0
      /// Error of the last conversion, set by `convertSuccessful(isSuccessful:errorInfo:)`.
      var errorInfo: Error!
      // Whether to use OCR
      var isAllowOCR = false
      var ocrLanguage: COCRLanguage?
      var isContainOCRBgImage = true
      var isContainAnnotations = true
      var isContainImages = true

      /// Extension resolved in `start()`; an empty value makes `startConvert()` fail.
      fileprivate var pathExtension: String = ""
      fileprivate var fpPDFConverter: CPDFConverterFP!
      fileprivate var converter: CPDFConverter!
      /// Backing storage for `isFinished`.
      private var isCompletion: Bool = false

      var callback: KMPDFConvertCallback!
      var progress: KMPDFConvertProgress?

      /// Returns the output file extension for `type`, treating table extraction as off.
      public class func pathExtension(_ type: KMPDFConvertType) -> String {
          return self.pathExtension(type, nil)
      }

      /// Returns the output file extension for `type`.
      ///
      /// - Parameters:
      ///   - type: Target format.
      ///   - isExtractTable: When `true` and `type` is `.csv`, the result is `"zip"`
      ///     instead of `"csv"`. Ignored for every other type.
      /// - Returns: The extension without a leading dot.
      public class func pathExtension(_ type: KMPDFConvertType, _ isExtractTable: Bool?) -> String {
          // Exhaustive on purpose: a new enum case must be handled here to compile.
          switch type {
          case .word: return "docx"
          case .excel: return "xlsx"
          case .ppt: return "pptx"
          case .rtf: return "rtf"
          case .csv: return isExtractTable == true ? "zip" : "csv"
          case .html: return "html"
          case .text: return "txt"
          case .jpeg: return "jpeg"
          case .jpg: return "jpg"
          case .png: return "png"
          case .gif: return "gif"
          case .tga: return "tga"
          case .bmp: return "bmp"
          case .jp2: return "jp2"
          case .tiff: return "tiff"
          }
      }

      /// Computes `outputFilePath` for the current `convertType` and starts the conversion.
      ///
      /// - `.jpeg` / `.png`: output is `<folder>/<name>.zip`.
      /// - other image types: output is the directory `<folder>/<name>`, created best-effort.
      /// - document types: output is `<folder>/<name>.<extension>`.
      override func start() {
          if isCancelled {
              // Because start() is overridden and isFinished is managed here, a
              // cancelled operation must still be moved to the finished state;
              // otherwise it never leaves its queue. The callback is not invoked.
              markFinished()
              return
          }

          let ext = KMPDFConvert.pathExtension(convertType, isExtractTable)
          switch convertType {
          case .jpeg, .png:
              outputFilePath = "\(outputFolderPath)/\(outputFileName).zip"
          case .jpg, .gif, .tga, .bmp, .jp2, .tiff:
              let folderPath = "\(outputFolderPath)/\(outputFileName)"
              // Best-effort: a failure here (e.g. the directory already exists) is
              // ignored and the converter is left to report any real problem.
              try? FileManager.default.createDirectory(atPath: folderPath, withIntermediateDirectories: false)
              outputFilePath = folderPath
          case .word, .excel, .ppt, .rtf, .csv, .html, .text:
              if !ext.isEmpty {
                  outputFilePath = "\(outputFolderPath)/\(outputFileName).\(ext)"
              } else {
                  // No extension available: keep the historical behaviour of
                  // folding the file name into outputFolderPath itself.
                  outputFolderPath.append("/")
                  outputFolderPath.append(outputFileName)
                  outputFilePath = outputFolderPath
              }
          }

          pathExtension = ext
          startConvert()
      }

      /// Returns a path at which nothing exists yet, derived from `filePath`.
      ///
      /// If nothing exists at `filePath` it is returned unchanged. Otherwise
      /// `(1)`, `(2)`, ... is appended (before the extension for files, at the end
      /// for directories) until the candidate path is unused.
      func getUniqueFilePath(filePath: String) -> String {
          let fileManager = FileManager.default
          var isDirectory: ObjCBool = false
          guard fileManager.fileExists(atPath: filePath, isDirectory: &isDirectory) else {
              return filePath
          }

          let fileURL = URL(fileURLWithPath: filePath)
          let ext = fileURL.pathExtension
          let isFolder = isDirectory.boolValue
          let basePath = isFolder ? filePath : fileURL.deletingPathExtension().path

          var index = 0
          var uniqueFilePath = filePath
          while fileManager.fileExists(atPath: uniqueFilePath) {
              index += 1
              uniqueFilePath = "\(basePath)(\(index))"
              // Only re-attach an extension that actually exists, so an
              // extension-less file does not end up with a trailing ".".
              if !isFolder && !ext.isEmpty {
                  uniqueFilePath.append(".\(ext)")
              }
          }
          return uniqueFilePath
      }

      /// Runs the conversion with `CPDFConverterFP`. Subclasses override this.
      ///
      /// Fails immediately (no error object) when no path extension was resolved.
      func startConvert() {
          if pathExtension.isEmpty {
              convertSuccessful(isSuccessful: false, errorInfo: nil)
              return
          }

          fpPDFConverter = CPDFConverterFP()
          fpPDFConverter.setDelegate(self)

          // A missing or non-Int DPI entry falls back to 0 instead of crashing.
          let dpi = (options?[KMPDFConvertOptionsKeyImageDPI] as? Int) ?? 0
          let moreOptions: [String:Any] = [
              CPDFConvertOptionsKey.imageDPI.rawValue: dpi,
              CPDFConvertOptionsKey.allInOneSheet.rawValue: isAllInOneSheet
          ]
          fpPDFConverter.convertPDF(atPath: filePath, pdfPassword: password, pdfPageIndexs: pages, destDocType: pathExtension, destDocPath: outputFilePath, moreOptions: moreOptions)
      }

      /// Records the outcome, schedules `callback` on the main queue and finishes the operation.
      ///
      /// - Parameters:
      ///   - isSuccessful: Whether the conversion succeeded.
      ///   - errorInfo: The error reported by the converter, or `nil`.
      func convertSuccessful(isSuccessful: Bool, errorInfo: Error!) {
          self.isSuccessful = isSuccessful
          self.errorInfo = errorInfo
          DispatchQueue.main.async { [self] in
              callback?(isSuccessful, errorInfo)
          }
          markFinished()
      }

      /// Flips `isFinished` to `true` with the KVO notifications `Operation` requires.
      private func markFinished() {
          willChangeValue(forKey: "isFinished")
          isCompletion = true
          didChangeValue(forKey: "isFinished")
      }

      override var isFinished: Bool {
          return isCompletion
      }
  }
  /// Callbacks from the format-specific `CPDFConverter` instances.
  extension KMPDFConvert: CPDFConverterDelegate {
      /// Nothing to do when a conversion starts.
      func converter(_ converter: CPDFConverter!, didStartConvert error: Error!) {
      }

      /// Finishes the operation; the conversion succeeded exactly when `error` is nil.
      func converter(_ converter: CPDFConverter!, didEndConvert error: Error!) {
          convertSuccessful(isSuccessful: error == nil, errorInfo: error)
      }

      /// Forwards the reported page index to `progress`, if one is set.
      func converter(_ converter: CPDFConverter!, pageIndex index: UInt, pageCount count: UInt) {
          progress?(Int(index))
      }
  }
  /// Callbacks from `CPDFConverterFP`.
  extension KMPDFConvert: CPDFConverterFPDelegate {
      /// Finishes the operation; the conversion succeeded exactly when `error` is nil.
      func fppdfConverter(_ converter: Any!, didEndConversion error: Error!) {
          convertSuccessful(isSuccessful: error == nil, errorInfo: error)
      }

      /// Forwards the written-page index (`wordPageIndexA`) to `progress`, if one is set.
      func fppdfConverter(_ converter: Any!, convertPDFPageIndex pdfPageIndexA: UInt, writeWordPageIndex wordPageIndexA: UInt, finshedWordPageCount wordPageCountA: UInt) {
          progress?(Int(wordPageIndexA))
      }
  }
  // MARK: - PDF to Word
  /// Converts a PDF to a Word document using `CPDFConverterWord`.
  class KMPDFConvertWord: KMPDFConvert {
      // Box layout | flow layout [default: flow layout]
      var layoutOptions: CPDFConvertLayoutOptions = .retainFlowingText

      override init() {
          super.init()
          convertType = .word
      }

      /// Builds the Word options from this operation's settings and starts the conversion.
      override func startConvert() {
          guard !pathExtension.isEmpty else {
              convertSuccessful(isSuccessful: false, errorInfo: nil)
              return
          }

          converter = CPDFConverterWord(url: URL(fileURLWithPath: filePath), password: password)
          converter.delegate = self

          let wordOptions = CPDFConvertWordOptions()
          wordOptions.layoutOptions = layoutOptions
          wordOptions.isContainAnnotations = isContainAnnotations
          wordOptions.isAllowOCR = isAllowOCR
          if isAllowOCR {
              wordOptions.isContainOCRBgImage = isContainOCRBgImage
              // English is the fallback when no OCR language was chosen.
              wordOptions.language = ocrLanguage ?? .english
          } else {
              // Without OCR, images are always kept and no OCR background is added.
              wordOptions.isContainImages = true
              wordOptions.isContainOCRBgImage = false
          }

          converter.convert(toFilePath: outputFilePath, pageIndexs: pages, options: wordOptions)
      }
  }
  // MARK: - PDF to Image
  /// Converts a PDF to images.
  ///
  /// `.jpeg` and `.png` go through `CPDFConverterImg`; every other
  /// `convertType` falls back to `CPDFConverterFP`.
  class KMPDFConvertImage: KMPDFConvert {
      /// Image type handed to `CPDFConverterImg`.
      var imageType: CPDFConvertImgType = .JPEG
      /// Output resolution in DPI.
      var imageDpi: Int = 150

      /// Starts the image conversion; fails immediately when no path extension was resolved.
      override func startConvert() {
          if self.pathExtension.isEmpty {
              self.convertSuccessful(isSuccessful: false, errorInfo: nil)
              return
          }

          if (self.convertType == .jpeg || self.convertType == .png) {
              self.converter = CPDFConverterImg(url: URL(fileURLWithPath: self.filePath), password: self.password)
              self.converter.delegate = self

              let options = CPDFConvertImgOptions()
              options.type = self.imageType
              // Int32(_:) traps when the value does not fit; clamp instead so an
              // out-of-range DPI cannot crash the app.
              options.imageDpi = Int32(clamping: self.imageDpi)
              // Annotations are always rendered on this path.
              options.isContainAnnotations = true
              self.converter.convert(toFilePath: self.outputFilePath, pageIndexs: self.pages, options: options)
              return
          }

          self.fpPDFConverter = CPDFConverterFP()
          self.fpPDFConverter.setDelegate(self)

          let options: [String : Any] = [CPDFConvertOptionsKey.imageDPI.rawValue : self.imageDpi]
          self.fpPDFConverter.convertPDF(atPath: self.filePath, pdfPassword: self.password, pdfPageIndexs: self.pages, destDocType: self.pathExtension, destDocPath: self.outputFilePath, moreOptions: options)
      }
  }
  // MARK: - PDF to PPT
  /// Converts a PDF to a PowerPoint document using `CPDFConverterPPT`.
  class KMPDFConvertPPT: KMPDFConvert {
      override init() {
          super.init()
          convertType = .ppt
      }

      /// Builds the PPT options from this operation's settings and starts the conversion.
      override func startConvert() {
          guard !pathExtension.isEmpty else {
              convertSuccessful(isSuccessful: false, errorInfo: nil)
              return
          }

          let pptOptions = CPDFConvertPPTOptions()
          pptOptions.isContainAnnotations = isContainAnnotations
          pptOptions.isAllowOCR = isAllowOCR
          if isAllowOCR {
              pptOptions.isContainOCRBgImage = isContainOCRBgImage
              // English is the fallback when no OCR language was chosen.
              pptOptions.language = ocrLanguage ?? .english
          } else {
              // Without OCR, images are always kept and no OCR background is added.
              pptOptions.isContainImages = true
              pptOptions.isContainOCRBgImage = false
          }

          converter = CPDFConverterPPT(url: URL(fileURLWithPath: filePath), password: password)
          converter.delegate = self
          converter.convert(toFilePath: outputFilePath, pageIndexs: pages, options: pptOptions)
      }
  }
  // MARK: - PDF to RTF
  /// Converts a PDF to an RTF document using `CPDFConverterRtf`.
  class KMPDFConvertRTF: KMPDFConvert {
      override init() {
          super.init()
          convertType = .rtf
      }

      /// Builds the RTF options from this operation's settings and starts the conversion.
      override func startConvert() {
          guard !pathExtension.isEmpty else {
              convertSuccessful(isSuccessful: false, errorInfo: nil)
              return
          }

          let rtfOptions = CPDFConvertRtfOptions()
          rtfOptions.isContainAnnotations = isContainAnnotations
          rtfOptions.isAllowOCR = isAllowOCR
          if isAllowOCR {
              rtfOptions.isContainOCRBgImage = isContainOCRBgImage
              // English is the fallback when no OCR language was chosen.
              rtfOptions.language = ocrLanguage ?? .english
          } else {
              // Without OCR, images are always kept and no OCR background is added.
              rtfOptions.isContainImages = true
              rtfOptions.isContainOCRBgImage = false
          }

          converter = CPDFConverterRtf(url: URL(fileURLWithPath: filePath), password: password)
          converter.delegate = self
          converter.convert(toFilePath: outputFilePath, pageIndexs: pages, options: rtfOptions)
      }
  }
  // MARK: - PDF to HTML
  /// Converts a PDF to an HTML document using `CPDFConverterHtml`.
  class KMPDFConvertHTML: KMPDFConvert {
      override init() {
          super.init()
          convertType = .html
      }

      /// Builds the HTML options from this operation's settings and starts the conversion.
      override func startConvert() {
          guard !pathExtension.isEmpty else {
              convertSuccessful(isSuccessful: false, errorInfo: nil)
              return
          }

          let htmlOptions = CPDFConvertHtmlOptions()
          htmlOptions.isContainAnnotations = isContainAnnotations
          htmlOptions.isAllowOCR = isAllowOCR
          if isAllowOCR {
              htmlOptions.isContainOCRBgImage = isContainOCRBgImage
              // English is the fallback when no OCR language was chosen.
              htmlOptions.language = ocrLanguage ?? .english
          } else {
              // Without OCR, image inclusion follows `isContainImages`
              // and no OCR background is added.
              htmlOptions.isContainImages = isContainImages
              htmlOptions.isContainOCRBgImage = false
          }

          converter = CPDFConverterHtml(url: URL(fileURLWithPath: filePath), password: password)
          converter.delegate = self
          converter.convert(toFilePath: outputFilePath, pageIndexs: pages, options: htmlOptions)
      }
  }
  // MARK: - PDF to Text
  /// Converts a PDF to a plain-text file using `CPDFConverterTxt`.
  class KMPDFConvertText: KMPDFConvert {
      override init() {
          super.init()
          convertType = .text
      }

      /// Builds the text options from this operation's settings and starts the conversion.
      override func startConvert() {
          guard !pathExtension.isEmpty else {
              convertSuccessful(isSuccessful: false, errorInfo: nil)
              return
          }

          let txtOptions = CPDFConvertTxtOptions()
          txtOptions.isAllowOCR = isAllowOCR
          if isAllowOCR {
              // The OCR language is only set when OCR is on; English is the fallback.
              txtOptions.language = ocrLanguage ?? .english
          }

          converter = CPDFConverterTxt(url: URL(fileURLWithPath: filePath), password: password)
          converter.delegate = self
          converter.convert(toFilePath: outputFilePath, pageIndexs: pages, options: txtOptions)
      }
  }
  // MARK: - PDF to CSV
  /// Converts a PDF to CSV.
  ///
  /// With `isExtractTable` set, `CPDFConverterCsv` is used; otherwise the
  /// conversion goes through `CPDFConverterFP`.
  class KMPDFConvertCSV: KMPDFConvert {
      override init() {
          super.init()
          convertType = .csv
      }

      /// Picks the converter based on `isExtractTable` and starts the conversion.
      override func startConvert() {
          guard !pathExtension.isEmpty else {
              convertSuccessful(isSuccessful: false, errorInfo: nil)
              return
          }

          let usesTableExtraction = convertType == .csv && isExtractTable
          guard usesTableExtraction else {
              // Plain CSV export: only the all-in-one-sheet flag is passed along.
              fpPDFConverter = CPDFConverterFP()
              fpPDFConverter.setDelegate(self)
              let moreOptions: [String : Any] = [CPDFConvertOptionsKey.allInOneSheet.rawValue : isAllInOneSheet]
              fpPDFConverter.convertPDF(atPath: filePath, pdfPassword: password, pdfPageIndexs: pages, destDocType: pathExtension, destDocPath: outputFilePath, moreOptions: moreOptions)
              return
          }

          // Table extraction: the dedicated CSV converter is called without options.
          converter = CPDFConverterCsv(url: URL(fileURLWithPath: filePath), password: password)
          converter.delegate = self
          converter.convert(toFilePath: outputFilePath, pageIndexs: pages, options: nil)
      }
  }
  // MARK: - PDF to Excel
  /// Converts a PDF to an Excel workbook using `CPDFConverterExcel`.
  class KMPDFConvertExcel: KMPDFConvert {
      override init() {
          super.init()
          convertType = .excel
      }

      /// Builds the Excel options from this operation's settings and starts the conversion.
      ///
      /// Content selection priority: `isExtractText`, then `isExtractTable`,
      /// otherwise all content.
      override func startConvert() {
          guard !pathExtension.isEmpty else {
              convertSuccessful(isSuccessful: false, errorInfo: nil)
              return
          }

          converter = CPDFConverterExcel(url: URL(fileURLWithPath: filePath), password: password)
          converter.delegate = self

          let excelOptions = CPDFConvertExcelOptions()
          excelOptions.isContainAnnotations = isContainAnnotations
          excelOptions.isAllowOCR = isAllowOCR
          if isAllowOCR {
              excelOptions.isContainOCRBgImage = isContainOCRBgImage
              // English is the fallback when no OCR language was chosen.
              excelOptions.language = ocrLanguage ?? .english
          } else {
              // Without OCR, images are always kept and no OCR background is added.
              excelOptions.isContainImages = true
              excelOptions.isContainOCRBgImage = false
          }

          if isExtractText {
              excelOptions.contentOptions = .onlyText
          } else if isExtractTable {
              excelOptions.contentOptions = .onlyTable
              switch extractTableIndex {
              case 0:
                  excelOptions.worksheetOptions = .forEachTable
              case 1:
                  excelOptions.worksheetOptions = .forEachPage
              case 2:
                  excelOptions.worksheetOptions = .forTheDocument
              default:
                  // Any other index leaves worksheetOptions untouched.
                  break
              }
          } else {
              excelOptions.contentOptions = .allContent
              excelOptions.worksheetOptions = isAllInOneSheet ? .forTheDocument : .forEachPage
          }

          converter.convert(toFilePath: outputFilePath, pageIndexs: pages, options: excelOptions)
      }
  }