KMPDFConvert.swift 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584
  1. //
  2. // KMPDFConvert.swift
  3. // PDF Master
  4. //
  5. // Created by tangchao on 2022/12/7.
  6. //
  7. import Cocoa
  8. import PDFKit
  9. import ComPDFKit_Conversion
  10. let KMPDFConvertOptionsKeyImageDPI = "KMPDFConvertOptionsKeyImageDPI"
  11. let KMPDFConvertOptionsKeyImageWithAnnotation = "KMPDFConvertOptionsKeyImageWithAnnotation"
  12. enum KMPDFConvertType: Int {
  13. case word = 0
  14. case excel = 1
  15. case ppt = 2
  16. case rtf = 3
  17. case csv = 4
  18. case html = 5
  19. case text = 6
  20. case jpeg = 7
  21. case jpg = 8
  22. case png = 9
  23. case gif = 10
  24. case tiff = 11
  25. case tga = 12
  26. case bmp = 13
  27. case jp2 = 14
  28. static let image: KMPDFConvertType = .jpeg
  29. }
  30. typealias KMPDFConvertCallback = (_ finished: Bool, _ error: Error?) -> ()
  31. typealias KMPDFConvertProgress = (Int) -> ()
  32. class KMPDFConvert: Operation {
  33. var type: Int = 0
  34. var filePath: String = ""
  35. var password: String = ""
  36. var outputFileName: String = ""
  37. var outputFolderPath: String = ""
  38. var pages: [Int]!
  39. var convertType: KMPDFConvertType = .word
  40. var options: [String:Any]!
  41. var outputFilePath: String = ""
  42. var isSuccessful: Bool = false
  43. var isAllInOneSheet: Bool = false
  44. var isExtractTable: Bool = false
  45. var isExtractText: Bool = false
  46. /**
  47. 0 支持一个表格提取到单独的工作表
  48. 1 支持按页面提取表格到单独的工作表
  49. 2 支持将所有表格提取到一个工作表
  50. */
  51. var extractTableIndex: Int = 0
  52. var errorInfo: Error!
  53. // 是否使用OCR
  54. var isAllowOCR = false
  55. var ocrLanguage: COCRLanguage?
  56. var isContainOCRBgImage = true
  57. var isContainAnnotations = true
  58. var isContainImages = true
  59. fileprivate var pathExtension: String = ""
  60. fileprivate var fpPDFConverter: CPDFConverterFP!
  61. fileprivate var converter: CPDFConverter!
  62. private var isCompletion: Bool = false
  63. var callback: KMPDFConvertCallback!
  64. var progress: KMPDFConvertProgress?
  65. var excelWorksheetOption: CPDFConvertExcelWorksheetOptions?
  66. var excelContentOption: CPDFConvertExcelContentOptions?
  67. public class func pathExtension(_ type: KMPDFConvertType) -> String {
  68. return self.pathExtension(type, nil)
  69. }
  70. public class func pathExtension(_ type: KMPDFConvertType, _ isExtractTable: Bool?) -> String {
  71. if type == .word {
  72. return "docx"
  73. } else if type == .excel {
  74. return "xlsx"
  75. } else if type == .ppt {
  76. return "pptx"
  77. } else if type == .rtf {
  78. return "rtf"
  79. } else if type == .csv {
  80. if isExtractTable != nil && isExtractTable! {
  81. return "zip"
  82. }
  83. return "csv"
  84. } else if type == .html {
  85. return "html"
  86. } else if type == .text {
  87. return "txt"
  88. } else if type == .jpeg {
  89. return "jpeg"
  90. } else if type == .jpg {
  91. return "jpg"
  92. } else if type == .png {
  93. return "png"
  94. } else if type == .gif {
  95. return "gif"
  96. } else if type == .tga {
  97. return "tga"
  98. } else if type == .bmp {
  99. return "bmp"
  100. } else if type == .jp2 {
  101. return "jp2"
  102. } else if type == .tiff {
  103. return "tiff"
  104. }
  105. return ""
  106. }
  107. override func start() {
  108. if isCancelled {
  109. return
  110. }
  111. let pathExtension = KMPDFConvert.pathExtension(self.convertType, self.isExtractTable)
  112. var fileName = outputFileName
  113. var path = outputFolderPath
  114. if convertType == .jpeg || convertType == .jpg || convertType == .png || convertType == .gif || convertType == .tga || convertType == .bmp || convertType == .jp2 || convertType == .tiff {
  115. if (self.convertType == .jpeg || self.convertType == .png) {
  116. self.outputFilePath = "\(path)/\(fileName).zip"
  117. } else {
  118. path.append("/")
  119. path.append(fileName)
  120. // let folderPath = getUniqueFilePath(filePath: path)
  121. try?FileManager.default.createDirectory(atPath: path, withIntermediateDirectories: false)
  122. outputFilePath = path
  123. }
  124. } else {
  125. if !pathExtension.isEmpty {
  126. fileName.append(".")
  127. fileName.append(pathExtension)
  128. path.append("/")
  129. path.append(fileName)
  130. // let folderPath = getUniqueFilePath(filePath: path)
  131. outputFilePath = path
  132. } else {
  133. outputFolderPath.append("/")
  134. outputFolderPath.append(outputFileName)
  135. outputFilePath = outputFolderPath
  136. }
  137. }
  138. self.pathExtension = pathExtension
  139. self.startConvert()
  140. }
  141. func getUniqueFilePath(filePath: String) -> String {
  142. var i: Int = 0
  143. var isDirectory: ObjCBool = false
  144. var uniqueFilePath = filePath
  145. let fileManager = FileManager.default
  146. fileManager.fileExists(atPath: uniqueFilePath, isDirectory: &isDirectory)
  147. if isDirectory.boolValue {
  148. var path: String = ""
  149. while fileManager.fileExists(atPath: uniqueFilePath) {
  150. i += 1
  151. path = filePath
  152. path.append("(\(i))")
  153. uniqueFilePath = path
  154. }
  155. } else {
  156. let fileURL = URL(fileURLWithPath: filePath)
  157. var path: String = ""
  158. while fileManager.fileExists(atPath: uniqueFilePath) {
  159. i += 1
  160. path = fileURL.deletingPathExtension().path
  161. path.append("(\(i))")
  162. path.append(".")
  163. path.append(fileURL.pathExtension)
  164. uniqueFilePath = path
  165. }
  166. }
  167. return uniqueFilePath
  168. }
  169. func startConvert() {
  170. if pathExtension.isEmpty {
  171. convertSuccessful(isSuccessful: false, errorInfo: nil)
  172. return
  173. }
  174. //
  175. //// if (convertType == .jpeg || convertType == .png) {
  176. //// converter = CPDFConverterImg(url: URL(fileURLWithPath: filePath), password: nil)
  177. //// converter.delegate = self
  178. //// let options = CPDFConvertImgOptions()
  179. //// if (convertType == .jpeg) {
  180. //// options.type = .JPEG
  181. //// } else if (convertType == .png) {
  182. //// options.type = .PNG
  183. //// }
  184. //
  185. //// converter.convert(toFilePath: outputFilePath, pageIndexs: pages, options: options)
  186. //// return
  187. //// }
  188. //
  189. fpPDFConverter = CPDFConverterFP()
  190. fpPDFConverter.setDelegate(self)
  191. var dpi: Int = 0
  192. if self.options != nil {
  193. dpi = self.options[KMPDFConvertOptionsKeyImageDPI] as! Int
  194. }
  195. let options: [String:Any] = [CPDFConvertOptionsKey.imageDPI.rawValue:dpi,CPDFConvertOptionsKey.allInOneSheet.rawValue:isAllInOneSheet]
  196. fpPDFConverter.convertPDF(atPath: filePath, pdfPassword: self.password, pdfPageIndexs: pages, destDocType: pathExtension, destDocPath: outputFilePath, moreOptions: options)
  197. }
  198. func convertSuccessful(isSuccessful: Bool, errorInfo: Error!) {
  199. self.isSuccessful = isSuccessful
  200. self.errorInfo = errorInfo
  201. DispatchQueue.main.async { [self] in
  202. guard let callbackBlock = callback else {
  203. return
  204. }
  205. callbackBlock(isSuccessful, errorInfo)
  206. }
  207. willChangeValue(forKey: "isFinished")
  208. isCompletion = true
  209. didChangeValue(forKey: "isFinished")
  210. }
  211. override var isFinished: Bool {
  212. return self.isCompletion
  213. }
  214. }
  215. extension KMPDFConvert: CPDFConverterDelegate {
  216. func converter(_ converter: CPDFConverter!, didStartConvert error: Error!) {
  217. }
  218. func converter(_ converter: CPDFConverter!, didEndConvert error: Error!) {
  219. if (error != nil) {
  220. convertSuccessful(isSuccessful: false, errorInfo: error)
  221. } else {
  222. convertSuccessful(isSuccessful: true, errorInfo: error)
  223. }
  224. }
  225. func converter(_ converter: CPDFConverter!, pageIndex index: UInt, pageCount count: UInt) {
  226. guard let callback = progress else {
  227. return
  228. }
  229. callback(Int(index))
  230. }
  231. }
  232. extension KMPDFConvert: CPDFConverterFPDelegate {
  233. func fppdfConverter(_ converter: Any!, didEndConversion error: Error!) {
  234. if (error != nil) {
  235. convertSuccessful(isSuccessful: false, errorInfo: error)
  236. } else {
  237. convertSuccessful(isSuccessful: true, errorInfo: error)
  238. }
  239. }
  240. func fppdfConverter(_ converter: Any!, convertPDFPageIndex pdfPageIndexA: UInt, writeWordPageIndex wordPageIndexA: UInt, finshedWordPageCount wordPageCountA: UInt) {
  241. guard let callback = progress else {
  242. return
  243. }
  244. callback(Int(wordPageIndexA))
  245. }
  246. }
  247. // MARK: - PDF 转 Word
  248. class KMPDFConvertWord: KMPDFConvert {
  249. // 框排 | 流排 [默认流排]
  250. var layoutOptions: CPDFConvertLayoutOptions = .retainFlowingText
  251. override init() {
  252. super.init()
  253. self.convertType = .word
  254. }
  255. override func startConvert() {
  256. if self.pathExtension.isEmpty {
  257. self.convertSuccessful(isSuccessful: false, errorInfo: nil)
  258. return
  259. }
  260. self.converter = CPDFConverterWord(url: URL(fileURLWithPath: self.filePath), password: self.password)
  261. self.converter.delegate = self
  262. let options = CPDFConvertWordOptions()
  263. options.layoutOptions = self.layoutOptions
  264. options.isContainAnnotations = self.isContainAnnotations
  265. options.isAllowOCR = self.isAllowOCR
  266. if (self.isAllowOCR) {
  267. options.isContainOCRBgImage = self.isContainOCRBgImage
  268. if let language = self.ocrLanguage {
  269. options.language = language
  270. } else {
  271. options.language = .english
  272. }
  273. } else {
  274. options.isContainImages = true
  275. options.isContainOCRBgImage = false
  276. }
  277. self.converter.convert(toFilePath: self.outputFilePath, pageIndexs: self.pages, options: options)
  278. }
  279. }
  280. // MARK: - PDF 转 Image
  281. class KMPDFConvertImage: KMPDFConvert {
  282. var imageType: CPDFConvertImgType = .JPEG
  283. var imageDpi: Int = 150
  284. override func startConvert() {
  285. if self.pathExtension.isEmpty {
  286. self.convertSuccessful(isSuccessful: false, errorInfo: nil)
  287. return
  288. }
  289. if (self.convertType == .jpeg || self.convertType == .png) {
  290. self.converter = CPDFConverterImg(url: URL(fileURLWithPath: self.filePath), password: self.password)
  291. self.converter.delegate = self
  292. let options = CPDFConvertImgOptions()
  293. options.type = self.imageType
  294. options.imageDpi = Int32(self.imageDpi)
  295. options.isContainAnnotations = true
  296. self.converter.convert(toFilePath: self.outputFilePath, pageIndexs: self.pages, options: options)
  297. return
  298. }
  299. self.fpPDFConverter = CPDFConverterFP()
  300. self.fpPDFConverter.setDelegate(self)
  301. let options: [String : Any] = [CPDFConvertOptionsKey.imageDPI.rawValue : self.imageDpi]
  302. self.fpPDFConverter.convertPDF(atPath: self.filePath, pdfPassword: self.password, pdfPageIndexs: self.pages, destDocType: self.pathExtension, destDocPath: self.outputFilePath, moreOptions: options)
  303. }
  304. }
  305. // MARK: - PDF 转 PPT
  306. class KMPDFConvertPPT: KMPDFConvert {
  307. override init() {
  308. super.init()
  309. self.convertType = .ppt
  310. }
  311. override func startConvert() {
  312. if self.pathExtension.isEmpty {
  313. self.convertSuccessful(isSuccessful: false, errorInfo: nil)
  314. return
  315. }
  316. let options = CPDFConvertPPTOptions()
  317. options.isContainAnnotations = self.isContainAnnotations
  318. options.isAllowOCR = self.isAllowOCR
  319. if (self.isAllowOCR) {
  320. options.isContainOCRBgImage = self.isContainOCRBgImage
  321. if let language = self.ocrLanguage {
  322. options.language = language
  323. } else {
  324. options.language = .english
  325. }
  326. } else {
  327. options.isContainImages = true
  328. options.isContainOCRBgImage = false
  329. }
  330. self.converter = CPDFConverterPPT(url: URL(fileURLWithPath: self.filePath), password: self.password)
  331. self.converter.delegate = self
  332. self.converter.convert(toFilePath: self.outputFilePath, pageIndexs: self.pages, options: options)
  333. }
  334. }
  335. // MARK: - PDF 转 RTF
  336. class KMPDFConvertRTF: KMPDFConvert {
  337. override init() {
  338. super.init()
  339. self.convertType = .rtf
  340. }
  341. override func startConvert() {
  342. if self.pathExtension.isEmpty {
  343. self.convertSuccessful(isSuccessful: false, errorInfo: nil)
  344. return
  345. }
  346. let options = CPDFConvertRtfOptions()
  347. options.isContainAnnotations = self.isContainAnnotations
  348. options.isAllowOCR = self.isAllowOCR
  349. if (self.isAllowOCR) {
  350. options.isContainOCRBgImage = self.isContainOCRBgImage
  351. if let language = self.ocrLanguage {
  352. options.language = language
  353. } else {
  354. options.language = .english
  355. }
  356. } else {
  357. options.isContainImages = true
  358. options.isContainOCRBgImage = false
  359. }
  360. self.converter = CPDFConverterRtf(url: URL(fileURLWithPath: self.filePath), password: self.password)
  361. self.converter.delegate = self
  362. self.converter.convert(toFilePath: self.outputFilePath, pageIndexs: self.pages, options: options)
  363. }
  364. }
  365. // MARK: - PDF 转 HTML
  366. class KMPDFConvertHTML: KMPDFConvert {
  367. override init() {
  368. super.init()
  369. self.convertType = .html
  370. }
  371. override func startConvert() {
  372. if self.pathExtension.isEmpty {
  373. self.convertSuccessful(isSuccessful: false, errorInfo: nil)
  374. return
  375. }
  376. let options = CPDFConvertHtmlOptions()
  377. options.isContainAnnotations = self.isContainAnnotations
  378. options.isAllowOCR = self.isAllowOCR
  379. if (self.isAllowOCR) {
  380. options.isContainOCRBgImage = self.isContainOCRBgImage
  381. if let language = self.ocrLanguage {
  382. options.language = language
  383. } else {
  384. options.language = .english
  385. }
  386. } else {
  387. options.isContainImages = self.isContainImages
  388. options.isContainOCRBgImage = false
  389. }
  390. self.converter = CPDFConverterHtml(url: URL(fileURLWithPath: self.filePath), password: self.password)
  391. self.converter.delegate = self
  392. self.converter.convert(toFilePath: self.outputFilePath, pageIndexs: self.pages, options: options)
  393. }
  394. }
  395. // MARK: - PDF 转 Text
  396. class KMPDFConvertText: KMPDFConvert {
  397. override init() {
  398. super.init()
  399. self.convertType = .text
  400. }
  401. override func startConvert() {
  402. if self.pathExtension.isEmpty {
  403. self.convertSuccessful(isSuccessful: false, errorInfo: nil)
  404. return
  405. }
  406. let options = CPDFConvertTxtOptions()
  407. options.isAllowOCR = self.isAllowOCR
  408. if (self.isAllowOCR) {
  409. if let language = self.ocrLanguage {
  410. options.language = language
  411. } else {
  412. options.language = .english
  413. }
  414. }
  415. self.converter = CPDFConverterTxt(url: URL(fileURLWithPath: self.filePath), password: self.password)
  416. self.converter.delegate = self
  417. self.converter.convert(toFilePath: self.outputFilePath, pageIndexs: self.pages, options: options)
  418. }
  419. }
  420. // MARK: - PDF 转 CSV
  421. class KMPDFConvertCSV: KMPDFConvert {
  422. override init() {
  423. super.init()
  424. self.convertType = .csv
  425. }
  426. override func startConvert() {
  427. if self.pathExtension.isEmpty {
  428. self.convertSuccessful(isSuccessful: false, errorInfo: nil)
  429. return
  430. }
  431. if (self.convertType == .csv && self.isExtractTable) {
  432. self.converter = CPDFConverterCsv(url: URL(fileURLWithPath: self.filePath), password: self.password)
  433. self.converter.delegate = self
  434. self.converter.convert(toFilePath: self.outputFilePath, pageIndexs: self.pages, options: nil)
  435. return
  436. }
  437. self.fpPDFConverter = CPDFConverterFP()
  438. self.fpPDFConverter.setDelegate(self)
  439. let options: [String : Any] = [CPDFConvertOptionsKey.allInOneSheet.rawValue : self.isAllInOneSheet]
  440. self.fpPDFConverter.convertPDF(atPath: self.filePath, pdfPassword: self.password, pdfPageIndexs: self.pages, destDocType: self.pathExtension, destDocPath: self.outputFilePath, moreOptions: options)
  441. }
  442. }
  443. // MARK: - PDF 转 Excel
  444. class KMPDFConvertExcel: KMPDFConvert {
  445. override init() {
  446. super.init()
  447. self.convertType = .excel
  448. }
  449. override func startConvert() {
  450. if (self.pathExtension.isEmpty) {
  451. self.convertSuccessful(isSuccessful: false, errorInfo: nil)
  452. return
  453. }
  454. self.converter = CPDFConverterExcel(url: URL(fileURLWithPath: self.filePath), password: self.password)
  455. self.converter.delegate = self
  456. let options = CPDFConvertExcelOptions()
  457. options.isContainAnnotations = self.isContainAnnotations
  458. options.isAllowOCR = self.isAllowOCR
  459. if (self.isAllowOCR) {
  460. options.isContainOCRBgImage = self.isContainOCRBgImage
  461. if let language = self.ocrLanguage {
  462. options.language = language
  463. } else {
  464. options.language = .english
  465. }
  466. } else {
  467. options.isContainImages = true
  468. options.isContainOCRBgImage = false
  469. }
  470. if (self.isExtractText) {
  471. options.contentOptions = .onlyText
  472. } else if (self.isExtractTable) {
  473. options.contentOptions = .onlyTable
  474. if (self.extractTableIndex == 0) {
  475. options.worksheetOptions = .forEachTable
  476. } else if (self.extractTableIndex == 1) {
  477. options.worksheetOptions = .forEachPage
  478. } else if (self.extractTableIndex == 2) {
  479. options.worksheetOptions = .forTheDocument
  480. }
  481. } else {
  482. options.contentOptions = .allContent
  483. if (self.isAllInOneSheet) {
  484. options.worksheetOptions = .forTheDocument
  485. } else {
  486. options.worksheetOptions = .forEachPage
  487. }
  488. }
  489. self.converter.convert(toFilePath: self.outputFilePath, pageIndexs: self.pages, options: options)
  490. }
  491. }