//
//  KMGOCRManagerNew.swift
//  PDF Reader Pro
//
//  Created by liujiajie on 2023/11/15.
//
  7. import Foundation
  8. import Cocoa
  9. import Vision
  10. let KMGOCRLanguageCodeKey = "KMGOCRLanguageCodeKey"
  11. let KMGOCRLanguageStringKey = "KMGOCRLanguageStringKey"
  12. let KMImageScale = 4.0
  13. //@objc enum KMOCRType: Int {
  14. // case Google = 0
  15. // case Apple
  16. //}
  17. @objc(KMGOCRManagerNewDelegate)
  18. protocol KMGOCRManagerNewDelegate: AnyObject {
  19. @objc optional func GOCRManagerDidStartOCR(_ manager: KMGOCRManagerNew)
  20. @objc optional func GOCRManagerDidFinishOCR(_ manager: KMGOCRManagerNew)
  21. @objc optional func GOCRManagerDidCancel(_ manager:KMGOCRManagerNew, atIndex:Int)
  22. @objc optional func GOCRManagerDidStart(_ manager:KMGOCRManagerNew, atIndex:Int)
  23. @objc optional func GOCRManagerDidFinish(_ manager:KMGOCRManagerNew, atIndex:Int, results: [Any])
  24. @objc optional func GOCRManagerDidFail(_ manager:KMGOCRManagerNew, atIndex:Int, error: Error?)
  25. }
  26. //class KMOCROperationQueue: OperationQueue{
  27. // static let sharedInstance: KMOCROperationQueue = {
  28. // let queue = KMOCROperationQueue()
  29. // return queue
  30. // }()
  31. //
  32. // func addOCROperation(op: Operation) {
  33. // self.addOperation(op)
  34. // }
  35. // func cancelAll() {
  36. // self.cancelAllOperations()
  37. // }
  38. //
  39. //}
  40. @objcMembers class KMGOCRManagerNew: NSObject, KMGOCROperationDelegate{
  41. var delegate: KMGOCRManagerNewDelegate?
  42. var images: Array<Any>?
  43. var OCRType: KMOCRType = .apple
  44. var selectedLanguages: Array<Any>?
  45. var isOCR = false
  46. var languages: Array<String>?
  47. var fileType: String = ""
  48. var ocrPath: Array<Any>?
  49. var filePath: URL?
  50. var finishIndex: Int = 0
  51. var appleRequest: VNRecognizeTextRequest?
  52. var appleRecognitionMode: VNRequestTextRecognitionLevel?
  53. override init() {
  54. super.init()
  55. }
  56. static let defaultManager: KMGOCRManagerNew = {
  57. let manager = KMGOCRManagerNew()
  58. return manager
  59. }()
  60. /*class func languages() -> [[String: Any]] {
  61. if KMGOCRManagerNew.defaultManager.OCRType == .Google {
  62. return [[KMGOCRLanguageCodeKey: "af", KMGOCRLanguageStringKey: "Afrikaans"],
  63. [KMGOCRLanguageCodeKey: "sq", KMGOCRLanguageStringKey: "Albanian"],
  64. [KMGOCRLanguageCodeKey: "ar", KMGOCRLanguageStringKey: "Arabic"],
  65. [KMGOCRLanguageCodeKey: "hy", KMGOCRLanguageStringKey: "Armenian"],
  66. [KMGOCRLanguageCodeKey: "az", KMGOCRLanguageStringKey: "Azerbaijani"],
  67. [KMGOCRLanguageCodeKey: "eu", KMGOCRLanguageStringKey: "Basque"],
  68. [KMGOCRLanguageCodeKey: "be", KMGOCRLanguageStringKey: "Belarusian"],
  69. [KMGOCRLanguageCodeKey: "bn", KMGOCRLanguageStringKey: "Bengali"],
  70. [KMGOCRLanguageCodeKey: "bs", KMGOCRLanguageStringKey: "Bosnian"],
  71. [KMGOCRLanguageCodeKey: "bg", KMGOCRLanguageStringKey: "Bulgarian"],
  72. [KMGOCRLanguageCodeKey: "ca", KMGOCRLanguageStringKey: "Catalan"],
  73. [KMGOCRLanguageCodeKey: "ceb", KMGOCRLanguageStringKey: "Cebuano"],
  74. [KMGOCRLanguageCodeKey: "ny", KMGOCRLanguageStringKey: "Chichewa"],
  75. [KMGOCRLanguageCodeKey: "zh-CN", KMGOCRLanguageStringKey: "Chinese Simplified"],
  76. [KMGOCRLanguageCodeKey: "zh-TW", KMGOCRLanguageStringKey: "Chinese Traditional"],
  77. [KMGOCRLanguageCodeKey: "hr", KMGOCRLanguageStringKey: "Croatian"],
  78. [KMGOCRLanguageCodeKey: "cs", KMGOCRLanguageStringKey: "Czech"],
  79. [KMGOCRLanguageCodeKey: "da", KMGOCRLanguageStringKey: "Danish"],
  80. [KMGOCRLanguageCodeKey: "nl", KMGOCRLanguageStringKey: "Dutch"],
  81. [KMGOCRLanguageCodeKey: "en", KMGOCRLanguageStringKey: "English"],
  82. [KMGOCRLanguageCodeKey: "eo", KMGOCRLanguageStringKey: "Esperanto"],
  83. [KMGOCRLanguageCodeKey: "et", KMGOCRLanguageStringKey: "Estonian"],
  84. [KMGOCRLanguageCodeKey: "tl", KMGOCRLanguageStringKey: "Filipino"],
  85. [KMGOCRLanguageCodeKey: "fi", KMGOCRLanguageStringKey: "Finnish"],
  86. [KMGOCRLanguageCodeKey: "fr", KMGOCRLanguageStringKey: "French"],
  87. [KMGOCRLanguageCodeKey: "gl", KMGOCRLanguageStringKey: "Galician"],
  88. [KMGOCRLanguageCodeKey: "ka", KMGOCRLanguageStringKey: "Georgian"],
  89. [KMGOCRLanguageCodeKey: "de", KMGOCRLanguageStringKey: "German"],
  90. [KMGOCRLanguageCodeKey: "el", KMGOCRLanguageStringKey: "Greek"],
  91. [KMGOCRLanguageCodeKey: "gu", KMGOCRLanguageStringKey: "Gujarati"],
  92. [KMGOCRLanguageCodeKey: "ht", KMGOCRLanguageStringKey: "Haitian Creole"],
  93. [KMGOCRLanguageCodeKey: "ha", KMGOCRLanguageStringKey: "Hausa"],
  94. [KMGOCRLanguageCodeKey: "iw", KMGOCRLanguageStringKey: "Hebrew"],
  95. [KMGOCRLanguageCodeKey: "hi", KMGOCRLanguageStringKey: "Hindi"],
  96. [KMGOCRLanguageCodeKey: "hmn", KMGOCRLanguageStringKey: "Hmong"],
  97. [KMGOCRLanguageCodeKey: "hu", KMGOCRLanguageStringKey: "Hungarian"],
  98. [KMGOCRLanguageCodeKey: "is", KMGOCRLanguageStringKey: "Icelandic"],
  99. [KMGOCRLanguageCodeKey: "ig", KMGOCRLanguageStringKey: "Igbo"],
  100. [KMGOCRLanguageCodeKey: "id", KMGOCRLanguageStringKey: "Indonesian"],
  101. [KMGOCRLanguageCodeKey: "ga", KMGOCRLanguageStringKey: "Irish"],
  102. [KMGOCRLanguageCodeKey: "it", KMGOCRLanguageStringKey: "Italian"],
  103. [KMGOCRLanguageCodeKey: "ja", KMGOCRLanguageStringKey: "Japanese"],
  104. [KMGOCRLanguageCodeKey: "jw", KMGOCRLanguageStringKey: "Javanese"],
  105. [KMGOCRLanguageCodeKey: "kn", KMGOCRLanguageStringKey: "Kannada"],
  106. [KMGOCRLanguageCodeKey: "kk", KMGOCRLanguageStringKey: "Kazakh"],
  107. [KMGOCRLanguageCodeKey: "km", KMGOCRLanguageStringKey: "Khmer"],
  108. [KMGOCRLanguageCodeKey: "ko", KMGOCRLanguageStringKey: "Korean"],
  109. [KMGOCRLanguageCodeKey: "lo", KMGOCRLanguageStringKey: "Lao"],
  110. [KMGOCRLanguageCodeKey: "la", KMGOCRLanguageStringKey: "Latin"],
  111. [KMGOCRLanguageCodeKey: "lv", KMGOCRLanguageStringKey: "Latvian"],
  112. [KMGOCRLanguageCodeKey: "lt", KMGOCRLanguageStringKey: "Lithuanian"],
  113. [KMGOCRLanguageCodeKey: "mk", KMGOCRLanguageStringKey: "Macedonian"],
  114. [KMGOCRLanguageCodeKey: "mg", KMGOCRLanguageStringKey: "Malagasy"],
  115. [KMGOCRLanguageCodeKey: "ms", KMGOCRLanguageStringKey: "Malay"],
  116. [KMGOCRLanguageCodeKey: "ml", KMGOCRLanguageStringKey: "Malayalam"],
  117. [KMGOCRLanguageCodeKey: "mt", KMGOCRLanguageStringKey: "Maltese"],
  118. [KMGOCRLanguageCodeKey: "mi", KMGOCRLanguageStringKey: "Maori"],
  119. [KMGOCRLanguageCodeKey: "mr", KMGOCRLanguageStringKey: "Marathi"],
  120. [KMGOCRLanguageCodeKey: "mn", KMGOCRLanguageStringKey: "Mongolian"],
  121. [KMGOCRLanguageCodeKey: "my", KMGOCRLanguageStringKey: "Myanmar (Burmese)"],
  122. [KMGOCRLanguageCodeKey: "ne", KMGOCRLanguageStringKey: "Nepali"],
  123. [KMGOCRLanguageCodeKey: "no", KMGOCRLanguageStringKey: "Norwegian"],
  124. [KMGOCRLanguageCodeKey: "fa", KMGOCRLanguageStringKey: "Persian"],
  125. [KMGOCRLanguageCodeKey: "pl", KMGOCRLanguageStringKey: "Polish"],
  126. [KMGOCRLanguageCodeKey: "pt", KMGOCRLanguageStringKey: "Portuguese"],
  127. [KMGOCRLanguageCodeKey: "ma", KMGOCRLanguageStringKey: "Punjabi"],
  128. [KMGOCRLanguageCodeKey: "ro", KMGOCRLanguageStringKey: "Romanian"],
  129. [KMGOCRLanguageCodeKey: "ru", KMGOCRLanguageStringKey: "Russian"],
  130. [KMGOCRLanguageCodeKey: "sr", KMGOCRLanguageStringKey: "Serbian"],
  131. [KMGOCRLanguageCodeKey: "st", KMGOCRLanguageStringKey: "Sesotho"],
  132. [KMGOCRLanguageCodeKey: "si", KMGOCRLanguageStringKey: "Sinhala"],
  133. [KMGOCRLanguageCodeKey:"sk", KMGOCRLanguageStringKey:"Slovak"],
  134. [KMGOCRLanguageCodeKey:"sl", KMGOCRLanguageStringKey:"Slovenian"],
  135. [KMGOCRLanguageCodeKey:"so", KMGOCRLanguageStringKey:"Somali"],
  136. [KMGOCRLanguageCodeKey:"es", KMGOCRLanguageStringKey:"Spanish"],
  137. [KMGOCRLanguageCodeKey:"su", KMGOCRLanguageStringKey:"Sudanese"],
  138. [KMGOCRLanguageCodeKey:"sw", KMGOCRLanguageStringKey:"Swahili"],
  139. [KMGOCRLanguageCodeKey:"sv", KMGOCRLanguageStringKey:"Swedish"],
  140. [KMGOCRLanguageCodeKey:"tg", KMGOCRLanguageStringKey:"Tajik"],
  141. [KMGOCRLanguageCodeKey:"ta", KMGOCRLanguageStringKey:"Tamil"],
  142. [KMGOCRLanguageCodeKey:"te", KMGOCRLanguageStringKey:"Telugu"],
  143. [KMGOCRLanguageCodeKey:"th", KMGOCRLanguageStringKey:"Thai"],
  144. [KMGOCRLanguageCodeKey:"tr", KMGOCRLanguageStringKey:"Turkish"],
  145. [KMGOCRLanguageCodeKey:"uk", KMGOCRLanguageStringKey:"Ukrainian"],
  146. [KMGOCRLanguageCodeKey:"ur", KMGOCRLanguageStringKey:"Urdu"],
  147. [KMGOCRLanguageCodeKey:"uz", KMGOCRLanguageStringKey:"Uzbek"],
  148. [KMGOCRLanguageCodeKey:"vi", KMGOCRLanguageStringKey:"Vietnamese"],
  149. [KMGOCRLanguageCodeKey:"cy", KMGOCRLanguageStringKey:"Welsh"],
  150. [KMGOCRLanguageCodeKey:"yi", KMGOCRLanguageStringKey:"Yiddish"],
  151. [KMGOCRLanguageCodeKey:"yo", KMGOCRLanguageStringKey:"Yoruba"],
  152. [KMGOCRLanguageCodeKey:"zu", KMGOCRLanguageStringKey:"Zulu"]]
  153. }
  154. return [[KMGOCRLanguageCodeKey: "en-US", KMGOCRLanguageStringKey: "English"],
  155. [KMGOCRLanguageCodeKey: "fr-FR", KMGOCRLanguageStringKey: "French"],
  156. [KMGOCRLanguageCodeKey: "it-IT", KMGOCRLanguageStringKey: "Italian"],
  157. [KMGOCRLanguageCodeKey: "de-DE", KMGOCRLanguageStringKey: "German"],
  158. [KMGOCRLanguageCodeKey: "es-ES", KMGOCRLanguageStringKey: "Spanish"],
  159. [KMGOCRLanguageCodeKey: "pt-BR", KMGOCRLanguageStringKey: "Portuguese"],
  160. [KMGOCRLanguageCodeKey: "zh-Hant", KMGOCRLanguageStringKey: "Chinese Traditional"],
  161. [KMGOCRLanguageCodeKey: "zh-Hans", KMGOCRLanguageStringKey: "Chinese Simplified"]
  162. ]
  163. }
  164. func recognitionImages(_ images: [Any], withLanguages languages: [Any]) {
  165. recognitionImages(images, withLanguages: languages, fileType: nil, filePath: nil)
  166. }
  167. func recognitionImages(_ images: [Any], withLanguages languages: [Any], fileType: String?, filePath: URL?) {
  168. self.ocrPath = []
  169. self.finishIndex = 0
  170. self.fileType = "PDF"
  171. self.images = images
  172. if filePath == nil {
  173. self.filePath = URL(string: NSSearchPathForDirectoriesInDomains(.desktopDirectory, .userDomainMask, true)[0])
  174. } else {
  175. self.filePath = filePath
  176. }
  177. if self.OCRType == .Google {
  178. gocrRecognitionImages(images, withLanguages: languages)
  179. } else {
  180. if #available(macOS 10.15, *) {
  181. if appleRequest != nil {
  182. appleRequest?.cancel()
  183. appleRequest = nil
  184. }
  185. recognitionAppleImage(at: self.finishIndex)
  186. } else {
  187. self.delegate?.GOCRManagerDidFail?(self, atIndex: self.finishIndex, error: nil)
  188. }
  189. }
  190. }
  191. func gocrRecognitionImages(_ images: [Any], withLanguages languages: [Any]) {
  192. if images.isEmpty || images.count == 0 { return }
  193. self.delegate?.GOCRManagerDidStartOCR?(self)
  194. for i in 0..<images.count {
  195. self.delegate?.GOCRManagerDidStart?(self, atIndex: i)
  196. let queue = KMOCROperationQueue.sharedInstance
  197. queue.maxConcurrentOperationCount = 1
  198. var image = NSImage()
  199. if images[i] is NSImage {
  200. image = images[i] as! NSImage
  201. } else {
  202. let data = images[i] as! Data
  203. image = NSImage(data: data) ?? NSImage()
  204. }
  205. let op = KMGOCROperation(recognitionImg: image, imgIndex: i)
  206. op.selectedLanguages = NSMutableArray(array: languages) as? Array<Any>
  207. op.operationDelegate = self
  208. queue.addOperation(op)
  209. }
  210. }
  211. func recognitionAppleImage(at index: Int) {
  212. guard index < self.images?.count ?? 0 else {
  213. if #available(macOS 10.15, *) {
  214. appleRequest?.cancel()
  215. }
  216. appleRequest = nil
  217. self.delegate?.GOCRManagerDidFinishOCR?(self)
  218. return
  219. }
  220. finishIndex = index
  221. delegate?.GOCRManagerDidStart?(self, atIndex: self.finishIndex)
  222. if let image = self.images?[finishIndex] as? NSImage {
  223. recognitionAppleImage(self.images?[self.finishIndex] as! NSImage)
  224. } else if let data = self.images?[finishIndex] as? Data {
  225. let image = NSImage(data: data)
  226. recognitionAppleImage(image!)
  227. }
  228. }
  229. func recognitionAppleImage(_ image: NSImage) {
  230. DispatchQueue.global().async { [weak self] in guard let self = self else { return }
  231. self.appleRequest = VNRecognizeTextRequest { [weak self] request, error in
  232. guard let self = self else { return }
  233. var results: [KMGOCRResult]? = nil
  234. if let reqResults = request.results as? [VNRecognizedTextObservation] {
  235. results = self.responseDataRequest(request, dictionary: nil, imageSize: image.size)
  236. }
  237. var resultArray = [[String: Any]]()
  238. if let results = results {
  239. for result in results {
  240. let dic: [String: Any] = ["x": result.textBounds.origin.x,
  241. "y": result.textBounds.origin.y,
  242. "width": result.textBounds.size.width,
  243. "height": result.textBounds.size.height,
  244. "text": result.text]
  245. resultArray.append(dic)
  246. }
  247. }
  248. DispatchQueue.main.async {
  249. if let error = error {
  250. self.delegate?.GOCRManagerDidFail?(self, atIndex: self.finishIndex, error: error)
  251. } else {
  252. self.delegate?.GOCRManagerDidFinish?(self, atIndex: self.finishIndex, results: results!)
  253. }
  254. self.recognitionAppleImage(at: self.finishIndex + 1)
  255. }
  256. }
  257. self.appleRequest?.usesCPUOnly = true
  258. self.appleRequest?.recognitionLevel = self.appleRecognitionMode ?? .accurate
  259. var array: Array<String> = self.languages ?? []
  260. if array.contains("zh-Hant") {
  261. array.removeAll(where: { $0 == "zh-Hant" })
  262. array.insert("zh-Hant", at: 0)
  263. }
  264. if array.contains("zh-Hans") {
  265. array.removeAll(where: { $0 == "zh-Hans" })
  266. array.insert("zh-Hans", at: 0)
  267. }
  268. if array.isEmpty {
  269. array = ["zh-Hans", "zh-Hant"]
  270. }
  271. self.appleRequest?.recognitionLanguages = array
  272. let options = [VNImageOption: Any]()
  273. if let cgImage = self.nsImageToCGImageRef(image) {
  274. let handler = VNImageRequestHandler(cgImage: cgImage, options: options)
  275. try? handler.perform([self.appleRequest!])
  276. }
  277. }
  278. }
  279. func nsImageToCGImageRef(_ image: NSImage) -> CGImage? {
  280. guard let imageData = image.tiffRepresentation else { return nil }
  281. if let imageSource = CGImageSourceCreateWithData(imageData as CFData, nil), let imageRef = CGImageSourceCreateImageAtIndex(imageSource, 0, nil) {
  282. return imageRef
  283. }
  284. return nil
  285. }
  286. func responseDataRequest(_ request: VNRequest, dictionary: NSDictionary?, imageSize: CGSize) -> [KMGOCRResult] {
  287. var results: Array<KMGOCRResult> = Array()
  288. let maximumCandidates = 1
  289. var OCRStr = ""
  290. // if let observations = request.results as? [VNRecognizedTextObservation] {
  291. // for observation in observations {
  292. // if let text = observation.topCandidates(maximumCandidates).first {
  293. // OCRStr.append("\(text.string)\n")
  294. //
  295. // var x: CGFloat = 0, y: CGFloat = 0, width: CGFloat = 0, height: CGFloat = 0
  296. // var error: Error?
  297. // let cnt = text.string.count
  298. // let range = 0 ..< cnt
  299. //
  300. // if let rectangleObservation = text.boundingBox(for: Range(location: 0, length: text.string.count), error: &error) {
  301. // x = rectangleObservation.topLeft.x * imageSize.width
  302. // y = (1 - rectangleObservation.topLeft.y) * imageSize.height
  303. // width = rectangleObservation.boundingBox.size.width * imageSize.width
  304. // height = rectangleObservation.boundingBox.size.height * imageSize.height
  305. // }
  306. //
  307. // let result = KMGOCRResult()
  308. // result.text = text.string
  309. // result.locale = ""
  310. // result.textBounds = CGRect(x: x, y: y, width: width, height: height)
  311. // results.append(result)
  312. // }
  313. // }
  314. // // Following Google's logic, the first element of the array represents the text of the entire image
  315. // if results.count > 0 {
  316. // let result = KMGOCRResult()
  317. // result.text = OCRStr
  318. // if self.languages.count > 0 {
  319. // result.locale = self.languages[0]
  320. // }
  321. // result.textBounds = CGRect.zero
  322. // results.insert(result, at: 0)
  323. // }
  324. // }
  325. return results
  326. }
  327. */
  328. }