// KMGOCRManagerNew.swift (18 KB)
  1. //
  2. // KMGOCRManagerNew.swift
  3. // PDF Master
  4. //
  5. // Created by liujiajie on 2023/11/15.
  6. //
  7. import Foundation
  8. import Cocoa
  9. import Vision
  /// Dictionary key under which a language's code (e.g. "en", "zh-Hans") is stored
  /// in the entries of the (currently commented-out) `languages()` table.
  let KMGOCRLanguageCodeKey: String = "KMGOCRLanguageCodeKey"

  /// Dictionary key under which a language's display name (e.g. "English") is stored
  /// in the entries of the (currently commented-out) `languages()` table.
  let KMGOCRLanguageStringKey: String = "KMGOCRLanguageStringKey"

  /// Image scale factor.
  /// NOTE(review): not referenced anywhere in the visible part of this file — confirm where it is used.
  let KMImageScale: Double = 4.0
  /// Identifies which OCR engine a manager uses (see `KMGOCRManagerNew.OCRType`).
  ///
  /// `Int`-backed and marked `@objc` so it can be used from Objective-C.
  /// The capitalised case names are part of the existing interface and are kept as-is.
  @objc enum KMOCRType: Int {
      /// Google engine (raw value 0).
      case Google = 0
      /// Apple engine (raw value 1).
      case Apple = 1
  }
  /// Callbacks through which a `KMGOCRManagerNew` reports OCR progress.
  ///
  /// Every requirement is `@objc optional`, so conformers implement only what they need.
  /// The per-callback notes below describe how the (currently commented-out)
  /// implementation in this file invokes them.
  @objc(KMGOCRManagerNewDelegate)
  protocol KMGOCRManagerNewDelegate: AnyObject {
      /// Sent once before the Google path starts queueing images.
      @objc optional func GOCRManagerDidStartOCR(_ manager: KMGOCRManagerNew)

      /// Sent by the Apple path once the index has moved past the last image.
      @objc optional func GOCRManagerDidFinishOCR(_ manager: KMGOCRManagerNew)

      /// NOTE(review): no call site is visible in this file — confirm who sends this.
      @objc optional func GOCRManagerDidCancel(_ manager: KMGOCRManagerNew, atIndex: Int)

      /// Sent when processing of the image at `atIndex` begins.
      @objc optional func GOCRManagerDidStart(_ manager: KMGOCRManagerNew, atIndex: Int)

      /// Sent when the image at `atIndex` was recognised; `results` carries the recognition output.
      @objc optional func GOCRManagerDidFinish(_ manager: KMGOCRManagerNew, atIndex: Int, results: [Any])

      /// Sent when the image at `atIndex` could not be recognised; `error` may be `nil`.
      @objc optional func GOCRManagerDidFail(_ manager: KMGOCRManagerNew, atIndex: Int, error: Error?)
  }
  /// `OperationQueue` subclass that exposes a single shared queue for OCR operations.
  class KMOCROperationQueue: OperationQueue {
      /// Shared queue instance, created on first access.
      static let sharedInstance = KMOCROperationQueue()

      /// Enqueues `op` on this queue; forwards to `addOperation(_:)`.
      func addOCROperation(op: Operation) {
          addOperation(op)
      }

      /// Cancels the operations on this queue; forwards to `cancelAllOperations()`.
      func cancelAll() {
          cancelAllOperations()
      }
  }
  /// Holds the state for OCR of a batch of images and reports progress through `delegate`.
  ///
  /// Only the stored properties and the shared instance are live right now: every
  /// recognition method is still disabled inside the block comment further down.
  @objcMembers class KMGOCRManagerNew: NSObject, KMGOCROperationDelegate {
      /// Receiver of the OCR callbacks.
      ///
      /// Held weakly. `defaultManager` lives for the whole process, so a strong
      /// reference here would keep whichever object last registered itself alive
      /// forever, and would form a retain cycle whenever the delegate owns the
      /// manager. Callers must keep their own strong reference to the delegate.
      weak var delegate: KMGOCRManagerNewDelegate?

      /// Images to recognise. The disabled implementation accepts `NSImage` or `Data` elements.
      var images: [Any]?

      /// OCR engine to use; defaults to `.Apple`.
      var OCRType: KMOCRType = .Apple

      /// NOTE(review): not read by any code visible in this file — confirm intended use.
      var selectedLanguages: [Any]?

      /// NOTE(review): not read or written by any code visible in this file — confirm intended use.
      var isOCR = false

      /// Language codes handed to the Apple request by the disabled implementation.
      var languages: [String]?

      /// Output file type; the disabled implementation always sets this to "PDF".
      var fileType: String = ""

      /// Reset to an empty array at the start of each batch by the disabled implementation.
      var ocrPath: [Any]?

      /// Output location; the disabled implementation falls back to the Desktop directory.
      var filePath: URL?

      /// Index of the image the disabled Apple path is currently working on.
      var finishIndex: Int = 0

      /// The in-flight Vision text-recognition request, if any.
      var appleRequest: VNRecognizeTextRequest?

      /// Recognition level for the Apple path; the disabled code uses `.accurate` when this is `nil`.
      var appleRecognitionMode: VNRequestTextRecognitionLevel?

      /// Process-wide shared manager, created on first access.
      static let defaultManager = KMGOCRManagerNew()

      // NOTE(review): the implementation below is disabled. Before re-enabling it, address:
      //  - recognitionImages(_:withLanguages:fileType:filePath:) ignores its `fileType`
      //    argument (always "PDF") and builds the default path with URL(string:) instead
      //    of URL(fileURLWithPath:).
      //  - recognitionAppleImage(at:) force-unwraps NSImage(data:) and stalls silently
      //    when an element is neither NSImage nor Data.
      //  - The Vision completion handler passes `results!`, which crashes when
      //    `request.results` is not [VNRecognizedTextObservation]; `resultArray` and
      //    `reqResults` are computed but never used.
      //  - `try? handler.perform` swallows errors, and `appleRequest` is mutated from a
      //    background queue while also being read on the main queue.
      //  - responseDataRequest(_:dictionary:imageSize:) always returns an empty array
      //    because its body is itself commented out.
      /*class func languages() -> [[String: Any]] {
          if KMGOCRManagerNew.defaultManager.OCRType == .Google {
              return [[KMGOCRLanguageCodeKey: "af", KMGOCRLanguageStringKey: "Afrikaans"],
                      [KMGOCRLanguageCodeKey: "sq", KMGOCRLanguageStringKey: "Albanian"],
                      [KMGOCRLanguageCodeKey: "ar", KMGOCRLanguageStringKey: "Arabic"],
                      [KMGOCRLanguageCodeKey: "hy", KMGOCRLanguageStringKey: "Armenian"],
                      [KMGOCRLanguageCodeKey: "az", KMGOCRLanguageStringKey: "Azerbaijani"],
                      [KMGOCRLanguageCodeKey: "eu", KMGOCRLanguageStringKey: "Basque"],
                      [KMGOCRLanguageCodeKey: "be", KMGOCRLanguageStringKey: "Belarusian"],
                      [KMGOCRLanguageCodeKey: "bn", KMGOCRLanguageStringKey: "Bengali"],
                      [KMGOCRLanguageCodeKey: "bs", KMGOCRLanguageStringKey: "Bosnian"],
                      [KMGOCRLanguageCodeKey: "bg", KMGOCRLanguageStringKey: "Bulgarian"],
                      [KMGOCRLanguageCodeKey: "ca", KMGOCRLanguageStringKey: "Catalan"],
                      [KMGOCRLanguageCodeKey: "ceb", KMGOCRLanguageStringKey: "Cebuano"],
                      [KMGOCRLanguageCodeKey: "ny", KMGOCRLanguageStringKey: "Chichewa"],
                      [KMGOCRLanguageCodeKey: "zh-CN", KMGOCRLanguageStringKey: "Chinese Simplified"],
                      [KMGOCRLanguageCodeKey: "zh-TW", KMGOCRLanguageStringKey: "Chinese Traditional"],
                      [KMGOCRLanguageCodeKey: "hr", KMGOCRLanguageStringKey: "Croatian"],
                      [KMGOCRLanguageCodeKey: "cs", KMGOCRLanguageStringKey: "Czech"],
                      [KMGOCRLanguageCodeKey: "da", KMGOCRLanguageStringKey: "Danish"],
                      [KMGOCRLanguageCodeKey: "nl", KMGOCRLanguageStringKey: "Dutch"],
                      [KMGOCRLanguageCodeKey: "en", KMGOCRLanguageStringKey: "English"],
                      [KMGOCRLanguageCodeKey: "eo", KMGOCRLanguageStringKey: "Esperanto"],
                      [KMGOCRLanguageCodeKey: "et", KMGOCRLanguageStringKey: "Estonian"],
                      [KMGOCRLanguageCodeKey: "tl", KMGOCRLanguageStringKey: "Filipino"],
                      [KMGOCRLanguageCodeKey: "fi", KMGOCRLanguageStringKey: "Finnish"],
                      [KMGOCRLanguageCodeKey: "fr", KMGOCRLanguageStringKey: "French"],
                      [KMGOCRLanguageCodeKey: "gl", KMGOCRLanguageStringKey: "Galician"],
                      [KMGOCRLanguageCodeKey: "ka", KMGOCRLanguageStringKey: "Georgian"],
                      [KMGOCRLanguageCodeKey: "de", KMGOCRLanguageStringKey: "German"],
                      [KMGOCRLanguageCodeKey: "el", KMGOCRLanguageStringKey: "Greek"],
                      [KMGOCRLanguageCodeKey: "gu", KMGOCRLanguageStringKey: "Gujarati"],
                      [KMGOCRLanguageCodeKey: "ht", KMGOCRLanguageStringKey: "Haitian Creole"],
                      [KMGOCRLanguageCodeKey: "ha", KMGOCRLanguageStringKey: "Hausa"],
                      [KMGOCRLanguageCodeKey: "iw", KMGOCRLanguageStringKey: "Hebrew"],
                      [KMGOCRLanguageCodeKey: "hi", KMGOCRLanguageStringKey: "Hindi"],
                      [KMGOCRLanguageCodeKey: "hmn", KMGOCRLanguageStringKey: "Hmong"],
                      [KMGOCRLanguageCodeKey: "hu", KMGOCRLanguageStringKey: "Hungarian"],
                      [KMGOCRLanguageCodeKey: "is", KMGOCRLanguageStringKey: "Icelandic"],
                      [KMGOCRLanguageCodeKey: "ig", KMGOCRLanguageStringKey: "Igbo"],
                      [KMGOCRLanguageCodeKey: "id", KMGOCRLanguageStringKey: "Indonesian"],
                      [KMGOCRLanguageCodeKey: "ga", KMGOCRLanguageStringKey: "Irish"],
                      [KMGOCRLanguageCodeKey: "it", KMGOCRLanguageStringKey: "Italian"],
                      [KMGOCRLanguageCodeKey: "ja", KMGOCRLanguageStringKey: "Japanese"],
                      [KMGOCRLanguageCodeKey: "jw", KMGOCRLanguageStringKey: "Javanese"],
                      [KMGOCRLanguageCodeKey: "kn", KMGOCRLanguageStringKey: "Kannada"],
                      [KMGOCRLanguageCodeKey: "kk", KMGOCRLanguageStringKey: "Kazakh"],
                      [KMGOCRLanguageCodeKey: "km", KMGOCRLanguageStringKey: "Khmer"],
                      [KMGOCRLanguageCodeKey: "ko", KMGOCRLanguageStringKey: "Korean"],
                      [KMGOCRLanguageCodeKey: "lo", KMGOCRLanguageStringKey: "Lao"],
                      [KMGOCRLanguageCodeKey: "la", KMGOCRLanguageStringKey: "Latin"],
                      [KMGOCRLanguageCodeKey: "lv", KMGOCRLanguageStringKey: "Latvian"],
                      [KMGOCRLanguageCodeKey: "lt", KMGOCRLanguageStringKey: "Lithuanian"],
                      [KMGOCRLanguageCodeKey: "mk", KMGOCRLanguageStringKey: "Macedonian"],
                      [KMGOCRLanguageCodeKey: "mg", KMGOCRLanguageStringKey: "Malagasy"],
                      [KMGOCRLanguageCodeKey: "ms", KMGOCRLanguageStringKey: "Malay"],
                      [KMGOCRLanguageCodeKey: "ml", KMGOCRLanguageStringKey: "Malayalam"],
                      [KMGOCRLanguageCodeKey: "mt", KMGOCRLanguageStringKey: "Maltese"],
                      [KMGOCRLanguageCodeKey: "mi", KMGOCRLanguageStringKey: "Maori"],
                      [KMGOCRLanguageCodeKey: "mr", KMGOCRLanguageStringKey: "Marathi"],
                      [KMGOCRLanguageCodeKey: "mn", KMGOCRLanguageStringKey: "Mongolian"],
                      [KMGOCRLanguageCodeKey: "my", KMGOCRLanguageStringKey: "Myanmar (Burmese)"],
                      [KMGOCRLanguageCodeKey: "ne", KMGOCRLanguageStringKey: "Nepali"],
                      [KMGOCRLanguageCodeKey: "no", KMGOCRLanguageStringKey: "Norwegian"],
                      [KMGOCRLanguageCodeKey: "fa", KMGOCRLanguageStringKey: "Persian"],
                      [KMGOCRLanguageCodeKey: "pl", KMGOCRLanguageStringKey: "Polish"],
                      [KMGOCRLanguageCodeKey: "pt", KMGOCRLanguageStringKey: "Portuguese"],
                      [KMGOCRLanguageCodeKey: "ma", KMGOCRLanguageStringKey: "Punjabi"],
                      [KMGOCRLanguageCodeKey: "ro", KMGOCRLanguageStringKey: "Romanian"],
                      [KMGOCRLanguageCodeKey: "ru", KMGOCRLanguageStringKey: "Russian"],
                      [KMGOCRLanguageCodeKey: "sr", KMGOCRLanguageStringKey: "Serbian"],
                      [KMGOCRLanguageCodeKey: "st", KMGOCRLanguageStringKey: "Sesotho"],
                      [KMGOCRLanguageCodeKey: "si", KMGOCRLanguageStringKey: "Sinhala"],
                      [KMGOCRLanguageCodeKey:"sk", KMGOCRLanguageStringKey:"Slovak"],
                      [KMGOCRLanguageCodeKey:"sl", KMGOCRLanguageStringKey:"Slovenian"],
                      [KMGOCRLanguageCodeKey:"so", KMGOCRLanguageStringKey:"Somali"],
                      [KMGOCRLanguageCodeKey:"es", KMGOCRLanguageStringKey:"Spanish"],
                      [KMGOCRLanguageCodeKey:"su", KMGOCRLanguageStringKey:"Sudanese"],
                      [KMGOCRLanguageCodeKey:"sw", KMGOCRLanguageStringKey:"Swahili"],
                      [KMGOCRLanguageCodeKey:"sv", KMGOCRLanguageStringKey:"Swedish"],
                      [KMGOCRLanguageCodeKey:"tg", KMGOCRLanguageStringKey:"Tajik"],
                      [KMGOCRLanguageCodeKey:"ta", KMGOCRLanguageStringKey:"Tamil"],
                      [KMGOCRLanguageCodeKey:"te", KMGOCRLanguageStringKey:"Telugu"],
                      [KMGOCRLanguageCodeKey:"th", KMGOCRLanguageStringKey:"Thai"],
                      [KMGOCRLanguageCodeKey:"tr", KMGOCRLanguageStringKey:"Turkish"],
                      [KMGOCRLanguageCodeKey:"uk", KMGOCRLanguageStringKey:"Ukrainian"],
                      [KMGOCRLanguageCodeKey:"ur", KMGOCRLanguageStringKey:"Urdu"],
                      [KMGOCRLanguageCodeKey:"uz", KMGOCRLanguageStringKey:"Uzbek"],
                      [KMGOCRLanguageCodeKey:"vi", KMGOCRLanguageStringKey:"Vietnamese"],
                      [KMGOCRLanguageCodeKey:"cy", KMGOCRLanguageStringKey:"Welsh"],
                      [KMGOCRLanguageCodeKey:"yi", KMGOCRLanguageStringKey:"Yiddish"],
                      [KMGOCRLanguageCodeKey:"yo", KMGOCRLanguageStringKey:"Yoruba"],
                      [KMGOCRLanguageCodeKey:"zu", KMGOCRLanguageStringKey:"Zulu"]]
          }
          return [[KMGOCRLanguageCodeKey: "en-US", KMGOCRLanguageStringKey: "English"],
                  [KMGOCRLanguageCodeKey: "fr-FR", KMGOCRLanguageStringKey: "French"],
                  [KMGOCRLanguageCodeKey: "it-IT", KMGOCRLanguageStringKey: "Italian"],
                  [KMGOCRLanguageCodeKey: "de-DE", KMGOCRLanguageStringKey: "German"],
                  [KMGOCRLanguageCodeKey: "es-ES", KMGOCRLanguageStringKey: "Spanish"],
                  [KMGOCRLanguageCodeKey: "pt-BR", KMGOCRLanguageStringKey: "Portuguese"],
                  [KMGOCRLanguageCodeKey: "zh-Hant", KMGOCRLanguageStringKey: "Chinese Traditional"],
                  [KMGOCRLanguageCodeKey: "zh-Hans", KMGOCRLanguageStringKey: "Chinese Simplified"]
          ]
      }
      func recognitionImages(_ images: [Any], withLanguages languages: [Any]) {
          recognitionImages(images, withLanguages: languages, fileType: nil, filePath: nil)
      }
      func recognitionImages(_ images: [Any], withLanguages languages: [Any], fileType: String?, filePath: URL?) {
          self.ocrPath = []
          self.finishIndex = 0
          self.fileType = "PDF"
          self.images = images
          if filePath == nil {
              self.filePath = URL(string: NSSearchPathForDirectoriesInDomains(.desktopDirectory, .userDomainMask, true)[0])
          } else {
              self.filePath = filePath
          }
          if self.OCRType == .Google {
              gocrRecognitionImages(images, withLanguages: languages)
          } else {
              if #available(macOS 10.15, *) {
                  if appleRequest != nil {
                      appleRequest?.cancel()
                      appleRequest = nil
                  }
                  recognitionAppleImage(at: self.finishIndex)
              } else {
                  self.delegate?.GOCRManagerDidFail?(self, atIndex: self.finishIndex, error: nil)
              }
          }
      }
      func gocrRecognitionImages(_ images: [Any], withLanguages languages: [Any]) {
          if images.isEmpty || images.count == 0 { return }
          self.delegate?.GOCRManagerDidStartOCR?(self)
          for i in 0..<images.count {
              self.delegate?.GOCRManagerDidStart?(self, atIndex: i)
              let queue = KMOCROperationQueue.sharedInstance
              queue.maxConcurrentOperationCount = 1
              var image = NSImage()
              if images[i] is NSImage {
                  image = images[i] as! NSImage
              } else {
                  let data = images[i] as! Data
                  image = NSImage(data: data) ?? NSImage()
              }
              let op = KMGOCROperation(recognitionImg: image, imgIndex: i)
              op.selectedLanguages = NSMutableArray(array: languages) as? Array<Any>
              op.operationDelegate = self
              queue.addOperation(op)
          }
      }
      func recognitionAppleImage(at index: Int) {
          guard index < self.images?.count ?? 0 else {
              if #available(macOS 10.15, *) {
                  appleRequest?.cancel()
              }
              appleRequest = nil
              self.delegate?.GOCRManagerDidFinishOCR?(self)
              return
          }
          finishIndex = index
          delegate?.GOCRManagerDidStart?(self, atIndex: self.finishIndex)
          if let image = self.images?[finishIndex] as? NSImage {
              recognitionAppleImage(self.images?[self.finishIndex] as! NSImage)
          } else if let data = self.images?[finishIndex] as? Data {
              let image = NSImage(data: data)
              recognitionAppleImage(image!)
          }
      }
      func recognitionAppleImage(_ image: NSImage) {
          DispatchQueue.global().async { [weak self] in guard let self = self else { return }
              self.appleRequest = VNRecognizeTextRequest { [weak self] request, error in
                  guard let self = self else { return }
                  var results: [KMGOCRResult]? = nil
                  if let reqResults = request.results as? [VNRecognizedTextObservation] {
                      results = self.responseDataRequest(request, dictionary: nil, imageSize: image.size)
                  }
                  var resultArray = [[String: Any]]()
                  if let results = results {
                      for result in results {
                          let dic: [String: Any] = ["x": result.textBounds.origin.x,
                                                    "y": result.textBounds.origin.y,
                                                    "width": result.textBounds.size.width,
                                                    "height": result.textBounds.size.height,
                                                    "text": result.text]
                          resultArray.append(dic)
                      }
                  }
                  DispatchQueue.main.async {
                      if let error = error {
                          self.delegate?.GOCRManagerDidFail?(self, atIndex: self.finishIndex, error: error)
                      } else {
                          self.delegate?.GOCRManagerDidFinish?(self, atIndex: self.finishIndex, results: results!)
                      }
                      self.recognitionAppleImage(at: self.finishIndex + 1)
                  }
              }
              self.appleRequest?.usesCPUOnly = true
              self.appleRequest?.recognitionLevel = self.appleRecognitionMode ?? .accurate
              var array: Array<String> = self.languages ?? []
              if array.contains("zh-Hant") {
                  array.removeAll(where: { $0 == "zh-Hant" })
                  array.insert("zh-Hant", at: 0)
              }
              if array.contains("zh-Hans") {
                  array.removeAll(where: { $0 == "zh-Hans" })
                  array.insert("zh-Hans", at: 0)
              }
              if array.isEmpty {
                  array = ["zh-Hans", "zh-Hant"]
              }
              self.appleRequest?.recognitionLanguages = array
              let options = [VNImageOption: Any]()
              if let cgImage = self.nsImageToCGImageRef(image) {
                  let handler = VNImageRequestHandler(cgImage: cgImage, options: options)
                  try? handler.perform([self.appleRequest!])
              }
          }
      }
      func nsImageToCGImageRef(_ image: NSImage) -> CGImage? {
          guard let imageData = image.tiffRepresentation else { return nil }
          if let imageSource = CGImageSourceCreateWithData(imageData as CFData, nil), let imageRef = CGImageSourceCreateImageAtIndex(imageSource, 0, nil) {
              return imageRef
          }
          return nil
      }
      func responseDataRequest(_ request: VNRequest, dictionary: NSDictionary?, imageSize: CGSize) -> [KMGOCRResult] {
          var results: Array<KMGOCRResult> = Array()
          let maximumCandidates = 1
          var OCRStr = ""
          // if let observations = request.results as? [VNRecognizedTextObservation] {
          // for observation in observations {
          // if let text = observation.topCandidates(maximumCandidates).first {
          // OCRStr.append("\(text.string)\n")
          //
          // var x: CGFloat = 0, y: CGFloat = 0, width: CGFloat = 0, height: CGFloat = 0
          // var error: Error?
          // let cnt = text.string.count
          // let range = 0 ..< cnt
          //
          // if let rectangleObservation = text.boundingBox(for: Range(location: 0, length: text.string.count), error: &error) {
          // x = rectangleObservation.topLeft.x * imageSize.width
          // y = (1 - rectangleObservation.topLeft.y) * imageSize.height
          // width = rectangleObservation.boundingBox.size.width * imageSize.width
          // height = rectangleObservation.boundingBox.size.height * imageSize.height
          // }
          //
          // let result = KMGOCRResult()
          // result.text = text.string
          // result.locale = ""
          // result.textBounds = CGRect(x: x, y: y, width: width, height: height)
          // results.append(result)
          // }
          // }
          // // Following Google's logic, the first element of the array represents the text of the entire image
          // if results.count > 0 {
          // let result = KMGOCRResult()
          // result.text = OCRStr
          // if self.languages.count > 0 {
          // result.locale = self.languages[0]
          // }
          // result.textBounds = CGRect.zero
          // results.insert(result, at: 0)
          // }
          // }
          return results
      }
      */
  }