KMGOCROperation.swift 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304
  1. //
  2. // KMGOCROperation.swift
  3. // PDF Reader Pro
  4. //
  5. // Created by liujiajie on 2023/11/15.
  6. //
  7. import Foundation
  /// Receives progress callbacks from a `KMGOCROperation`.
  ///
  /// Every method is optional. The `index` argument is the operation's
  /// `imageIndex`, i.e. the value passed to `init(recognitionImg:imgIndex:)`.
  @objc(KMGOCROperationDelegate)
  protocol KMGOCROperationDelegate: AnyObject {

      /// Sent from `KMGOCROperation.cancel()`.
      @objc optional func GOCROperation(_ operation: KMGOCROperation,
                                        cancelOCRImageAtIndex index: Int)

      /// Sent when the operation begins recognising its image.
      @objc optional func GOCROperation(_ operation: KMGOCROperation,
                                        startOCRImageAtIndex index: Int)

      /// Sent when the service response was parsed successfully.
      /// `results` holds the objects produced by `responseDataResults(_:)`.
      @objc optional func GOCROperation(_ operation: KMGOCROperation,
                                        finishOCRImageAtIndex index: Int,
                                        results: [Any])

      /// Sent when recognition failed. `error` is the transport error when there
      /// is one, and `nil` otherwise (for example when the image could not be
      /// encoded or the response could not be interpreted).
      @objc optional func GOCROperation(_ operation: KMGOCROperation,
                                        failureOCRImageAtIndex index: Int,
                                        error: Error?)
  }
  /// Endpoint of the Google Cloud Vision `images:annotate` REST method.
  let KMGOC_API_URL: String = "https://vision.googleapis.com/v1/images:annotate"

  // NOTE(review): the API keys below are embedded in source and therefore ship
  // inside the binary. Confirm they are restricted on the provider side and
  // consider loading them from configuration instead of hard-coding them.
  #if VERSION_DMG
  /// API key used when the `VERSION_DMG` compilation condition is set.
  let KMGOC_API_KEY: String = "AIzaSyBhSRohpngAu8pSgFDXPytslNDHgGm7uDs"
  #else
  /// API key used when the `VERSION_DMG` compilation condition is not set.
  let KMGOC_API_KEY: String = "AIzaSyCJuqJ9YvtkFKMl1mW3Yq-av3mmI9ScbRY"
  #endif
  /// Asynchronous `Operation` that sends one image to the Google Cloud Vision
  /// `images:annotate` endpoint (`TEXT_DETECTION`) and reports the outcome to
  /// `operationDelegate`.
  ///
  /// The operation tracks its own `isExecuting` / `isFinished` / `isCancelled`
  /// state in the `has…` stored flags and posts the matching KVO notifications
  /// itself, because the network request completes after `start()` returns.
  @objcMembers class KMGOCROperation: Operation {

      /// Encoded images larger than this (4 MiB) are re-encoded as JPEG before upload.
      private static let maxImageByteCount = 4_194_304

      /// Receiver of the start / finish / failure / cancel callbacks.
      /// Held weakly so the operation does not keep its owner alive.
      @objc weak var operationDelegate: KMGOCROperationDelegate?

      /// Backing storage for `isCancelled`.
      var hasCanceled: Bool = false
      /// Backing storage for `isFinished`.
      var hasFinished: Bool = false
      /// Backing storage for `isExecuting` (spelling kept for source compatibility).
      var hasExcuting: Bool = false

      /// Optional language hints forwarded as `imageContext.languageHints`.
      /// Must contain JSON-compatible values (strings).
      var selectedLanguages: [Any]?

      override var isAsynchronous: Bool {
          return true
      }

      override var isExecuting: Bool {
          return hasExcuting
      }

      override var isFinished: Bool {
          return hasFinished
      }

      override var isCancelled: Bool {
          return hasCanceled
      }

      /// Timestamp-based name generated at init; also used as the operation's `name`.
      var fileName: String = ""
      /// Image to recognise.
      var orcImage: NSImage?
      /// In-flight request, if any.
      var task: URLSessionDataTask?
      /// Caller-supplied index that is echoed back in every delegate callback.
      var imageIndex: Int = 0

      /// Creates an operation for `recognitionImg`.
      /// - Parameters:
      ///   - recognitionImg: Image to run text detection on.
      ///   - imgIndex: Index reported back to the delegate.
      init(recognitionImg: NSImage, imgIndex: Int) {
          super.init()
          imageIndex = imgIndex
          fileName = fileNameWithDate()
          orcImage = recognitionImg
          queuePriority = .normal
          name = fileName
          hasExcuting = false
          hasFinished = false
      }

      /// Returns `"<timestamp> <imageIndex>"` for the current date.
      func fileNameWithDate() -> String {
          let formatter = DateFormatter()
          // Fixed-format timestamps must not depend on the user's locale/calendar.
          formatter.locale = Locale(identifier: "en_US_POSIX")
          // `yyyy` = calendar year (`YYYY` is the week-based year and is wrong
          // around New Year); `HH` = 24-hour clock (`hh` is ambiguous without AM/PM).
          formatter.dateFormat = "yyyy-MM-dd-HH-mm-ss-SSS"
          return "\(formatter.string(from: Date())) \(imageIndex)"
      }

      // MARK: - Operation life cycle

      /// Marks the operation as executing and starts the recognition request.
      /// If the operation was cancelled beforehand it is finished immediately.
      override func start() {
          if p_checkCancelled() { return }

          willChangeValue(forKey: "isExecuting")
          hasExcuting = true
          didChangeValue(forKey: "isExecuting")

          // A KVO observer may have cancelled the operation in the meantime.
          if p_checkCancelled() { return }
          recognitionImage(orcImage ?? NSImage())
      }

      /// Cancels the in-flight request, notifies the delegate (synchronously, on
      /// the calling thread) and updates the operation state.
      ///
      /// A running operation is finished right away. One that has not started
      /// yet is only flagged; `start()` finishes it when the queue gets to it.
      override func cancel() {
          super.cancel()
          task?.cancel()
          task = nil
          operationDelegate?.GOCROperation?(self, cancelOCRImageAtIndex: imageIndex)

          if !hasCanceled {
              willChangeValue(forKey: "isCancelled")
              hasCanceled = true
              didChangeValue(forKey: "isCancelled")
          }
          if hasExcuting {
              p_done()
          }
      }

      /// Moves the operation to the finished state: `isExecuting` becomes `false`
      /// and `isFinished` becomes `true`. Does nothing if already finished, so the
      /// `isFinished` KVO notification is posted only once.
      func p_done() {
          guard !hasFinished else { return }
          willChangeValue(forKey: "isFinished")
          willChangeValue(forKey: "isExecuting")
          hasExcuting = false
          hasFinished = true
          didChangeValue(forKey: "isExecuting")
          didChangeValue(forKey: "isFinished")
      }

      /// Finishes the operation if it has been cancelled.
      /// - Returns: `true` when the operation was cancelled.
      func p_checkCancelled() -> Bool {
          guard isCancelled else { return false }
          p_done()
          return true
      }

      // MARK: - Recognition

      /// Uploads `image` for text detection and reports the outcome.
      ///
      /// The delegate receives the start callback immediately. If the request
      /// cannot be built, it receives the failure callback (with a `nil` error)
      /// and the operation is finished. Otherwise the finish/failure callback is
      /// dispatched to the main queue once the response arrives.
      func recognitionImage(_ image: NSImage) {
          operationDelegate?.GOCROperation?(self, startOCRImageAtIndex: imageIndex)

          guard let request = p_makeRequest(for: image) else {
              operationDelegate?.GOCROperation?(self, failureOCRImageAtIndex: imageIndex, error: nil)
              // Without this the operation would stay "executing" forever and
              // block its queue.
              p_done()
              return
          }

          let dataTask = URLSession.shared.dataTask(with: request) { data, _, error in
              // `cancel()` already finished the operation and told the delegate.
              if (error as NSError?)?.code == NSURLErrorCancelled {
                  return
              }

              // `results` stays nil on transport errors and on unusable responses.
              let results: [Any]? = (error == nil) ? self.p_parseResults(from: data) : nil

              DispatchQueue.main.async {
                  if let results = results {
                      self.operationDelegate?.GOCROperation?(self, finishOCRImageAtIndex: self.imageIndex, results: results)
                  } else {
                      self.operationDelegate?.GOCROperation?(self, failureOCRImageAtIndex: self.imageIndex, error: error)
                  }
              }

              if results == nil {
                  // Failure is also reported as a cancellation, as before.
                  self.cancel()
              } else {
                  self.p_done()
              }
          }
          task = dataTask
          dataTask.resume()
      }

      /// Builds the `images:annotate` POST request for `image`, or returns `nil`
      /// when the image cannot be encoded or the payload is not valid JSON.
      private func p_makeRequest(for image: NSImage) -> URLRequest? {
          guard let base64Image = base64EncodeImage(image),
                let url = URL(string: "\(KMGOC_API_URL)?key=\(KMGOC_API_KEY)") else {
              return nil
          }

          let imagePayload: [String: Any] = ["content": base64Image]
          let features: [[String: Any]] = [["type": "TEXT_DETECTION", "maxResults": 10]]
          var annotateRequest: [String: Any] = ["image": imagePayload, "features": features]
          if let languages = selectedLanguages, !languages.isEmpty {
              let imageContext: [String: Any] = ["languageHints": languages]
              annotateRequest["imageContext"] = imageContext
          }
          let payload: [String: Any] = ["requests": [annotateRequest]]

          // `data(withJSONObject:)` raises an Objective-C exception (not a Swift
          // error) for non-JSON values, so validate first.
          guard JSONSerialization.isValidJSONObject(payload),
                let body = try? JSONSerialization.data(withJSONObject: payload, options: []) else {
              return nil
          }

          var request = URLRequest(url: url)
          request.httpMethod = "POST"
          request.addValue("application/json", forHTTPHeaderField: "Content-Type")
          request.httpBody = body
          return request
      }

      /// Decodes the response body and converts it with `responseDataResults(_:)`.
      /// Returns `nil` when the body is missing or is not a JSON object.
      private func p_parseResults(from data: Data?) -> [Any]? {
          guard let data = data,
                let json = try? JSONSerialization.jsonObject(with: data, options: []),
                let dictionary = json as? [String: Any] else {
              return nil
          }
          return responseDataResults(dictionary as NSDictionary)
      }

      // MARK: - Image encoding

      /// Encodes `image` as Base64 PNG data, falling back to JPEG when the PNG
      /// exceeds 4 MiB.
      /// - Returns: The Base64 string, or `nil` if the image cannot be encoded
      ///   or cannot be brought under the size limit.
      func base64EncodeImage(_ image: NSImage) -> String? {
          guard let tiffData = image.tiffRepresentation,
                let imageRep = NSBitmapImageRep(data: tiffData) else {
              return nil
          }
          imageRep.size = image.size

          guard let pngData = imageRep.representation(using: .png, properties: [:]) else {
              return nil
          }

          let limit = KMGOCROperation.maxImageByteCount
          let payload: Data
          if pngData.count > limit {
              guard let compressed = compressImageData(pngData, toMaxFileSize: limit) else {
                  return nil
              }
              payload = compressed
          } else {
              payload = pngData
          }
          return payload.base64EncodedString(options: .lineLength64Characters)
      }

      /// Re-encodes `imageData` as JPEG with decreasing quality (0.8 down to 0.1)
      /// until it fits into `maxFileSize` bytes.
      ///
      /// The source image is decoded once and every attempt is encoded from it,
      /// so the quality loss of successive attempts does not accumulate.
      /// - Returns: `imageData` itself if it already fits, the first JPEG that
      ///   fits, or `nil` if decoding/encoding fails or no quality level fits.
      func compressImageData(_ imageData: Data, toMaxFileSize maxFileSize: Int) -> Data? {
          guard imageData.count > maxFileSize else { return imageData }
          guard let imageRep = NSBitmapImageRep(data: imageData) else { return nil }

          // Integer steps avoid floating-point drift in the quality factor.
          for step in stride(from: 8, through: 1, by: -1) {
              let quality = Float(step) / 10
              guard let jpegData = imageRep.representation(
                  using: .jpeg,
                  properties: [.compressionFactor: NSNumber(value: quality)]
              ) else {
                  return nil
              }
              if jpegData.count <= maxFileSize {
                  return jpegData
              }
          }
          return nil
      }

      // MARK: - Response parsing

      /// Converts a decoded `images:annotate` response into `KMGOCRResult` objects.
      ///
      /// Only the first entry of `responses` is read, because a single image is
      /// sent per request.
      /// - Returns: One result per text annotation, or `nil` when the response
      ///   has no `responses` array, carries an `error` object, or contains no
      ///   `textAnnotations` (callers treat `nil` as a failure).
      func responseDataResults(_ dictionary: NSDictionary) -> [Any]? {
          guard let responses = dictionary["responses"] as? [Any],
                !(dictionary["error"] is NSDictionary) else {
              return nil
          }
          let firstResponse = responses.first as? NSDictionary
          if firstResponse?["error"] is NSDictionary {
              return nil
          }
          guard let textAnnotations = firstResponse?["textAnnotations"] as? [Any] else {
              return nil
          }

          var results = [Any]()
          for annotation in textAnnotations {
              let info = annotation as? NSDictionary
              var textBounds = CGRect.zero
              if let info = info {
                  textBounds = p_boundingRect(of: info)
              }

              let result = KMGOCRResult()
              result.text = info?["description"] as? String ?? ""
              result.locale = info?["locale"] as? String ?? ""
              result.textBounds = textBounds
              results.append(result)
          }
          return results
      }

      /// Returns the axis-aligned rectangle enclosing `boundingPoly.vertices` of
      /// `annotation`, or `.zero` when there are no vertices.
      private func p_boundingRect(of annotation: NSDictionary) -> CGRect {
          guard let polygon = annotation["boundingPoly"] as? NSDictionary,
                let vertices = polygon["vertices"] as? [Any] else {
              return .zero
          }

          var xs = [CGFloat]()
          var ys = [CGFloat]()
          for case let vertex as NSDictionary in vertices {
              // The service omits coordinates whose value is 0, so a missing
              // key means 0 rather than "skip this vertex".
              xs.append(CGFloat((vertex["x"] as? NSNumber)?.doubleValue ?? 0))
              ys.append(CGFloat((vertex["y"] as? NSNumber)?.doubleValue ?? 0))
          }

          guard let minX = xs.min(), let maxX = xs.max(),
                let minY = ys.min(), let maxY = ys.max() else {
              return .zero
          }
          return CGRect(x: minX, y: minY, width: maxX - minX, height: maxY - minY)
      }
  }