//
//  CharacterAutoTest.swift
//  KdanAuto
//
//  Created by 朱东勇 on 2022/11/22.
//

import Foundation
import Cocoa

/// Auto test that converts every origin PDF and compares the text of the converted
/// file with a reference ("check") text file, character by character.
class StringAutoTest : AutoTest {
//    override func type() -> String {
//        return "PDFConvert_China_Auto_Test"
//    }

    /// Returns the stored test name.
    override func name() -> String {
        return _name
    }

    /// The only key this test offers: "字符" (characters).
    override func keys() -> NSArray {
        return ["字符"]
    }

    /// The test needs to run as soon as at least one key is selected.
    override func needTest() -> Bool {
        return self.selectedKeys().count > 0
    }

    /// Returns a new `StringAutoTest` instance on every call.
    override class func shared() -> AutoTest? {
        return StringAutoTest()
    }

    // MARK: - Auto test against the reference ("check") files

    /// Converts the origin files one after another and compares each converted file
    /// with its reference file.
    ///
    /// For every file the conversion is started through `process(_:resultPath:)`; when
    /// the result and the reference file both exist, their text is read and compared on
    /// a global queue, the per-file report is written next to the result as
    /// `<result>.rtf`, and the report is appended to `reportString`. The next file is
    /// only started after the current one has finished.
    ///
    /// - Parameter complention: Called with `self` and the accumulated report. It is
    ///   called synchronously when the "字符" key is not selected, otherwise on the main
    ///   queue after the last file has been handled.
    override func autoTest(_ complention:@escaping (_ object:AutoTest, _ report:NSAttributedString?) -> ()) {
        self.compareFinishedFiles.removeAllObjects()
        self.convertFiles.removeAllObjects()

        clearCacheFiles()

        let needCheckString = self.selectedKeys().contains("字符")
        guard needCheckString else {
            _status = .Finished
            complention(self, self.reportString)
            return
        }

        _status = .Process

        reportString = NSMutableAttributedString.init(string: "\n【\(String(self.fileType())) - \(self.name())】字符比对开始!\n",
                                                      attributes:[.foregroundColor : NSColor.blue])

        let files = DataModel.shared.originFilesFor(_fileType, type: _type)
        self.testFiles = NSArray(array: files)

        let checkDirectory = self.checkFileDirectory()
        let originDirectory = self.originFileDirectory()
        let resultDirectory = self.resultFileDirectory()

        if files.count > 0 {
            // Best effort: a failure here shows up later as a missing result/reference file.
            try? FileManager.default.createDirectory(atPath: checkDirectory, withIntermediateDirectories: true)
            try? FileManager.default.createDirectory(atPath: resultDirectory, withIntermediateDirectories: true)
        }

        var tDegree = Double(0)   // sum of the per-file accuracies
        var tCount = Int(0)       // number of files that were actually compared
        var fileIndex = 0         // index of the file currently being handled

        // The closure calls itself to step through `files` sequentially.
        var convertFileBlock: ([String]) -> Void = { _ in }
        convertFileBlock = { (files:[String]) in
            if fileIndex >= files.count {
                TestDegreeManager.shared().set(((tCount != 0) ? tDegree/Double(tCount) : 0.0),
                                               fileType: self.fileType(),
                                               type: self.type())

                self._status = .Finished
                DispatchQueue.main.async {
                    autoreleasepool {
                        complention(self, self.reportString)
                    }
                }

                // The closure captures itself (and `self`) through this variable;
                // dropping the reference here breaks that retain cycle.
                convertFileBlock = { _ in }
                return
            }

            let fileName = files[fileIndex]
            let fName = NSString(string: fileName).deletingPathExtension

            let originPath = NSString(string: originDirectory).appendingPathComponent(fName+".pdf")
            let resultPath = NSString(string: resultDirectory).appendingPathComponent(fName+"."+self.extention())
            let checkPath = NSString(string: checkDirectory).appendingPathComponent(fName+"."+self.extention())

            self.reportString?.append(NSMutableAttributedString.init(string: "\n【\(String(self.fileType())) - \(self.name())】开始转换文件 \"\(fName)\"\n",
                                                                     attributes:[.foregroundColor : NSColor.black]))

            // Marks the current file as compared, logs the progress and starts the next file.
            let finishFile: () -> Void = {
                let index = self.testFiles.index(of: fileName)
                if index != NSNotFound {
                    self.compareProgress = Double(index) / Double(self.testFiles.count)
                }
                self.compareFinishedFiles.add(fileName)
                self.testlog("对比完成:"+fileName, (self.compareProgress + self.convertProgress)/2.0)

                fileIndex += 1
                convertFileBlock(files)
            }

            // Run the conversion.
            let convertIndex = self.testFiles.index(of: fileName)
            if convertIndex != NSNotFound {
                self.convertProgress = Double(convertIndex) / Double(self.testFiles.count)
            }
            self.convertFiles.add(fileName)
            self.testlog("开始转换:"+fileName, (self.compareProgress + self.convertProgress)/2.0)

            self.process(originPath, resultPath: resultPath) { status in
                if FileManager.default.fileExists(atPath: resultPath) && status == 1 {
                    if needCheckString && FileManager.default.fileExists(atPath: checkPath) {
                        DispatchQueue.global().async {
                            if let checkString = self.readTextFile(checkPath as NSString),
                               let resultStr = self.readTextFile(resultPath as NSString) {
                                let maxSize = checkString.count
                                let report = NSMutableAttributedString(string: "")

                                let degree = self.compareString(checkString as NSString, result: resultStr as NSString) { appAttr in
                                    report.append(appAttr)
                                }

                                // Anything that is not (almost) exactly 100% is highlighted in red.
                                let color = (abs(degree-100.0) >= 0.01) ? NSColor.red : NSColor.black

                                tDegree += degree
                                tCount += 1

                                TestDegreeManager.shared().set(degree,
                                                               fileType: self.fileType(),
                                                               type: self.type(),
                                                               fileName: fileName)

                                // Keep the full precision of `degree`; truncating it to an
                                // integer first would under-report the matched count.
                                let successCount = Int(Double(maxSize) * degree / 100.0)
                                report.append(NSAttributedString.init(string: "\n【\(String(self.fileType())) - \(self.name())】文件 \"\(fName)\"比对完成,准确率\(degree)%(\(successCount)/\(maxSize))\n",
                                                                      attributes:[.foregroundColor : color]))

                                // Store the per-file report next to the result file (best effort).
                                let rtfData = try? report.data(from: NSRange(location: 0, length: report.length),
                                                               documentAttributes: [.documentType: NSAttributedString.DocumentType.rtf])
                                if let path = NSString(string: resultPath).appendingPathExtension("rtf") {
                                    try? FileManager.default.removeItem(atPath: path)
                                    try? rtfData?.write(to: NSURL.fileURL(withPath: path))
                                }

                                self.reportString?.append(report)
                            } else {
                                // Reference or result text could not be read: report it
                                // instead of silently skipping the comparison.
                                self.reportString?.append(NSMutableAttributedString.init(string: "【\(String(self.fileType())) - \(self.name())】文件 \"\(fName)\"文本读取失败!\n",
                                                                                         attributes:[.foregroundColor : NSColor.red]))
                            }

                            finishFile()
                        }
                    } else {
                        self.reportString?.append(NSMutableAttributedString.init(string: "【\(String(self.fileType())) - \(self.name())】对照文件 \"\(fName)\"不存在!\n",
                                                                                 attributes:[.foregroundColor : NSColor.red]))
                        finishFile()
                    }
                } else {
                    if status == 0 {
                        self.reportString?.append(NSMutableAttributedString.init(string: "【\(String(self.fileType())) - \(self.name())】文件 \"\(fName)\"转档失败!\n",
                                                                                 attributes:[.foregroundColor : NSColor.red]))
                    } else if status == -1 || status == -2 {
                        self.reportString?.append(NSMutableAttributedString.init(string: "【\(String(self.fileType())) - \(self.name())】文件 \"\(fName)\"文档无法打开!\n",
                                                                                 attributes:[.foregroundColor : NSColor.red]))
                    } else if status == -3 {
                        self.reportString?.append(NSMutableAttributedString.init(string: "【\(String(self.fileType())) - \(self.name())】文件 \"\(fName)\"转档中 Crash!\n",
                                                                                 attributes:[.foregroundColor : NSColor.red]))
                    }

                    finishFile()
                }
            }
        }

        convertFileBlock(files)
    }

    // MARK: - Report tools

    /// Appends a red, human readable description of one mismatch to `reportString`.
    ///
    /// - Parameters:
    ///   - skipString: Reference text that was not found in the result.
    ///   - failString: Result text that does not belong to the reference.
    func appendErrorInfo(_ skipString:NSString, failString: NSString) {
        let message: String
        if skipString.length > 0 && failString.length > 0 {
            message = "对比字符串【\(skipString)】错识别为【\(failString)】\n"
        } else if skipString.length > 0 {
            message = "对比字符串【\(skipString)】未识别到\n"
        } else if failString.length > 0 {
            message = "字符串【\(failString)】识别出错\n"
        } else {
            return
        }

        reportString?.append(NSMutableAttributedString.init(string: message,
                                                            attributes:[.foregroundColor : NSColor.red]))
    }

    /// Builds the rich-text marker for one mismatch.
    ///
    /// The missed reference text (`skipString`) is rendered red with a strikethrough,
    /// followed by the unexpected result text (`failString`) in blue. Empty parts are
    /// left out, so two empty strings produce an empty attributed string.
    func attributeStringWith(_ skipString:NSString, failString: NSString) -> NSAttributedString {
        let attString = NSMutableAttributedString()

        if skipString.length > 0 {
            attString.append(NSAttributedString(string: skipString as String,
                                                attributes:[.foregroundColor : NSColor.red,
                                                            .strikethroughStyle : NSNumber(integerLiteral: NSUnderlineStyle.single.rawValue)]))
        }
        if failString.length > 0 {
            attString.append(NSAttributedString(string: failString as String,
                                                attributes:[.foregroundColor : NSColor.blue]))
        }

        return attString
    }

    /// Replaces RTF style `\uN?` escapes with the character they encode.
    ///
    /// The input is split at every `\u`. A piece that follows a `\u` and contains `?`
    /// is treated as an escape: its leading number becomes one UTF-16 code unit and
    /// the text after the first `?` is kept. Every other piece is kept as it is (the
    /// `\u` separator itself is dropped, as before).
    ///
    /// The number is truncated to 16 bits, so negative values - RTF writes code units
    /// above 32767 as signed 16-bit numbers - map to the intended code unit instead of
    /// trapping, and consecutive surrogate escapes combine into one character.
    func replaceUnicodeString(_ string:NSString) -> NSString {
        let items = string.components(separatedBy: "\\u")
        var units: [unichar] = []

        for (index, item) in items.enumerated() {
            let nsItem = item as NSString
            let markRange = nsItem.range(of: "?")

            // The first piece precedes the first "\u", so it can never be an escape.
            if index > 0 && markRange.location != NSNotFound {
                units.append(unichar(truncatingIfNeeded: Int(nsItem.intValue)))
                units.append(contentsOf: nsItem.substring(from: NSMaxRange(markRange)).utf16)
            } else {
                units.append(contentsOf: item.utf16)
            }
        }

        return String(decoding: units, as: UTF16.self) as NSString
    }

    // MARK: - Compare

    /// Aligns `result` with `check` one UTF-16 unit at a time and returns the accuracy.
    ///
    /// Line feeds and spaces are removed from `result` first. Then the heads of both
    /// strings are compared repeatedly:
    /// - equal heads: both are consumed and counted as a match;
    /// - the reference head is `"\n"`, `"\r"` or `" "`: it is consumed and not counted;
    /// - both heads occur somewhere in the other string: offset based heuristics
    ///   decide whether the reference head is "missed" or the result head is "wrong";
    /// - only the result head occurs in the reference: the reference head is missed;
    /// - only the reference head occurs in the result: the result head is wrong;
    /// - neither occurs: both are consumed, one as missed and one as wrong.
    ///
    /// NOTE(review): heads are taken with `substring(to: 1)`, i.e. per UTF-16 unit, so
    /// characters outside the BMP are probably not compared as a whole - confirm.
    ///
    /// - Parameters:
    ///   - check: The reference text.
    ///   - result: The converted text.
    ///   - callback: Receives the report pieces in order: matched and skipped
    ///     characters as plain text, mismatches as produced by `attributeStringWith`,
    ///     and a final line feed.
    /// - Returns: `matches / counted reference length * 100`, or 0 when no reference
    ///   character is counted.
    func compareString(_ check:NSString, result:NSString, callback:@escaping (_ appAttr:NSAttributedString)->()) -> Double {
        return autoreleasepool {
            var checkString = check
            var resultStr = result
            var maxSize = checkString.length
            var successCount = 0

            resultStr = resultStr.replacingOccurrences(of: "\n", with: "") as NSString
            resultStr = resultStr.replacingOccurrences(of: " ", with: "") as NSString

            var skipString = NSString()   // reference text missed so far
            var failString = NSString()   // unexpected result text so far

            // Consumes the head of the reference text and records it as missed.
            func skipCheckHead(_ head: NSString) {
                skipString = skipString.appending(head as String) as NSString
                checkString = checkString.substring(from: 1) as NSString
            }

            // Consumes the head of the result text and records it as wrong.
            func failResultHead(_ head: NSString) {
                failString = failString.appending(head as String) as NSString
                resultStr = resultStr.substring(from: 1) as NSString
            }

            while checkString.length > 0 && resultStr.length > 0 {
                let subc = checkString.substring(to: 1) as NSString
                let subr = resultStr.substring(to: 1) as NSString

                // cRange: where the result head occurs in the reference text.
                // rRange: where the reference head occurs in the result text.
                let cRange = checkString.range(of: subr as String)
                let rRange = resultStr.range(of: subc as String)

                if subc.isEqual(to: subr) {
                    // Both heads are equal: flush the pending mismatch and count a match.
                    callback(attributeStringWith(skipString, failString: failString))
                    skipString = NSString()
                    failString = NSString()

                    checkString = checkString.substring(from: 1) as NSString
                    resultStr = resultStr.substring(from: 1) as NSString

                    successCount = successCount + 1
                    callback(NSAttributedString(string: subc as String))
                } else if ["\n", "\r", " "].contains(subc) {
                    // Heads differ, but the reference head is a space or a line break:
                    // drop it and do not count it.
                    callback(NSAttributedString(string: subc as String))

                    maxSize -= 1
                    checkString = checkString.substring(from: 1) as NSString
                } else if cRange.location != NSNotFound && rRange.location != NSNotFound {
                    // Each head occurs somewhere in the other string.
                    if min(cRange.location, rRange.location) >= 10 {
                        if cRange.location > rRange.location*3 {
                            failResultHead(subr)
                        } else if cRange.location*3 < rRange.location {
                            skipCheckHead(subc)
                        } else {
                            // Offsets are of similar size: look at how much of each
                            // leading part can be found, in order, in the other string.
                            let checkSubString = checkString.substring(with: NSMakeRange(0, cRange.location))
                            let cCount = checkCountIn(checkSubString as NSString, inStr: resultStr)
                            let rCount = checkCountIn(resultStr.substring(with: NSMakeRange(0, rRange.location)) as NSString, inStr: checkString)
                            let cdegree = Float(cCount) / Float(cRange.location)
                            let rdegree = Float(rCount) / Float(rRange.location)

                            if cdegree > 0.2 {
                                failResultHead(subr)
                            } else if rdegree > 0.2 {
                                skipCheckHead(subc)
                            } else if cdegree > rdegree * 2.0 {
                                failResultHead(subr)
                            } else if cdegree * 2.0 < rdegree {
                                skipCheckHead(subc)
                            } else if cCount < rCount {
                                failResultHead(subr)
                            } else {
                                skipCheckHead(subc)
                            }
                        }
                    } else if min(cRange.location, rRange.location) >= 2 {
                        // The smaller head offset is at least 2: also look at the second characters.
                        let scale = (resultStr.length > 0) ? (Float(checkString.length) / Float(resultStr.length)) : Float(1.0)
                        let nextc = checkString.substring(with: NSRange(location: 1, length: 1)) as NSString
                        let nextr = resultStr.substring(with: NSRange(location: 1, length: 1)) as NSString
                        var ncRange = checkString.range(of: nextr as String)
                        var nrRange = resultStr.range(of: nextc as String)
                        // A missing second character is replaced by a large offset.
                        if nrRange.location == NSNotFound { nrRange.location = 100000 }
                        if ncRange.location == NSNotFound { ncRange.location = 100000 }

                        if min(nrRange.location, ncRange.location) < min(cRange.location, rRange.location) {
                            // The second characters are found closer than the heads.
                            if ncRange.location < Int(Float(nrRange.location) * scale) {
                                skipCheckHead(subc)
                            } else {
                                failResultHead(subr)
                            }
                        } else if cRange.location < Int(Float(rRange.location) * scale) {
                            skipCheckHead(subc)
                        } else {
                            failResultHead(subr)
                        }
                    } else {
                        if cRange.location < rRange.location {
                            skipCheckHead(subc)
                        } else {
                            failResultHead(subr)
                        }
                    }
                } else if cRange.location != NSNotFound {
                    // Only the result head occurs in the reference text.
                    skipCheckHead(subc)
                } else if rRange.location != NSNotFound {
                    // Only the reference head occurs in the result text.
                    failResultHead(subr)
                } else {
                    // Neither head occurs in the other string: consume both.
                    skipCheckHead(subc)
                    failResultHead(subr)
                }
            }

            // Whatever is left on either side is a mismatch.
            skipString = skipString.appending(checkString as String) as NSString
            failString = failString.appending(resultStr as String) as NSString

            callback(attributeStringWith(skipString, failString: failString))
            callback(NSAttributedString(string: "\n"))

            // `> 0` (not `> 1`): a fully matched single-character reference is 100%, not 0%.
            let degree = (maxSize > 0) ? Double(Float(successCount)/Float(maxSize) * 100) : 0
            return degree
        }
    }

    /// Measures how much of `subStr` occurs, in order, in `inStr`.
    ///
    /// Every 3-unit window of `subStr` is searched in `inStr`; the first-match
    /// locations of the windows that are found are collected, and the length of their
    /// longest strictly increasing subsequence is returned.
    ///
    /// - Returns: 0 when `subStr` is shorter than 3 units or no window is found.
    func checkCountIn(_ subStr:NSString, inStr:NSString) -> Int {
        // `0...(length - 3)` would trap for strings shorter than one window.
        guard subStr.length >= 3 else { return 0 }

        var locations: [Int] = []
        for i in 0...(subStr.length - 3) {
            let window = subStr.substring(with: NSMakeRange(i, 3))
            let range = inStr.range(of: window)
            if range.location != NSNotFound {
                locations.append(range.location)
            }
        }

        return findLISLength(locations)
    }

    /// Returns the length of the longest strictly increasing subsequence of `nums`
    /// (0 for an empty array), using the quadratic dynamic-programming scheme.
    func findLISLength(_ nums: [Int]) -> Int {
        guard !nums.isEmpty else { return 0 }

        // best[i]: length of the longest increasing subsequence ending at index i.
        var best = Array(repeating: 1, count: nums.count)
        var longest = 1

        for i in 1 ..< nums.count {
            for j in 0 ..< i where nums[i] > nums[j] {
                best[i] = max(best[i], best[j] + 1)
            }
            longest = max(longest, best[i])
        }

        return longest
    }

    // MARK: - Read file

    /// Reads the text of a `.txt` or `.rtf` file (extension compared case-insensitively).
    ///
    /// - `txt`: the file content decoded as UTF-8.
    /// - `rtf`: the plain text of the parsed RTF document. When that text contains
    ///   nothing but spaces and line feeds, the raw file is read as UTF-8 instead:
    ///   without a `\shptxt\shptxt` marker the raw content is returned with line feeds
    ///   and spaces removed; otherwise the text of every `\shptxt\shptxt ... }` section
    ///   is extracted, control words are cut off, `\uN?` escapes are decoded and line
    ///   feeds, spaces, `\pard` and `\par` are removed.
    ///
    /// - Returns: The text, or `nil` for other extensions and for files that cannot be
    ///   read or decoded.
    func readTextFile(_ filePath:NSString) -> String? {
        switch filePath.pathExtension.lowercased() {
        case "txt":
            let text = try? NSString.init(contentsOfFile: filePath as String, encoding: NSUTF8StringEncoding)
            return text as String?

        case "rtf":
            // Regular RTF parsing first.
            guard let checkData = NSData.init(contentsOfFile: filePath as String) else {
                return nil
            }
            let checkAttString = NSAttributedString.init(rtf: checkData as Data, documentAttributes: nil)
            let checkString = checkAttString?.string ?? ""

            let pureString = checkString
                .replacingOccurrences(of: "\n", with: "")
                .replacingOccurrences(of: " ", with: "")
            if !pureString.isEmpty {
                return checkString
            }

            // Regular RTF reading gave no text: fall back to the raw file content
            // and read the text-box (frame layout) sections.
            guard let rawString = try? NSString.init(contentsOfFile: filePath as String, encoding: NSUTF8StringEncoding) else {
                return nil
            }

            if !rawString.contains("\\shptxt\\shptxt") {
                return (rawString as String)
                    .replacingOccurrences(of: "\n", with: "")
                    .replacingOccurrences(of: " ", with: "")
            }

            // Collect the content of every `\shptxt\shptxt ... }` section.
            var finalString = ""
            for pageInfoString in rawString.components(separatedBy: "\\shptxt\\shptxt").dropFirst() {
                let section = pageInfoString as NSString
                let endRange = section.range(of: "}")
                // A section without closing brace is kept as a whole instead of trapping.
                finalString += (endRange.location != NSNotFound) ? section.substring(to: endRange.location) : pageInfoString
            }

            // Take the value of every space separated piece up to its first control
            // word and stitch the values together.
            let controlWords = ["\\f", "\\c", "\\b", "\\i", "\\e", "\\p"]
            var resultStr = "" as NSString
            for str in finalString.components(separatedBy: " ") {
                let markStr = str as NSString
                if markStr.contains("\\f") {
                    // Control words that are absent report NSNotFound and never win the minimum.
                    let minPos = controlWords.map { markStr.range(of: $0).location }.min() ?? markStr.length
                    resultStr = resultStr.appending(markStr.substring(to: minPos)) as NSString
                } else {
                    resultStr = resultStr.appending(markStr as String) as NSString
                }
            }

            resultStr = self.replaceUnicodeString(resultStr)
            resultStr = resultStr.replacingOccurrences(of: "\n", with: "") as NSString
            resultStr = resultStr.replacingOccurrences(of: " ", with: "") as NSString
            resultStr = resultStr.replacingOccurrences(of: "\\pard", with: "") as NSString
            resultStr = resultStr.replacingOccurrences(of: "\\par", with: "") as NSString

            return resultStr as String

        default:
            return nil
        }
    }

    // MARK: - Compare file list

    /// Collects the compare entries of all origin files (see `compareFiles(_:)`).
    override func compareFiles() -> NSArray? {
        let items = NSMutableArray()

        let files = DataModel.shared.originFilesFor(_fileType, type: _type) as [String]
        for fileName in files {
            if let sItems = compareFiles(fileName), sItems.count != 0 {
                items.addObjects(from: Array(sItems))
            }
        }

        return items
    }

    /// Returns the compare entry for `fileName`.
    ///
    /// The array holds one file-info dictionary when both the converted file and its
    /// `.rtf` comparison report exist in the result directory; otherwise it is empty.
    override func compareFiles(_ fileName: String) -> NSArray? {
        let files = NSMutableArray()

        let checkDirectory = self.checkFileDirectory()
        let resultDirectory = self.resultFileDirectory()

        let nName = NSString(string: fileName).deletingPathExtension.appending(".\(self.extention())")
        let rComparePath = NSString(string: resultDirectory).appendingPathComponent(nName)
        let cComparePath = NSString(string: checkDirectory).appendingPathComponent(nName)

        let hasResult = FileManager.default.fileExists(atPath: rComparePath)
        if hasResult && FileManager.default.fileExists(atPath: rComparePath+".rtf") {
            let fileInfo = NSMutableDictionary.fileInfoWith(fileName,
                                                            refFilePath: nil,
                                                            resultPath: rComparePath,
                                                            comparePath: cComparePath,
                                                            objc: self)
            files.add(fileInfo)
        }

        return files
    }

    // MARK: - Reference image update (not supported by this test)

    /// Always `false`: this test never replaces reference images.
    override func canUpdateRefImage() -> Bool {
        return false
    }

    /// Intentionally empty.
    override func updateRefImage() {

    }

    /// Always `false`: this test never replaces reference images.
    override func canUpdateRefImage(_ fileName:String) -> Bool {
        return false
    }

    /// Intentionally empty.
    override func updateRefImage(_ fileName:String) {

    }
}