tableRec.vue 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467
  1. <!-- -->
  2. <template>
  3. <el-row :gutter="20">
  <!-- Left column: description, file picker, language selector and the image / PDF preview. -->
  <el-col class="place" :span="12">
    <el-row class="small-title">
      <h2>通用表格识别</h2>
    </el-row>
    <el-row style="color: gray; font-size: small;">
      <p>支持识别图片/PDF格式文档中的表格内容,包括有线表格、无线表格、合并单元格表格,同时支持单张图片内的多个表格内容识别,返回各表格的表头表尾内容、单元格文字内容及其行列位置信息。</p>
    </el-row>
    <el-row style="color: gray; font-size: small;">
      <h4>支持jpg, png, bmp, pdf格式</h4>
    </el-row>
    <div class="common-layout">
      <el-container>
        <!-- Must stay the first el-input in the document: the script looks the native input up by that position. -->
        <el-input type="file" v-model="fileName" @change="uploadImg"></el-input>
        <OcrLangList />
        <el-button type="primary" @click="predict">Predict</el-button>
      </el-container>
    </div>
    <el-row v-show="!is_pdf">
      <!-- Visible preview of the uploaded image -->
      <img id="show-img" class="show-area" />
      <!-- Hidden copy of the original image, kept for recognition -->
      <img id="raw-img" style="display: none" />
    </el-row>
    <div class="pdf-preview" v-show="is_pdf">
      <div class="pdf-wrap">
        <vue-pdf-embed class="vue-pdf-embed" :source="state.source" :scale="3.0" :page="state.pageNum" />
      </div>
      <div class="page-tool">
        <div class="page-tool-item" @click="prePage">上一页</div>
        <div class="page-tool-item" @click="nextPage">下一页</div>
        <div class="page-tool-item">{{ state.pageNum }}/{{ state.numPages }}</div>
        <!-- <div class="page-tool-item" @click="pageZoomOut">放大</div>
        <div class="page-tool-item" @click="pageZoomIn">缩小</div> -->
        <el-input type="number" v-model="pdf_page"></el-input>
        <el-button type="primary" @click="SetPdfPage" plain>页面跳转</el-button>
      </div>
    </div>
    <!-- Off-screen canvas the script renders the current PDF page into -->
    <canvas id="canvas" style="display: none;"></canvas>
  </el-col>
  <!-- Right column: recognition results (raw JSON and rendered HTML) plus the request duration. -->
  <el-col :span="12" class="place">
    <el-row class="small-title">
      <!-- A unitless non-zero length is invalid CSS and gets dropped by the browser, so the unit is spelled out. -->
      <h2 style="margin-right: 100px;">识别结果展示</h2>
      <el-button type="danger" @click="submitBug" disabled>提交bug</el-button>
    </el-row>
    <el-row v-loading="loading" class="small-title">
      <!-- <p v-if="loading">处理中</p> -->
    </el-row>
    <div class="demo-collapse">
      <el-collapse v-model="activeName" accordion>
        <el-collapse-item title="JSON识别结果" name="1">
          <li v-for="(item, index) in jsonArr" :key="index">
            <el-scrollbar height="600px">
              {{ item }}
            </el-scrollbar>
          </li>
        </el-collapse-item>
        <el-collapse-item title="识别结果" name="2">
          <li v-for="(item, index) in htmlArr" :key="index">
            <el-scrollbar height="auto">
              <div style="display: flex;">
                <!-- NOTE(review): v-html injects the service response as raw markup; it must only ever
                     carry trusted or sanitised HTML, otherwise this is an XSS sink. -->
                <span v-html="item"></span>
              </div>
            </el-scrollbar>
            <!-- <div>
            <el-button type="info" :icon="DocumentCopy" @click="copy(index)" v-on:mouseover="showText()" v-on:mouseout="hideText()" plain />
            </div> -->
          </li>
        </el-collapse-item>
      </el-collapse>
    </div>
    <el-row>
      <section> 耗时:{{ predictTime }} ms.</section>
    </el-row>
  </el-col>
  78. </el-row>
  79. </template>
  80. <script lang='ts' setup>
  81. import { reactive, ref, toRefs, computed } from 'vue'
  82. import { onMounted } from "vue";
  83. import { TableRec, SubmitBug } from '../../../../api/api'
  84. import VuePdfEmbed from "vue-pdf-embed";
  85. import { createLoadingTask } from "vue3-pdfjs/esm"; // 获得总页数
  86. // import { useServerIpStore } from '../../../../store/ServerIp';
  87. import { storeToRefs } from 'pinia'
  88. import OcrLangList from '../../../../components/OcrLangList.vue'
  89. import { useOcrLangStore } from '../../../../store/OcrLang';
  90. import { useBugIdStore } from '../../../../store/BugID';
  91. import useClipboard from 'vue-clipboard3';
  92. import {
  93. DocumentCopy,
  94. } from '@element-plus/icons-vue'
  // --- Shared stores -------------------------------------------------------
  // ocr_lang: language sent with every recognition request.
  // bug_id: response id of the latest recognition, used when reporting a bug.
  const { ocr_lang } = storeToRefs(useOcrLangStore());
  const { bug_id } = storeToRefs(useBugIdStore());
  // const si = useServerIpStore()
  // const { server_ip } = storeToRefs(si);

  // --- PDF preview ---------------------------------------------------------
  const state = reactive({
    source: "", // URL of the PDF being previewed
    pageNum: 1, // page currently displayed
    // scale: 4, // zoom factor
    numPages: 0, // total number of pages
  });
  const is_pdf = ref(false);
  // Page number typed into the "jump to page" input.
  const pdf_page = ref(1);
  // Snapshot of the rendered PDF page (a Blob once a page has been rendered).
  const pdf_img: any = ref("")
  // Hidden canvas the PDF page is rendered into; assigned once the component is mounted.
  const canvas = ref(null as unknown as HTMLCanvasElement);
  // pdf.js loading task of the current PDF.
  let loadingTask: any;

  // --- Upload --------------------------------------------------------------
  const fileName = ref(null);

  // --- Recognition results -------------------------------------------------
  let loading = ref(false)
  let predictTime = ref(0)
  // Name of the collapse panel that is open.
  const activeName = ref('2')
  let jsonArr: any = ref([])
  let htmlArr: any = ref([])
  // The hidden <canvas id="canvas"> only exists after mounting, so its reference is resolved here.
  onMounted(async () => {
    const canvasElement = document.getElementById("canvas");
    canvas.value = canvasElement as HTMLCanvasElement;
  });
  /**
   * Change handler of the file picker: previews the selected file.
   *
   * - `.pdf`: switches to the PDF preview, starts loading the document and renders page 1.
   * - `.jpg` / `.png` / `.bmp`: shows the image in the preview area.
   * - anything else: alerts the user and clears the picker.
   *
   * The file is handed to the browser through an object URL, so its content is never
   * read into memory here.
   */
  const uploadImg = () => {
    // Visible preview image.
    const showImg = document.getElementById("show-img") as HTMLImageElement;
    // Hidden image kept for recognition.
    const rawImg = document.getElementById("raw-img") as HTMLImageElement;
    // The handler is bound to an <el-input>, so the native <input> (and its FileList)
    // has to be looked up in the DOM.
    const inputElement = document
      .getElementsByClassName("el-input")[0]
      ?.getElementsByTagName("input")[0];
    const file = inputElement?.files?.[0];
    if (!file) {
      // Nothing selected (e.g. the dialog was cancelled): leave the current preview alone.
      return;
    }

    const dotIndex = file.name.lastIndexOf(".");
    const extension = dotIndex >= 0 ? file.name.substring(dotIndex).toLowerCase() : "";

    if (extension === ".pdf") {
      state.pageNum = 1;
      // state.scale = 4;
      state.source = URL.createObjectURL(file);
      is_pdf.value = true
      loadingTask = createLoadingTask(state.source);
      getPdfImage(state.pageNum);
    } else if (extension === ".jpg" || extension === ".png" || extension === ".bmp") {
      const supersededUrls = new Set([showImg.src, rawImg.src]);
      // Both elements show the same file, so one object URL is enough.
      const objectUrl = URL.createObjectURL(file);
      showImg.src = objectUrl;
      rawImg.src = objectUrl;
      is_pdf.value = false
      // Release the object URLs of the previously selected image.
      supersededUrls.forEach((url) => {
        if (url.startsWith("blob:")) {
          URL.revokeObjectURL(url);
        }
      });
    } else {
      alert('不支持的文件格式!')
      fileName.value = null
    }
  };
  /**
   * Sends the selected document to the table-recognition service and publishes the result.
   *
   * For a PDF the page currently rendered on the hidden canvas is uploaded as "pdf.png";
   * for an image the picked file itself is uploaded. On success `bug_id`, `predictTime`,
   * `jsonArr` and `htmlArr` are replaced with the new response. On failure the error is
   * logged and `bug_id` / `predictTime` are reset. `loading` is always cleared at the end.
   */
  const predict = async () => {
    if (fileName.value == undefined) {
      alert('请上传图片!')
      return;
    }
    if (ocr_lang.value == undefined || ocr_lang.value == "") {
      alert('请先指定语言!')
      return;
    }
    const inputElement = document
      .getElementsByClassName("el-input")[0]
      ?.getElementsByTagName("input")[0];
    const pickedFile = inputElement?.files?.[0];
    if (!is_pdf.value && !pickedFile) {
      // The picker holds no file although a name is bound: nothing to upload.
      alert('请上传图片!')
      return;
    }

    loading.value = true
    try {
      const data = new FormData();
      data.append('lang', ocr_lang.value);
      const image: any = is_pdf.value
        ? await convertCanvasToFile(canvas.value, "pdf.png")
        : pickedFile;
      data.append('images', image);

      const res = await TableRec(data);
      bug_id.value = res.response_id;
      predictTime.value = res.data.cost;

      const jsonItems: any[] = res.data.json_items ?? [];
      const htmlItems: any[] = res.data.html_items ?? [];
      // Replace both lists in one step so a previous prediction never lingers next to a new one.
      jsonArr.value.splice(
        0,
        jsonArr.value.length,
        ...jsonItems.map((item) => JSON.stringify(item, null, 4)),
      );
      htmlArr.value.splice(0, htmlArr.value.length, ...htmlItems);

      if (jsonItems.length === 0) {
        alert('未检测到结果!');
      }
    } catch (err) {
      console.error(err);
      bug_id.value = ""
      predictTime.value = 0
    } finally {
      // Reached on every path, including a failed canvas export.
      loading.value = false
    }
  };
  /**
   * Jumps the PDF preview to the page entered in the page-number input.
   *
   * The entered value is coerced with `Number()` (the bound value may arrive as a string —
   * the original code already coerced it before use) and must be a whole number between 1
   * and `state.numPages`; anything else is rejected with an alert.
   */
  function SetPdfPage() {
    const requestedPage = Number(pdf_page.value);
    const isValidPage =
      Number.isInteger(requestedPage) &&
      requestedPage >= 1 &&
      requestedPage <= state.numPages;
    if (isValidPage) {
      state.pageNum = requestedPage;
      getPdfImage(state.pageNum);
    } else {
      alert('输入的pdf页面无效!')
    }
  }
  // Hint text for the copy button: idle text by default, swapped while the pointer hovers.
  const COPY_HINT_IDLE = "click this button to copy the html table";
  let copy_text = ref(COPY_HINT_IDLE);

  /** Mouse-over handler: switches the hint to its hover text. */
  function showText() {
    copy_text.value = "click it!";
  }

  /** Mouse-out handler: restores the idle hint. */
  function hideText() {
    copy_text.value = COPY_HINT_IDLE;
  }
  /**
   * Reports the latest recognition as a bug, identified by the stored `bug_id`.
   * Asks the user to run a prediction first when no id is available.
   */
  const submitBug = async () => {
    const hasPrediction = !(bug_id.value == undefined || bug_id.value == "");
    if (!hasPrediction) {
      alert('请先预测结果!')
      return;
    }

    SubmitBug(bug_id.value)
      .then((res) => {
        console.log(res.code, res.data)
      })
      .catch((err) => {
        console.log(err)
        // loading.value = !loading.value;
      });
  };
  /**
   * Exports the current content of a canvas as an uploadable image file.
   *
   * The file is built from the blob freshly encoded from `canvas`. Only when the canvas
   * cannot be encoded (`toBlob` yields null) is the cached page snapshot `pdf_img.value`
   * used instead.
   *
   * @param canvas   Canvas whose pixels are exported.
   * @param fileName Name given to the resulting file.
   * @returns A `File`, or a name-tagged blob where the `File` constructor is unavailable.
   */
  async function convertCanvasToFile(canvas: HTMLCanvasElement, fileName: any) {
    // Encode with the MIME type of the cached snapshot when there is one, PNG otherwise.
    const mimeType: string = pdf_img.value?.type || "image/png";
    const blob = await new Promise<Blob | null>((resolve) => {
      canvas.toBlob(resolve, mimeType, 1.0);
    });
    const payload: any = blob ?? pdf_img.value;

    let file = null;
    try {
      file = new File([payload], fileName, { type: mimeType });
    } catch (e) {
      // Safari does not support creating a file via new File(); tag the blob by hand instead.
      file = Object.assign(payload, { lastModifiedDate: new Date(), name: fileName });
    }
    return file;
  }
  /**
   * Makes a blob look like a file by tagging it, in place, with a modification date
   * (the current time) and the given name.
   *
   * @returns The same object that was passed in.
   */
  function blobToFile(blob: any, fileName: any) {
    return Object.assign(blob, {
      lastModifiedDate: new Date(),
      name: fileName,
    });
  }
  /**
   * Decodes a base64 data URL ("data:<mime>;base64,<payload>") into a Blob of that MIME type.
   *
   * @param dataURL Data URL string; the part before the first comma must contain ":<mime>;".
   * @returns Blob holding the decoded bytes.
   */
  function dataURLtoBlob(dataURL: any) {
    const [header, payload] = dataURL.split(',');
    const mime = header.match(/:(.*?);/)[1];
    // atob yields one character per byte; copy the character codes into a byte array.
    const decoded: string = atob(payload);
    const bytes = new Uint8Array(decoded.length);
    for (let i = 0; i < decoded.length; i += 1) {
      bytes[i] = decoded.charCodeAt(i);
    }
    return new Blob([bytes], { type: mime });
  }
  /**
   * Renders one page of the loaded PDF onto the hidden canvas and caches a PNG snapshot
   * of it in `pdf_img`.
   *
   * Also refreshes `state.numPages` from the loaded document. The snapshot is taken only
   * after the page has finished rendering. Failures (no document loaded, invalid page,
   * render error) are logged and otherwise ignored.
   *
   * @param index 1-based page number to render.
   */
  async function getPdfImage(index: number): Promise<void> {
    try {
      const pdf: any = await loadingTask.promise;
      state.numPages = pdf.numPages;

      const page: any = await pdf.getPage(index);
      const viewport = page.getViewport({ scale: 4.0 });
      const target = canvas.value;
      target.height = viewport.height;
      target.width = viewport.width;
      // CSS size of the canvas: fixed width, height derived from the page's aspect ratio.
      // const clientWidth = document.body.clientWidth;
      const destWidth = 398;
      target.style.width = destWidth + 'px';
      target.style.height = destWidth * (viewport.height / viewport.width) + 'px';

      const ctx = target.getContext('2d');
      // render() only starts the work; its promise settles once the page is fully drawn.
      await page.render({
        canvasContext: ctx,
        viewport,
      }).promise;

      target.toBlob((blob) => {
        // toBlob reports null when the canvas cannot be encoded; keep the previous snapshot then.
        if (blob) {
          pdf_img.value = blob;
        }
      }, 'image/png', 1.0);
    } catch (err) {
      console.error(err);
    }
  }
  /** Steps the PDF preview back one page and re-renders it; does nothing on the first page. */
  function prePage() {
    const hasPreviousPage = state.pageNum > 1;
    if (!hasPreviousPage) {
      return;
    }
    state.pageNum -= 1;
    getPdfImage(state.pageNum);
  }
  /** Advances the PDF preview one page and re-renders it; does nothing on the last page. */
  function nextPage() {
    const hasNextPage = state.pageNum < state.numPages;
    if (!hasNextPage) {
      return;
    }
    state.pageNum += 1;
    getPdfImage(state.pageNum);
  }
  const { toClipboard } = useClipboard()

  /**
   * Copies the HTML of one recognised table to the clipboard.
   *
   * The clipboard write is awaited, so the hint only switches to "copied!" once the copy
   * has actually succeeded, and a failed copy reaches the catch block and is logged.
   *
   * @param index Position of the table in `htmlArr`.
   */
  async function copy(index: any) {
    try {
      await toClipboard(htmlArr.value[index])
      copy_text.value = "copied!";
      console.log('Copied to clipboard')
    } catch (e) {
      console.error(e)
    }
  }
  345. </script>
  346. <style scoped lang="less">
  /* Header rows: title on one side, action on the other. */
  .small-title {
    justify-content: space-between;
    align-items: center;
  }

  /* Uploaded image preview fills the column. */
  .show-area {
    width: 100%;
  }

  /* Column frame with a separator on its right edge. */
  .place {
    margin-right: auto;
    margin-left: auto;
    border-right: solid 1px #ccc;
  }

  /*
   * PDF preview container.
   * This selector used to be declared twice; the second copy only added
   * "background-color: e9e9e9", which is not a valid colour (missing "#") and was
   * therefore ignored. The rule below is what actually applied.
   */
  .pdf-preview {
    position: relative;
    height: 100vh;
    padding: 20px 0;
    box-sizing: border-box;
    background: rgb(66, 66, 66);
  }

  .pdf-wrap {
    overflow-y: auto;
  }

  /*
   * Rendered PDF page.
   * Also declared twice before; the later "width: 515px" overrode the earlier
   * "width: auto", so only the effective value is kept.
   */
  .vue-pdf-embed {
    text-align: center;
    width: 515px;
    border: 1px solid #e5e5e5;
    margin: 0 auto;
    box-sizing: border-box;
  }

  /* Paging toolbar, centred horizontally inside the preview. */
  .page-tool {
    position: absolute;
    /* bottom: 35px; */
    /* padding-left: 15px; */
    /* padding-right: 15px; */
    display: flex;
    align-items: center;
    background: rgb(66, 66, 66);
    color: white;
    border-radius: 19px;
    z-index: 100;
    cursor: pointer;
    margin-left: 50%;
    transform: translateX(-50%);
  }

  .page-tool-item {
    padding: 8px 15px;
    padding-left: 10px;
    cursor: pointer;
  }

  .input_text {
    background-color: #F5F5F5;
    border: 1px solid #CCCCCC;
    padding: 10px;
    margin: 10px;
    border-radius: 5px;
    font-size: 14px;
    color: #333333;
    text-align: left;
    box-shadow: 0px 0px 5px 0px rgba(0, 0, 0, 0.25);
  }
  421. </style>