pdf.ts 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159
  1. // @ts-ignore
  2. import pdfjs from 'pdfjs-dist';
  3. import { ProgressType, ViewportType } from '../constants/type';
  4. import { objIsEmpty } from './utility';
  5. pdfjs.GlobalWorkerOptions.workerSrc = '../static/pdf.worker.js';
  6. let normalizationRegex: any = null;
  7. const CHARACTERS_TO_NORMALIZE: {[index: string]: any} = {
  8. '\u2018': '\'', // Left single quotation mark
  9. '\u2019': '\'', // Right single quotation mark
  10. '\u201A': '\'', // Single low-9 quotation mark
  11. '\u201B': '\'', // Single high-reversed-9 quotation mark
  12. '\u201C': '"', // Left double quotation mark
  13. '\u201D': '"', // Right double quotation mark
  14. '\u201E': '"', // Double low-9 quotation mark
  15. '\u201F': '"', // Double high-reversed-9 quotation mark
  16. '\u00BC': '1/4', // Vulgar fraction one quarter
  17. '\u00BD': '1/2', // Vulgar fraction one half
  18. '\u00BE': '3/4', // Vulgar fraction three quarters
  19. };
  20. export const fetchPdf = async (
  21. src: string, cb?: (progress: ProgressType) => void,
  22. ): Promise<any> => {
  23. try {
  24. const loadingTask = pdfjs.getDocument({
  25. url: src,
  26. });
  27. if (cb) {
  28. loadingTask.onProgress = (progress: ProgressType): void => {
  29. cb(progress);
  30. };
  31. }
  32. const pdf = await loadingTask.promise;
  33. return pdf;
  34. } catch (e) {
  35. console.log(e);
  36. }
  37. return {};
  38. };
  39. export const renderPdfPage = async ({
  40. rootEle,
  41. page,
  42. viewport,
  43. }: {
  44. rootEle: HTMLElement;
  45. page: any;
  46. viewport: ViewportType;
  47. }): Promise<any> => {
  48. if (rootEle) {
  49. const canvas: HTMLCanvasElement = rootEle.querySelectorAll('canvas')[0] as HTMLCanvasElement;
  50. const textLayer: HTMLDivElement = rootEle.querySelector('[data-id="text-layer"]') as HTMLDivElement;
  51. if (canvas) {
  52. const context: CanvasRenderingContext2D = canvas.getContext('2d') as CanvasRenderingContext2D;
  53. canvas.height = viewport.height;
  54. canvas.width = viewport.width;
  55. const renderContext = {
  56. canvasContext: context,
  57. viewport,
  58. };
  59. if (!objIsEmpty(page)) {
  60. const renderTask = page.render(renderContext);
  61. textLayer.innerHTML = '';
  62. page.getTextContent().then((textContent: any) => {
  63. pdfjs.renderTextLayer({
  64. textContent,
  65. container: textLayer,
  66. viewport,
  67. textDivs: [],
  68. });
  69. });
  70. await renderTask.promise;
  71. }
  72. }
  73. }
  74. };
  75. export const normalize = (text: string): string => {
  76. if (!normalizationRegex) {
  77. // Compile the regular expression for text normalization once.
  78. const replace = Object.keys(CHARACTERS_TO_NORMALIZE).join('');
  79. normalizationRegex = new RegExp(`[${replace}]`, 'g');
  80. }
  81. return text.replace(normalizationRegex, ch => CHARACTERS_TO_NORMALIZE[ch]);
  82. };
  83. export const calcFindPhraseMatch = (pageContent: string, query: string): number[] => {
  84. const matches = [];
  85. const queryLen = query.length;
  86. let matchIdx = -queryLen;
  87. while (query) {
  88. matchIdx = pageContent.indexOf(query, matchIdx + queryLen);
  89. if (matchIdx === -1) break;
  90. matches.push(matchIdx);
  91. }
  92. return matches;
  93. };
  94. export const convertMatches = (
  95. queryString: string,
  96. matchIndex: number,
  97. textContentItem: any[],
  98. ): Record<string, any> => {
  99. let i = 0;
  100. let iIndex = 0;
  101. const end = textContentItem.length - 1;
  102. const queryLen = queryString.length;
  103. // Loop over the divIdxs.
  104. while (i !== end && matchIndex >= (iIndex + textContentItem[i].length)) {
  105. iIndex += textContentItem[i].length;
  106. i += 1;
  107. }
  108. if (i === textContentItem.length) {
  109. console.error('Could not find a matching mapping');
  110. }
  111. const match: Record<string, any> = {
  112. begin: {
  113. divIdx: i,
  114. offset: matchIndex - iIndex,
  115. },
  116. };
  117. // Calculate the end position.
  118. // eslint-disable-next-line no-param-reassign
  119. matchIndex += queryLen;
  120. // Somewhat the same array as above, but use > instead of >= to get
  121. // the end position right.
  122. while (i !== end && matchIndex > (iIndex + textContentItem[i].length)) {
  123. iIndex += textContentItem[i].length;
  124. i += 1;
  125. }
  126. match.end = {
  127. divIdx: i,
  128. offset: matchIndex - iIndex,
  129. };
  130. return match;
  131. };
  132. export const getPdfPage = async (pdf: any, pageNum: number): Promise<any> => {
  133. const page = await pdf.getPage(pageNum);
  134. return page;
  135. };