// pdf.ts

  1. import pdfjs from 'pdfjs-dist';
  2. import { delay } from './time';
  // Module-level pdf.js setup, executed once when this file is imported.
  // Points pdf.js at the worker bundle served from the static build directory.
  pdfjs.GlobalWorkerOptions.workerSrc = '/static/build/pdf.worker.min.js';
  // Character-map settings written onto the pdfjs object itself; the same two
  // values are also passed explicitly to getDocument() in fetchPdf.
  // NOTE(review): confirm pdf.js actually reads these top-level properties.
  Object.assign(pdfjs, { cMapUrl: '/static/cmaps/', cMapPacked: true });
  // Cache for the regular expression built by normalize(); stays null until
  // normalize() is called for the first time.
  let normalizationRegex: RegExp | null = null;

  // Maps typographic characters to the plain-ASCII text that replaces them.
  // Every key is a single character; normalize() joins the keys into one
  // character class, so keys must remain single, regex-safe characters.
  const CHARACTERS_TO_NORMALIZE: Record<string, string> = {
    '\u2018': "'", // Left single quotation mark
    '\u2019': "'", // Right single quotation mark
    '\u201A': "'", // Single low-9 quotation mark
    '\u201B': "'", // Single high-reversed-9 quotation mark
    '\u201C': '"', // Left double quotation mark
    '\u201D': '"', // Right double quotation mark
    '\u201E': '"', // Double low-9 quotation mark
    '\u201F': '"', // Double high-reversed-9 quotation mark
    '\u00BC': '1/4', // Vulgar fraction one quarter
    '\u00BD': '1/2', // Vulgar fraction one half
    '\u00BE': '3/4', // Vulgar fraction three quarters
  };
  /**
   * Loads a PDF document from `src` through pdf.js.
   *
   * @param src URL handed to `pdfjs.getDocument`.
   * @param cb  Optional listener; when given, it is invoked with every progress
   *            event reported by the loading task.
   * @returns The value the loading task's promise resolves to. If anything in
   *          the load throws or rejects, the error is logged with `console.log`
   *          and an empty object is returned instead of rethrowing.
   */
  export const fetchPdf = async (
    src: string,
    cb?: (progress: ProgressType) => void
  ): Promise<any> => {
    try {
      const task = pdfjs.getDocument({
        url: src,
        cMapUrl: '/static/cmaps/',
        cMapPacked: true,
      });

      if (cb) {
        task.onProgress = (progress: ProgressType): void => {
          cb(progress);
        };
      }

      // Await inside the try so a rejected load lands in the catch below.
      return await task.promise;
    } catch (err) {
      console.log(err);
      return {};
    }
  };
  /**
   * Renders the text layer of a PDF page into `textLayer`.
   *
   * Reads the page's text content, passes it to `pdfjs.renderTextLayer` together
   * with a fresh array that pdf.js fills with the created elements, waits for the
   * rendering to finish, and then reports that array through `setTextDivs`.
   *
   * @param pdfPage     Page object; must provide `getTextContent()`.
   * @param textLayer   Container element that receives the rendered text.
   * @param viewport    Viewport forwarded unchanged to pdf.js.
   * @param setTextDivs Optional callback receiving the array of text elements.
   * @returns A promise that settles once rendering is done; it carries no value.
   */
  export const renderTextLayer = async ({
    pdfPage,
    textLayer,
    viewport,
    setTextDivs,
  }: {
    pdfPage: any;
    textLayer: HTMLElement;
    viewport: ViewportType;
    setTextDivs?: (elements: HTMLElement[]) => void;
  }): Promise<any> => {
    const textContent = await pdfPage.getTextContent();
    const textDivs: HTMLElement[] = [];

    const task = pdfjs.renderTextLayer({
      textContent,
      container: textLayer,
      viewport,
      textDivs,
    });
    // pdf.js returns a render task here, not a promise, so awaiting the task
    // itself would continue immediately. Wait on its `promise`; fall back to the
    // returned value in case a build hands back something directly awaitable.
    await (task.promise || task);

    if (setTextDivs) {
      setTextDivs(textDivs);
    }
  };
  /**
   * Draws a PDF page onto the first `<canvas>` under `rootEle`, then rebuilds the
   * text layer in the descendant marked `data-id="text-layer"`.
   *
   * Does nothing when `rootEle` is falsy or contains no canvas. The canvas is
   * resized to the viewport before drawing. A failed or cancelled canvas render
   * is logged and does not prevent the text layer from being rebuilt.
   *
   * @param rootEle       Element containing the canvas and the text-layer node.
   * @param pdfPage       Page object providing `render()`; when falsy, the canvas
   *                      is only resized and the text layer only cleared.
   * @param viewport      Supplies the canvas width/height and is forwarded to pdf.js.
   * @param setRenderTask Called with the render task returned by `pdfPage.render`.
   * @param setTextDivs   Forwarded to `renderTextLayer`.
   * @returns A promise that settles when all rendering steps are finished.
   */
  export const renderPdfPage = async ({
    rootEle,
    pdfPage,
    viewport,
    setRenderTask,
    setTextDivs,
  }: {
    rootEle: HTMLElement;
    pdfPage: any;
    viewport: ViewportType;
    setRenderTask: any;
    setTextDivs: (elements: HTMLElement[]) => void;
  }): Promise<any> => {
    if (!rootEle) {
      return;
    }

    const canvas = rootEle.querySelector('canvas');
    if (!canvas) {
      return;
    }
    // May be null when the markup has no text-layer node; checked before use.
    const textLayer = rootEle.querySelector<HTMLElement>(
      '[data-id="text-layer"]'
    );

    const canvasContext = canvas.getContext('2d');
    canvas.height = viewport.height;
    canvas.width = viewport.width;

    if (pdfPage) {
      const renderTask = pdfPage.render({ canvasContext, viewport });
      setRenderTask(renderTask);
      await renderTask.promise.catch((reason: unknown) => {
        console.log(`stopped ${reason}`);
      });
    }

    if (!textLayer) {
      return;
    }
    textLayer.innerHTML = '';

    // Without a page there is no text content to render; stop here instead of
    // failing inside renderTextLayer on `pdfPage.getTextContent()`.
    if (!pdfPage) {
      return;
    }

    await delay(200);
    await renderTextLayer({
      pdfPage,
      textLayer,
      viewport,
      setTextDivs,
    });
  };
  /**
   * Replaces every character that is a key of CHARACTERS_TO_NORMALIZE with its
   * mapped replacement and returns the resulting string.
   *
   * The matching regular expression is built from the table's keys on the first
   * call and cached in `normalizationRegex` for later calls.
   */
  export const normalize = (text: string): string => {
    // Build the character-class regex only once; reuse the cached one afterwards.
    normalizationRegex =
      normalizationRegex ||
      new RegExp(`[${Object.keys(CHARACTERS_TO_NORMALIZE).join('')}]`, 'g');

    return text.replace(
      normalizationRegex,
      matched => CHARACTERS_TO_NORMALIZE[matched]
    );
  };
  /**
   * Finds the start offsets of every non-overlapping occurrence of `query` in
   * `pageContent`, scanning left to right.
   *
   * @param pageContent Text to search in.
   * @param query       Exact phrase to look for (case-sensitive).
   * @returns Ascending list of match offsets; empty when either argument is
   *          empty or the phrase does not occur.
   */
  export const calculatePhraseMatch = (
    pageContent: string,
    query: string
  ): number[] => {
    const offsets: number[] = [];
    if (!pageContent || !query) {
      return offsets;
    }

    // Each search resumes right after the previous hit, so matches never overlap.
    const step = query.length;
    for (
      let at = pageContent.indexOf(query, 0);
      at !== -1;
      at = pageContent.indexOf(query, at + step)
    ) {
      offsets.push(at);
    }
    return offsets;
  };
  /**
   * Asks the document for one of its pages.
   *
   * @param pdf     Document object providing `getPage()`.
   * @param pageNum Page number passed straight through to `pdf.getPage`.
   * @returns A promise for whatever `pdf.getPage(pageNum)` resolves to.
   */
  export const getPdfPage = async (pdf: any, pageNum: number): Promise<any> =>
    pdf.getPage(pageNum);
  /**
   * Sets the CSS `overflow` of the element with id `pdf_viewer`.
   *
   * @param state Value assigned to `style.overflow`; defaults to `'auto'`.
   *
   * Does nothing when no element with that id is in the document, instead of
   * throwing on a null lookup result.
   */
  export const switchPdfViewerScrollState = (state = 'auto'): void => {
    const pdfViewer = document.getElementById('pdf_viewer');
    if (!pdfViewer) {
      return;
    }
    pdfViewer.style.overflow = state;
  };