pdf.ts 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173
  1. import pdfjs from 'pdfjs-dist/es5/build/pdf.js';
  2. import { delay } from './time';
  3. pdfjs.GlobalWorkerOptions.workerSrc = '/static/build/pdf.worker.min.js';
  4. let normalizationRegex: RegExp | null = null;
  5. const CHARACTERS_TO_NORMALIZE: { [index: string]: string } = {
  6. '\u2018': "'", // Left single quotation mark
  7. '\u2019': "'", // Right single quotation mark
  8. '\u201A': "'", // Single low-9 quotation mark
  9. '\u201B': "'", // Single high-reversed-9 quotation mark
  10. '\u201C': '"', // Left double quotation mark
  11. '\u201D': '"', // Right double quotation mark
  12. '\u201E': '"', // Double low-9 quotation mark
  13. '\u201F': '"', // Double high-reversed-9 quotation mark
  14. '\u00BC': '1/4', // Vulgar fraction one quarter
  15. '\u00BD': '1/2', // Vulgar fraction one half
  16. '\u00BE': '3/4', // Vulgar fraction three quarters
  17. };
  18. export const fetchPdf = async (
  19. src: string,
  20. cb?: (progress: ProgressType) => void,
  21. ): Promise<PdfType> => {
  22. try {
  23. const loadingTask = pdfjs.getDocument({
  24. url: src,
  25. cMapUrl: '/static/cmaps/',
  26. cMapPacked: true,
  27. });
  28. if (cb) {
  29. loadingTask.onProgress = (progress: ProgressType): void => {
  30. cb(progress);
  31. };
  32. }
  33. const pdf = await loadingTask.promise;
  34. return pdf;
  35. } catch (e) {
  36. console.log(e);
  37. }
  38. return null;
  39. };
  40. export const renderTextLayer = async ({
  41. pdfPage,
  42. textLayer,
  43. viewport,
  44. setTextDivs,
  45. }: {
  46. pdfPage: PdfPageType;
  47. textLayer: HTMLElement;
  48. viewport: ViewportType;
  49. setTextDivs?: (elements: HTMLElement[]) => void;
  50. }): Promise<void> => {
  51. if (!pdfPage) return;
  52. const textContent = await pdfPage.getTextContent({
  53. normalizeWhitespace: true,
  54. });
  55. const textDivs: HTMLElement[] = [];
  56. await pdfjs.renderTextLayer({
  57. textContent,
  58. container: textLayer,
  59. viewport,
  60. textDivs,
  61. });
  62. if (setTextDivs) {
  63. setTextDivs(textDivs);
  64. }
  65. };
  66. export const renderPdfPage = async ({
  67. rootEle,
  68. pdfPage,
  69. viewport,
  70. setRenderTask,
  71. setTextDivs,
  72. }: {
  73. rootEle: HTMLElement;
  74. pdfPage: PdfPageType;
  75. viewport: ViewportType;
  76. setRenderTask: (arg0: RenderTaskType) => void;
  77. setTextDivs: (elements: HTMLElement[]) => void;
  78. }): Promise<void> => {
  79. if (rootEle) {
  80. const canvas: HTMLCanvasElement = rootEle.querySelectorAll(
  81. 'canvas',
  82. )[0] as HTMLCanvasElement;
  83. const textLayer: HTMLDivElement = rootEle.querySelector(
  84. '[data-id="text-layer"]',
  85. ) as HTMLDivElement;
  86. if (canvas) {
  87. const context: CanvasRenderingContext2D = canvas.getContext(
  88. '2d',
  89. ) as CanvasRenderingContext2D;
  90. canvas.height = viewport.height;
  91. canvas.width = viewport.width;
  92. const renderContext = {
  93. canvasContext: context,
  94. viewport,
  95. };
  96. if (pdfPage) {
  97. const renderTask = pdfPage.render(renderContext);
  98. setRenderTask(renderTask);
  99. renderTask.promise.catch((reason: string) => {
  100. console.log(`stopped ${reason}`);
  101. });
  102. }
  103. textLayer.innerHTML = '';
  104. await delay(200);
  105. await renderTextLayer({
  106. pdfPage,
  107. textLayer,
  108. viewport,
  109. setTextDivs,
  110. });
  111. }
  112. }
  113. };
  114. export const normalize = (text: string): string => {
  115. if (!normalizationRegex) {
  116. // Compile the regular expression for text normalization once.
  117. const replace = Object.keys(CHARACTERS_TO_NORMALIZE).join('');
  118. normalizationRegex = new RegExp(`[${replace}]`, 'g');
  119. }
  120. return text.replace(normalizationRegex, (ch) => CHARACTERS_TO_NORMALIZE[ch]);
  121. };
  122. export const calculatePhraseMatch = (
  123. pageContent: string,
  124. query: string,
  125. ): number[] => {
  126. const matches = [];
  127. const queryLen = query.length;
  128. let matchIdx = -queryLen;
  129. if (pageContent) {
  130. while (query) {
  131. matchIdx = pageContent.indexOf(query, matchIdx + queryLen);
  132. if (matchIdx === -1) break;
  133. matches.push(matchIdx);
  134. }
  135. }
  136. return matches;
  137. };
  138. export const getPdfPage = async (
  139. pdf: PdfType,
  140. pageNum: number,
  141. ): Promise<PdfPageType> => {
  142. if (pdf) {
  143. const page = await pdf.getPage(pageNum);
  144. return page;
  145. }
  146. return null;
  147. };
  148. export const switchPdfViewerScrollState = (state = 'auto') => {
  149. const pdfViewer = document.getElementById('pdf_viewer') as HTMLDivElement;
  150. pdfViewer.style.overflow = state;
  151. };