pdf.ts 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177
  1. import { delay } from './time';
  /**
   * Typographic characters and the plain-ASCII text that replaces each of them.
   * `normalize` joins these keys into a single regex character class.
   */
  const CHARACTERS_TO_NORMALIZE: Record<string, string> = {
    // Single quotation marks -> apostrophe
    '\u2018': "'", // left
    '\u2019': "'", // right
    '\u201A': "'", // low-9
    '\u201B': "'", // high-reversed-9

    // Double quotation marks -> straight double quote
    '\u201C': '"', // left
    '\u201D': '"', // right
    '\u201E': '"', // low-9
    '\u201F': '"', // high-reversed-9

    // Vulgar fractions -> "n/d" text
    '\u00BC': '1/4',
    '\u00BD': '1/2',
    '\u00BE': '3/4',
  };

  /** Cache slot for the compiled normalization regex; `null` until `normalize` fills it. */
  let normalizationRegex: RegExp | null = null;
  /**
   * Loads the PDF document at `src` using a dynamically imported pdf.js build.
   *
   * @param src - URL passed to `pdfjs.getDocument`.
   * @param cb - Optional listener; when supplied, every progress event emitted
   *   by the loading task is forwarded to it.
   * @returns The loaded document, or `null` if any step throws (the error is
   *   written to the console and not re-thrown).
   */
  export const fetchPdf = async (
    src: string,
    cb?: (progress: ProgressType) => void,
  ): Promise<PdfType> => {
    try {
      const pdfjsLib = await import('pdfjs-dist/es5/build/pdf.js');
      pdfjsLib.GlobalWorkerOptions.workerSrc = '/static/build/pdf.worker.min.js';

      const task = pdfjsLib.getDocument({
        url: src,
        cMapUrl: '/static/cmaps/',
        cMapPacked: true,
      });

      // Only hook progress reporting when the caller asked for it.
      if (cb) {
        task.onProgress = (progress: ProgressType): void => {
          cb(progress);
        };
      }

      return await task.promise;
    } catch (err) {
      console.log(err);
      return null;
    }
  };
  /**
   * Renders the text content of `pdfPage` into `textLayer` and reports the
   * created text elements.
   *
   * Does nothing when `pdfPage` is falsy.
   *
   * @param pdfPage - Page whose text content is extracted.
   * @param textLayer - Container element handed to pdf.js as the render target.
   * @param viewport - Viewport passed through to pdf.js.
   * @param setTextDivs - Optional callback that receives the array pdf.js
   *   fills with text elements; called once rendering has finished.
   * @returns A promise that settles after the text layer render completes.
   *   If the render task's promise rejects, the rejection propagates to the
   *   caller.
   */
  export const renderTextLayer = async ({
    pdfPage,
    textLayer,
    viewport,
    setTextDivs,
  }: {
    pdfPage: PdfPageType;
    textLayer: HTMLElement;
    viewport: ViewportType;
    setTextDivs?: (elements: HTMLElement[]) => void;
  }): Promise<void> => {
    if (!pdfPage) return;

    const pdfjs = await import('pdfjs-dist/es5/build/pdf.js');
    pdfjs.GlobalWorkerOptions.workerSrc = '/static/build/pdf.worker.min.js';

    const textContent = await pdfPage.getTextContent({
      normalizeWhitespace: true,
    });

    const textDivs: HTMLElement[] = [];
    const task = pdfjs.renderTextLayer({
      textContent,
      container: textLayer,
      viewport,
      textDivs,
    });

    // pdf.js hands back a render task object rather than a promise; completion
    // is signalled through its `promise` property. Awaiting the task itself
    // would resolve immediately, so wait on `promise` when it is available and
    // fall back to the returned value otherwise.
    await (task && task.promise ? task.promise : task);

    if (setTextDivs) {
      setTextDivs(textDivs);
    }
  };
  /**
   * Draws `pdfPage` onto the `<canvas>` found under `rootEle`, then rebuilds
   * the text layer found under the same root.
   *
   * Steps:
   *  1. Size the first `<canvas>` in `rootEle` to the viewport.
   *  2. If `pdfPage` is truthy, start rendering and hand the render task to
   *     `setRenderTask`; a rejected render is logged, not thrown.
   *  3. Clear the `[data-id="text-layer"]` element, wait 200 ms, and render
   *     the text layer into it.
   *
   * Returns without doing anything when `rootEle` or the canvas is missing.
   * When only the text-layer element is missing, the canvas is still rendered
   * and the text-layer steps are skipped instead of throwing.
   *
   * @param rootEle - Element containing the canvas and text-layer nodes.
   * @param pdfPage - Page to render.
   * @param viewport - Supplies the canvas dimensions and is passed to pdf.js.
   * @param setRenderTask - Receives the canvas render task.
   * @param setTextDivs - Forwarded to `renderTextLayer`.
   */
  export const renderPdfPage = async ({
    rootEle,
    pdfPage,
    viewport,
    setRenderTask,
    setTextDivs,
  }: {
    rootEle: HTMLElement;
    pdfPage: PdfPageType;
    viewport: ViewportType;
    setRenderTask: (arg0: RenderTaskType) => void;
    setTextDivs: (elements: HTMLElement[]) => void;
  }): Promise<void> => {
    if (!rootEle) return;

    // `querySelector` yields the first match or null, so no cast is needed.
    const canvas = rootEle.querySelector('canvas');
    if (!canvas) return;

    const textLayer = rootEle.querySelector<HTMLDivElement>(
      '[data-id="text-layer"]',
    );

    const context = canvas.getContext('2d') as CanvasRenderingContext2D;
    canvas.height = viewport.height;
    canvas.width = viewport.width;

    if (pdfPage) {
      const renderTask = pdfPage.render({
        canvasContext: context,
        viewport,
      });
      setRenderTask(renderTask);
      // Log a rejected render here so it does not surface as an unhandled
      // rejection.
      renderTask.promise.catch((reason: string) => {
        console.log(`stopped ${reason}`);
      });
    }

    // Without a text-layer element there is nothing to clear or fill.
    if (!textLayer) return;

    textLayer.innerHTML = '';
    await delay(200);
    await renderTextLayer({
      pdfPage,
      textLayer,
      viewport,
      setTextDivs,
    });
  };
  /**
   * Replaces every character listed in `CHARACTERS_TO_NORMALIZE` with its
   * mapped replacement text.
   *
   * @param text - Input string.
   * @returns `text` with each listed character substituted.
   */
  export const normalize = (text: string): string => {
    let pattern = normalizationRegex;
    if (pattern === null) {
      // First call: build one global character class from all the keys and
      // cache it at module scope for later calls.
      const charClass = Object.keys(CHARACTERS_TO_NORMALIZE).join('');
      pattern = new RegExp(`[${charClass}]`, 'g');
      normalizationRegex = pattern;
    }
    return text.replace(pattern, (match) => CHARACTERS_TO_NORMALIZE[match]);
  };
  /**
   * Finds the start offsets of `query` inside `pageContent`.
   *
   * Each search resumes immediately after the end of the previous hit, so the
   * reported occurrences never overlap.
   *
   * @param pageContent - Text to search.
   * @param query - Phrase to look for.
   * @returns Ascending match offsets; empty when either argument is empty or
   *   there is no occurrence.
   */
  export const calculatePhraseMatch = (
    pageContent: string,
    query: string,
  ): number[] => {
    const offsets: number[] = [];
    if (!pageContent || !query) return offsets;

    const step = query.length;
    for (
      let at = pageContent.indexOf(query, 0);
      at !== -1;
      at = pageContent.indexOf(query, at + step)
    ) {
      offsets.push(at);
    }
    return offsets;
  };
  /**
   * Retrieves one page from a loaded document.
   *
   * @param pdf - Document to read from; may be falsy.
   * @param pageNum - Page number forwarded to `pdf.getPage`.
   * @returns The page returned by `pdf.getPage`, or `null` when `pdf` is falsy.
   */
  export const getPdfPage = async (
    pdf: PdfType,
    pageNum: number,
  ): Promise<PdfPageType> => {
    if (!pdf) {
      return null;
    }
    return await pdf.getPage(pageNum);
  };
  /**
   * Sets the CSS `overflow` of the element with id `pdf_viewer`.
   *
   * Does nothing when that element is not in the document, instead of throwing
   * on a `null` lookup result.
   *
   * @param state - Value assigned to `style.overflow`; defaults to `'auto'`.
   */
  export const switchPdfViewerScrollState = (state = 'auto'): void => {
    const pdfViewer = document.getElementById('pdf_viewer');
    // `getElementById` returns null when no element has the given id.
    if (!pdfViewer) return;
    pdfViewer.style.overflow = state;
  };