import pdfjs from 'pdfjs-dist';
import { delay } from './time';

// Module-level pdf.js configuration: worker script location and character-map
// settings.
pdfjs.GlobalWorkerOptions.workerSrc = '/static/build/pdf.worker.min.js';
pdfjs.cMapUrl = '/static/cmaps/';
pdfjs.cMapPacked = true;

/** Compiled lazily by `normalize`; stays `null` until the first call. */
let normalizationRegex: RegExp | null = null;

/** Typographic characters mapped to the plain-ASCII text that replaces them. */
const CHARACTERS_TO_NORMALIZE: { [index: string]: string } = {
  '\u2018': "'", // Left single quotation mark
  '\u2019': "'", // Right single quotation mark
  '\u201A': "'", // Single low-9 quotation mark
  '\u201B': "'", // Single high-reversed-9 quotation mark
  '\u201C': '"', // Left double quotation mark
  '\u201D': '"', // Right double quotation mark
  '\u201E': '"', // Double low-9 quotation mark
  '\u201F': '"', // Double high-reversed-9 quotation mark
  '\u00BC': '1/4', // Vulgar fraction one quarter
  '\u00BD': '1/2', // Vulgar fraction one half
  '\u00BE': '3/4', // Vulgar fraction three quarters
};

/**
 * Loads a PDF document through pdf.js.
 *
 * @param src - URL of the document to load.
 * @param cb - Optional callback that receives every progress event emitted by
 *   the loading task.
 * @returns The loaded document. When loading fails the error is logged and an
 *   empty object is returned instead of rejecting.
 */
export const fetchPdf = async (
  src: string,
  cb?: (progress: ProgressType) => void
  // eslint-disable-next-line @typescript-eslint/no-explicit-any -- callers consume the pdf.js document untyped
): Promise<any> => {
  try {
    const loadingTask = pdfjs.getDocument({
      url: src,
      cMapUrl: '/static/cmaps/',
      cMapPacked: true,
    });

    if (cb) {
      loadingTask.onProgress = (progress: ProgressType): void => {
        cb(progress);
      };
    }

    return await loadingTask.promise;
  } catch (e) {
    // Best-effort: the failure is logged and the caller gets the `{}` fallback.
    console.log(e);
  }

  return {};
};

/**
 * Renders the text content of a page into `textLayer`.
 *
 * @param pdfPage - pdf.js page; its `getTextContent()` result is rendered.
 * @param textLayer - Container element that receives the text layer.
 * @param viewport - Viewport passed through to pdf.js.
 * @param setTextDivs - Optional callback that receives the text elements
 *   collected by pdf.js once rendering has finished.
 */
export const renderTextLayer = async ({
  pdfPage,
  textLayer,
  viewport,
  setTextDivs,
}: {
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  pdfPage: any;
  textLayer: HTMLElement;
  viewport: ViewportType;
  setTextDivs?: (elements: HTMLElement[]) => void;
}): Promise<void> => {
  const textContent = await pdfPage.getTextContent();
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  const textDivs: any[] = [];

  const textLayerTask = pdfjs.renderTextLayer({
    textContent,
    container: textLayer,
    viewport,
    textDivs,
  });
  // The value returned by pdf.js is a render task, not a promise; awaiting the
  // task itself would not wait for anything, so wait on its `promise`.
  await textLayerTask.promise;

  if (setTextDivs) {
    setTextDivs(textDivs);
  }
};

/**
 * Draws a page onto the first `<canvas>` inside `rootEle`, then rebuilds the
 * text layer in the `[data-id="text-layer"]` element.
 *
 * Nothing happens when `rootEle` is falsy or contains no canvas. The text layer
 * step is skipped when the text-layer element or `pdfPage` is missing.
 *
 * @param rootEle - Element containing the canvas and the text-layer element.
 * @param pdfPage - pdf.js page to render.
 * @param viewport - Viewport whose width/height size the canvas.
 * @param setRenderTask - Receives the render task returned by `pdfPage.render`.
 * @param setTextDivs - Receives the text elements after the text layer renders.
 */
export const renderPdfPage = async ({
  rootEle,
  pdfPage,
  viewport,
  setRenderTask,
  setTextDivs,
}: {
  rootEle: HTMLElement;
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  pdfPage: any;
  viewport: ViewportType;
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  setRenderTask: any;
  setTextDivs: (elements: HTMLElement[]) => void;
}): Promise<void> => {
  if (!rootEle) {
    return;
  }

  const canvas = rootEle.querySelector('canvas');
  if (!canvas) {
    return;
  }
  const textLayer = rootEle.querySelector<HTMLDivElement>(
    '[data-id="text-layer"]'
  );

  const context = canvas.getContext('2d') as CanvasRenderingContext2D;
  canvas.height = viewport.height;
  canvas.width = viewport.width;

  if (pdfPage) {
    const renderTask = pdfPage.render({ canvasContext: context, viewport });
    setRenderTask(renderTask);
    // A rejected render is only logged so the text layer is still rebuilt.
    await renderTask.promise.catch((reason: string) => {
      console.log(`stopped ${reason}`);
    });
  }

  // querySelector may find no text-layer element; there is nothing to rebuild.
  if (!textLayer) {
    return;
  }
  textLayer.innerHTML = '';

  // Without a page there is no text content to fetch.
  if (!pdfPage) {
    return;
  }

  // NOTE(review): `delay` comes from ./time; presumably it waits 200 ms before
  // the text layer is rendered. Confirm the intent of this pause.
  await delay(200);
  await renderTextLayer({
    pdfPage,
    textLayer,
    viewport,
    setTextDivs,
  });
};

/**
 * Replaces every character listed in `CHARACTERS_TO_NORMALIZE` with its
 * plain-text equivalent.
 *
 * @param text - Text to normalize.
 * @returns The text with all mapped characters substituted.
 */
export const normalize = (text: string): string => {
  let regex = normalizationRegex;
  if (!regex) {
    // Compile the regular expression for text normalization once.
    const replace = Object.keys(CHARACTERS_TO_NORMALIZE).join('');
    regex = new RegExp(`[${replace}]`, 'g');
    normalizationRegex = regex;
  }
  return text.replace(regex, ch => CHARACTERS_TO_NORMALIZE[ch]);
};

/**
 * Finds the start indices of all non-overlapping occurrences of `query` in
 * `pageContent`, scanning left to right.
 *
 * @param pageContent - Text to search.
 * @param query - Phrase to look for.
 * @returns Match start indices in ascending order; empty when either argument
 *   is empty.
 */
export const calculatePhraseMatch = (
  pageContent: string,
  query: string
): number[] => {
  const matches: number[] = [];
  if (!pageContent || !query) {
    return matches;
  }

  const queryLen = query.length;
  // Start one query-length before 0 so the first search begins at index 0.
  let matchIdx = -queryLen;
  for (;;) {
    matchIdx = pageContent.indexOf(query, matchIdx + queryLen);
    if (matchIdx === -1) {
      break;
    }
    matches.push(matchIdx);
  }
  return matches;
};

/**
 * Fetches a single page from a pdf.js document.
 *
 * @param pdf - Document object exposing `getPage`.
 * @param pageNum - Page number forwarded to `pdf.getPage`.
 * @returns The value `pdf.getPage(pageNum)` resolves to.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
export const getPdfPage = async (pdf: any, pageNum: number): Promise<any> => {
  const page = await pdf.getPage(pageNum);
  return page;
};

/**
 * Sets the CSS `overflow` of the element with id `pdf_viewer`.
 *
 * @param state - Overflow value to apply; defaults to `'auto'`.
 */
export const switchPdfViewerScrollState = (state = 'auto'): void => {
  const pdfViewer = document.getElementById('pdf_viewer');
  // The viewer element may not be in the DOM; in that case do nothing.
  if (pdfViewer) {
    pdfViewer.style.overflow = state;
  }
};