import pdfjs from 'pdfjs-dist';
import { delay } from './time';

// NOTE(review): `ProgressType` and `ViewportType` are not imported in this
// file; presumably they are ambient/global project types — confirm.

/** Location of the character-map files handed to pdf.js. */
const CMAP_URL = '/static/cmaps/';

/** Pause inserted between clearing the text layer and rebuilding it. */
const TEXT_LAYER_DELAY = 200;

pdfjs.GlobalWorkerOptions.workerSrc = '/static/build/pdf.worker.min.js';
pdfjs.cMapUrl = CMAP_URL;
pdfjs.cMapPacked = true;

/** Lazily compiled by `normalize`; stays `null` until the first call. */
let normalizationRegex: RegExp | null = null;

/** Typographic characters and the plain-ASCII text that replaces them. */
const CHARACTERS_TO_NORMALIZE: Record<string, string> = {
  '\u2018': "'", // Left single quotation mark
  '\u2019': "'", // Right single quotation mark
  '\u201A': "'", // Single low-9 quotation mark
  '\u201B': "'", // Single high-reversed-9 quotation mark
  '\u201C': '"', // Left double quotation mark
  '\u201D': '"', // Right double quotation mark
  '\u201E': '"', // Double low-9 quotation mark
  '\u201F': '"', // Double high-reversed-9 quotation mark
  '\u00BC': '1/4', // Vulgar fraction one quarter
  '\u00BD': '1/2', // Vulgar fraction one half
  '\u00BE': '3/4', // Vulgar fraction three quarters
};

/**
 * Loads a PDF document through `pdfjs.getDocument`.
 *
 * @param src - URL of the document to load.
 * @param cb - Optional callback invoked with every progress notification of
 *   the loading task.
 * @returns Whatever the loading task's promise resolves to. If loading
 *   throws, the error is logged and an empty object is returned instead, so
 *   callers must be prepared for `{}`.
 */
export const fetchPdf = async (
  src: string,
  cb?: (progress: ProgressType) => void
): Promise<any> => {
  try {
    const loadingTask = pdfjs.getDocument({
      url: src,
      cMapUrl: CMAP_URL,
      cMapPacked: true,
    });

    if (cb) {
      loadingTask.onProgress = (progress: ProgressType): void => {
        cb(progress);
      };
    }

    return await loadingTask.promise;
  } catch (e) {
    // Best-effort by design: report the failure and fall back to `{}`.
    console.error('Failed to load PDF document', e);
    return {};
  }
};

/**
 * Renders the text layer of a page into `textLayer`.
 *
 * @param pdfPage - Page object; only its `getTextContent()` method is used.
 * @param textLayer - Container element passed to `pdfjs.renderTextLayer`.
 * @param viewport - Viewport passed through to `pdfjs.renderTextLayer`.
 * @param setTextDivs - Optional callback that receives the array pdf.js was
 *   given to collect the text elements in.
 */
export const renderTextLayer = async ({
  pdfPage,
  textLayer,
  viewport,
  setTextDivs,
}: {
  pdfPage: any;
  textLayer: HTMLElement;
  viewport: ViewportType;
  setTextDivs?: (elements: HTMLElement[]) => void;
}): Promise<void> => {
  const textContent = await pdfPage.getTextContent();
  const textDivs: HTMLElement[] = [];

  const textLayerTask = pdfjs.renderTextLayer({
    textContent,
    container: textLayer,
    viewport,
    textDivs,
  });
  // `renderTextLayer` hands back a task object rather than a promise, so wait
  // on the task's `promise` to be sure rendering has finished.
  await textLayerTask.promise;

  if (setTextDivs) {
    setTextDivs(textDivs);
  }
};

/**
 * Draws a page onto the `<canvas>` found inside `rootEle` and then rebuilds
 * the text layer in the `[data-id="text-layer"]` element.
 *
 * Nothing happens when `rootEle` is falsy or contains no canvas. The canvas
 * is always resized to the viewport; the page render and the text layer are
 * skipped when `pdfPage` is falsy, and the text layer is skipped when its
 * container is missing.
 *
 * @param rootEle - Element containing the canvas and the text-layer container.
 * @param pdfPage - Page object providing `render()` and `getTextContent()`.
 * @param viewport - Supplies the canvas `width`/`height` and is passed to the
 *   renderers.
 * @param setRenderTask - Receives the task returned by `pdfPage.render()`
 *   before it is awaited.
 * @param setTextDivs - Forwarded to `renderTextLayer`.
 */
export const renderPdfPage = async ({
  rootEle,
  pdfPage,
  viewport,
  setRenderTask,
  setTextDivs,
}: {
  rootEle: HTMLElement;
  pdfPage: any;
  viewport: ViewportType;
  setRenderTask: any;
  setTextDivs: (elements: HTMLElement[]) => void;
}): Promise<void> => {
  if (!rootEle) {
    return;
  }

  const canvas = rootEle.querySelector('canvas');
  const textLayer = rootEle.querySelector<HTMLDivElement>(
    '[data-id="text-layer"]'
  );
  if (!canvas) {
    return;
  }

  const context = canvas.getContext('2d');
  canvas.height = viewport.height;
  canvas.width = viewport.width;

  if (pdfPage) {
    const renderTask = pdfPage.render({ canvasContext: context, viewport });
    setRenderTask(renderTask);
    try {
      await renderTask.promise;
    } catch (reason) {
      // A rejected render is only reported; the text layer is still rebuilt.
      console.log(`stopped ${reason}`);
    }
  }

  if (!textLayer) {
    // No container to draw the text layer into.
    return;
  }
  textLayer.innerHTML = '';

  if (!pdfPage) {
    // Without a page there is no text content to fetch.
    return;
  }

  await delay(TEXT_LAYER_DELAY);
  await renderTextLayer({
    pdfPage,
    textLayer,
    viewport,
    setTextDivs,
  });
};

/**
 * Replaces every character listed in `CHARACTERS_TO_NORMALIZE` with its
 * plain-text equivalent.
 *
 * @param text - Text to normalize.
 * @returns The text with all listed characters substituted.
 */
export const normalize = (text: string): string => {
  if (!normalizationRegex) {
    // Compile the regular expression for text normalization once.
    const replace = Object.keys(CHARACTERS_TO_NORMALIZE).join('');
    normalizationRegex = new RegExp(`[${replace}]`, 'g');
  }
  return text.replace(
    normalizationRegex,
    (ch) => CHARACTERS_TO_NORMALIZE[ch] ?? ch
  );
};

/**
 * Finds every non-overlapping occurrence of `query` in `pageContent`.
 *
 * @param pageContent - Text to search.
 * @param query - Phrase to look for.
 * @returns Start indices of the matches in ascending order; empty when either
 *   argument is empty.
 */
export const calculatePhraseMatch = (
  pageContent: string,
  query: string
): number[] => {
  const matches: number[] = [];
  if (!pageContent || !query) {
    return matches;
  }

  const queryLen = query.length;
  let matchIdx = pageContent.indexOf(query);
  while (matchIdx !== -1) {
    matches.push(matchIdx);
    // Resume after the current hit so matches never overlap.
    matchIdx = pageContent.indexOf(query, matchIdx + queryLen);
  }
  return matches;
};

/**
 * Converts a match position, expressed as an index into the concatenation of
 * all text items, into item indices and in-item offsets.
 *
 * @param queryString - The matched phrase; only its length is used.
 * @param matchIndex - Start index of the match in the concatenated text.
 * @param textContentItem - Text items in order; only each entry's `length` is
 *   read (presumably these are strings — confirm against the caller).
 * @returns An object with `begin` and `end` entries, each holding the item
 *   index (`divIdx`) and the `offset` inside that item. When the start cannot
 *   be mapped (no items, or the index lies past the last item) an error is
 *   logged and the position is reported relative to the last item reached.
 */
export const convertMatches = (
  queryString: string,
  matchIndex: number,
  textContentItem: any[]
): Record<string, { divIdx: number; offset: number }> => {
  const itemCount = textContentItem.length;
  const lastIdx = itemCount - 1;
  let divIdx = 0;
  let consumed = 0;

  // Walk forward to the item containing the first matched character.
  while (
    divIdx < lastIdx &&
    matchIndex >= consumed + textContentItem[divIdx].length
  ) {
    consumed += textContentItem[divIdx].length;
    divIdx += 1;
  }

  if (
    itemCount === 0 ||
    matchIndex >= consumed + textContentItem[divIdx].length
  ) {
    console.error('Could not find a matching mapping');
  }

  const begin = { divIdx, offset: matchIndex - consumed };

  // Calculate the end position. Same walk as above, but with > instead of >=
  // so a match ending exactly on an item boundary stays in that item.
  const endIndex = matchIndex + queryString.length;
  while (
    divIdx < lastIdx &&
    endIndex > consumed + textContentItem[divIdx].length
  ) {
    consumed += textContentItem[divIdx].length;
    divIdx += 1;
  }

  const end = { divIdx, offset: endIndex - consumed };

  return { begin, end };
};

/**
 * Retrieves a single page from a loaded document.
 *
 * @param pdf - Document object exposing `getPage()`.
 * @param pageNum - Page number passed to `pdf.getPage()`.
 * @returns The value `pdf.getPage(pageNum)` resolves to.
 */
export const getPdfPage = async (pdf: any, pageNum: number): Promise<any> => {
  return pdf.getPage(pageNum);
};