123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177 |
- import { delay } from './time';
/**
 * Cache slot for the regular expression that `normalize` compiles on first
 * use; it holds null until then.
 */
let normalizationRegex: RegExp | null = null;

/**
 * Typographic characters and the plain-ASCII text each one is replaced with.
 * Every key is a single character.
 */
const CHARACTERS_TO_NORMALIZE: Record<string, string> = {
  // Single quotation marks: left, right, low-9, high-reversed-9.
  '\u2018': "'",
  '\u2019': "'",
  '\u201A': "'",
  '\u201B': "'",

  // Double quotation marks: left, right, low-9, high-reversed-9.
  '\u201C': '"',
  '\u201D': '"',
  '\u201E': '"',
  '\u201F': '"',

  // Vulgar fractions: one quarter, one half, three quarters.
  '\u00BC': '1/4',
  '\u00BD': '1/2',
  '\u00BE': '3/4',
};
/**
 * Loads a PDF document from a URL using a dynamically imported pdf.js build.
 *
 * @param src - URL passed to pdf.js as the document source.
 * @param cb - Optional listener invoked with every progress event reported by
 *   the loading task.
 * @returns The loaded document, or null when the import or the load throws
 *   (the error is written to the console instead of being rethrown).
 */
export const fetchPdf = async (
  src: string,
  cb?: (progress: ProgressType) => void,
): Promise<PdfType> => {
  try {
    const pdfLib = await import('pdfjs-dist/es5/build/pdf.js');
    pdfLib.GlobalWorkerOptions.workerSrc = '/static/build/pdf.worker.min.js';

    const documentSource = {
      url: src,
      cMapUrl: '/static/cmaps/',
      cMapPacked: true,
    };
    const task = pdfLib.getDocument(documentSource);

    if (cb) {
      // Forward only the progress payload to the caller's listener.
      task.onProgress = (progress: ProgressType): void => {
        cb(progress);
      };
    }

    const loadedDocument = await task.promise;
    return loadedDocument;
  } catch (err) {
    console.log(err);
    return null;
  }
};
/**
 * Renders the selectable text layer of a PDF page into a container element.
 *
 * Does nothing when `pdfPage` is falsy.
 *
 * @param pdfPage - Page whose text content is extracted.
 * @param textLayer - Element that pdf.js fills with the text elements.
 * @param viewport - Viewport used to position the text.
 * @param setTextDivs - Optional callback that receives the text elements once
 *   the text layer has finished rendering.
 */
export const renderTextLayer = async ({
  pdfPage,
  textLayer,
  viewport,
  setTextDivs,
}: {
  pdfPage: PdfPageType;
  textLayer: HTMLElement;
  viewport: ViewportType;
  setTextDivs?: (elements: HTMLElement[]) => void;
}): Promise<void> => {
  if (!pdfPage) return;

  const pdfjs = await import('pdfjs-dist/es5/build/pdf.js');
  pdfjs.GlobalWorkerOptions.workerSrc = '/static/build/pdf.worker.min.js';

  const textContent = await pdfPage.getTextContent({
    normalizeWhitespace: true,
  });

  // pdf.js appends the created elements to this array while rendering.
  const textDivs: HTMLElement[] = [];
  const textLayerTask = pdfjs.renderTextLayer({
    textContent,
    container: textLayer,
    viewport,
    textDivs,
  });

  // renderTextLayer returns a render task, not a promise: awaiting the task
  // object itself resolves immediately. Wait on its `promise` so the callback
  // only fires after the layer has actually been rendered.
  await textLayerTask.promise;

  if (setTextDivs) {
    setTextDivs(textDivs);
  }
};
/**
 * Draws a PDF page onto the canvas found inside `rootEle` and then rebuilds
 * the page's text layer.
 *
 * Nothing happens when `rootEle` is falsy or contains no `<canvas>`. When the
 * `[data-id="text-layer"]` element is missing, the canvas is still rendered
 * and only the text-layer step is skipped (previously this case threw a
 * TypeError after the canvas render had already started).
 *
 * @param rootEle - Container holding the canvas and the text-layer element.
 * @param pdfPage - Page to render; when falsy only the text layer is cleared.
 * @param viewport - Supplies the canvas dimensions and the render viewport.
 * @param setRenderTask - Receives the render task returned by `pdfPage.render`.
 * @param setTextDivs - Forwarded to `renderTextLayer`.
 */
export const renderPdfPage = async ({
  rootEle,
  pdfPage,
  viewport,
  setRenderTask,
  setTextDivs,
}: {
  rootEle: HTMLElement;
  pdfPage: PdfPageType;
  viewport: ViewportType;
  setRenderTask: (arg0: RenderTaskType) => void;
  setTextDivs: (elements: HTMLElement[]) => void;
}): Promise<void> => {
  if (!rootEle) return;

  const canvas = rootEle.querySelector('canvas');
  if (!canvas) return;

  const textLayer = rootEle.querySelector<HTMLDivElement>(
    '[data-id="text-layer"]',
  );

  const context = canvas.getContext('2d') as CanvasRenderingContext2D;
  canvas.height = viewport.height;
  canvas.width = viewport.width;

  if (pdfPage) {
    const renderTask = pdfPage.render({ canvasContext: context, viewport });
    setRenderTask(renderTask);
    // The render is not awaited; a rejection is only logged.
    renderTask.promise.catch((reason: string) => {
      console.log(`stopped ${reason}`);
    });
  }

  // Without a text-layer element there is nothing to clear or fill.
  if (!textLayer) return;

  textLayer.innerHTML = '';
  // NOTE(review): the reason for this 200 ms pause is not evident from this
  // file — confirm before changing or removing it.
  await delay(200);
  await renderTextLayer({
    pdfPage,
    textLayer,
    viewport,
    setTextDivs,
  });
};
/**
 * Replaces every character listed in CHARACTERS_TO_NORMALIZE with its mapped
 * replacement text.
 *
 * @param text - String to normalize.
 * @returns A copy of `text` with all listed characters substituted.
 */
export const normalize = (text: string): string => {
  let pattern = normalizationRegex;

  if (!pattern) {
    // First call: build one global character class out of all the map keys
    // and remember it for later calls.
    const characterClass = Object.keys(CHARACTERS_TO_NORMALIZE).join('');
    pattern = new RegExp(`[${characterClass}]`, 'g');
    normalizationRegex = pattern;
  }

  return text.replace(pattern, (matched) => CHARACTERS_TO_NORMALIZE[matched]);
};
/**
 * Finds the start offsets of every non-overlapping occurrence of `query` in
 * `pageContent`, scanning left to right.
 *
 * @param pageContent - Text to search.
 * @param query - Phrase to look for.
 * @returns Offsets in ascending order; empty when either argument is empty or
 *   the phrase does not occur.
 */
export const calculatePhraseMatch = (
  pageContent: string,
  query: string,
): number[] => {
  const offsets: number[] = [];
  if (!pageContent || !query) return offsets;

  // After each hit, resume the search just past the matched phrase so that
  // occurrences never overlap.
  const step = query.length;
  for (
    let at = pageContent.indexOf(query, 0);
    at !== -1;
    at = pageContent.indexOf(query, at + step)
  ) {
    offsets.push(at);
  }

  return offsets;
};
/**
 * Fetches one page from a loaded PDF document.
 *
 * @param pdf - Document to read from; may be falsy.
 * @param pageNum - Page number passed straight to `pdf.getPage`.
 * @returns The page, or null when `pdf` is falsy.
 */
export const getPdfPage = async (
  pdf: PdfType,
  pageNum: number,
): Promise<PdfPageType> => {
  if (!pdf) return null;

  const requestedPage = await pdf.getPage(pageNum);
  return requestedPage;
};
/**
 * Sets the CSS `overflow` of the element with id `pdf_viewer`.
 *
 * Does nothing when no such element exists in the document (previously this
 * case threw a TypeError).
 *
 * @param state - Value assigned to `style.overflow`; defaults to `'auto'`.
 */
export const switchPdfViewerScrollState = (state = 'auto'): void => {
  const pdfViewer = document.getElementById('pdf_viewer');
  // getElementById returns null when the viewer is not mounted.
  if (!pdfViewer) return;
  pdfViewer.style.overflow = state;
};
|