123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182 |
- // @ts-ignore
- import pdfjs from 'pdfjs-dist';
- // @ts-ignore
- import pdfjsWorker from 'pdfjs-dist/build/pdf.worker.entry';
- import { ProgressType, ViewportType } from '../constants/type';
- import { objIsEmpty } from './utility';
- import { delay } from './time';
- pdfjs.GlobalWorkerOptions.workerSrc = pdfjsWorker;
/**
 * Lazily compiled character-class regex matching every key of
 * `CHARACTERS_TO_NORMALIZE`. Stays `null` until `normalize` builds it on
 * first use.
 */
let normalizationRegex: RegExp | null = null;

/**
 * Typographic characters mapped to the plain-ASCII text they are replaced
 * with during normalization. Every key is a single character so the keys can
 * be joined directly into a regex character class.
 */
const CHARACTERS_TO_NORMALIZE: Readonly<Record<string, string>> = {
  '\u2018': '\'', // Left single quotation mark
  '\u2019': '\'', // Right single quotation mark
  '\u201A': '\'', // Single low-9 quotation mark
  '\u201B': '\'', // Single high-reversed-9 quotation mark
  '\u201C': '"', // Left double quotation mark
  '\u201D': '"', // Right double quotation mark
  '\u201E': '"', // Double low-9 quotation mark
  '\u201F': '"', // Double high-reversed-9 quotation mark
  '\u00BC': '1/4', // Vulgar fraction one quarter
  '\u00BD': '1/2', // Vulgar fraction one half
  '\u00BE': '3/4', // Vulgar fraction three quarters
};
/**
 * Loads a PDF document through pdf.js.
 *
 * @param src URL handed to `pdfjs.getDocument`.
 * @param cb Optional listener; when given it is invoked with every progress
 *   update reported by the loading task.
 * @returns The loaded document. If loading fails, the error is logged and an
 *   empty object is returned instead of rejecting.
 */
export const fetchPdf = async (
  src: string, cb?: (progress: ProgressType) => void,
): Promise<any> => {
  try {
    const task = pdfjs.getDocument({ url: src });
    if (cb) {
      // Only hook progress reporting when the caller asked for it.
      task.onProgress = (progress: ProgressType): void => {
        cb(progress);
      };
    }
    return await task.promise;
  } catch (err) {
    console.log(err);
    return {};
  }
};
/**
 * Fetches a page's text content and asks pdf.js to render it as a text layer
 * into the given container.
 *
 * The task returned by `pdfjs.renderTextLayer` is not awaited, so the
 * returned promise settles once rendering has been started.
 *
 * @param options.pdfPage Page object exposing `getTextContent()`.
 * @param options.textLayer Element that receives the text layer.
 * @param options.viewport Viewport passed through to pdf.js.
 */
export const renderTextLayer = async (options: {
  pdfPage: any;
  textLayer: HTMLElement;
  viewport: ViewportType;
}): Promise<any> => {
  const { pdfPage, textLayer: container, viewport } = options;
  const textContent = await pdfPage.getTextContent();
  pdfjs.renderTextLayer({ textContent, container, viewport, textDivs: [] });
};
/**
 * Renders a PDF page into the first `<canvas>` found under `rootEle`, then
 * rebuilds the text layer in the `[data-id="text-layer"]` element.
 *
 * Does nothing when `rootEle` is falsy or contains no canvas. When `pdfPage`
 * is empty (per `objIsEmpty`), the canvas is still resized and any existing
 * text layer is cleared, but nothing is drawn and no text layer is rendered.
 * A missing text-layer element is tolerated: only the canvas is rendered.
 *
 * @param rootEle Container holding the canvas and the text-layer element.
 * @param pdfPage Page object exposing `render()` and `getTextContent()`.
 * @param viewport Viewport supplying the canvas dimensions; also passed on
 *   to pdf.js.
 */
export const renderPdfPage = async ({
  rootEle,
  pdfPage,
  viewport,
}: {
  rootEle: HTMLElement;
  pdfPage: any;
  viewport: ViewportType;
}): Promise<any> => {
  if (!rootEle) return;

  const canvas = rootEle.querySelector('canvas');
  if (!canvas) return;

  const textLayer = rootEle.querySelector<HTMLDivElement>('[data-id="text-layer"]');
  const context = canvas.getContext('2d') as CanvasRenderingContext2D;
  canvas.height = viewport.height;
  canvas.width = viewport.width;

  const hasPage = !objIsEmpty(pdfPage);
  if (hasPage) {
    const renderTask = pdfPage.render({ canvasContext: context, viewport });
    await renderTask.promise;
  }

  // No text-layer element in this container: the canvas render is all we do.
  if (!textLayer) return;

  textLayer.innerHTML = '';

  // An empty page has no getTextContent(); calling it would throw.
  if (!hasPage) return;

  // NOTE(review): the reason for this fixed 200 ms pause is not visible in
  // this file; kept as-is to preserve timing.
  await delay(200);
  await renderTextLayer({
    pdfPage,
    textLayer,
    viewport,
  });
};
/**
 * Replaces every character listed in `CHARACTERS_TO_NORMALIZE` with its
 * mapped plain-text equivalent.
 *
 * The matching regex is built from the table's keys on the first call and
 * cached in `normalizationRegex` for later calls.
 *
 * @param text Text to normalize.
 * @returns The text with all listed characters substituted.
 */
export const normalize = (text: string): string => {
  const substitute = (ch: string): string => CHARACTERS_TO_NORMALIZE[ch];

  if (normalizationRegex) {
    return text.replace(normalizationRegex, substitute);
  }

  // First call: compile the character class once and keep it.
  const charClass = Object.keys(CHARACTERS_TO_NORMALIZE).join('');
  normalizationRegex = new RegExp(`[${charClass}]`, 'g');
  return text.replace(normalizationRegex, substitute);
};
/**
 * Finds the start index of every non-overlapping occurrence of `query` in
 * `pageContent`, scanning left to right.
 *
 * @param pageContent Text to search.
 * @param query Phrase to look for.
 * @returns Match start indices in ascending order; empty when either
 *   argument is empty or nothing matches.
 */
export const calcFindPhraseMatch = (pageContent: string, query: string): number[] => {
  const found: number[] = [];
  if (!pageContent || !query) {
    return found;
  }

  const step = query.length;
  let pos = pageContent.indexOf(query);
  while (pos !== -1) {
    found.push(pos);
    // Resume after the whole match so occurrences never overlap.
    pos = pageContent.indexOf(query, pos + step);
  }
  return found;
};
/**
 * Converts a match position in the concatenated page text into positions
 * relative to the individual text items.
 *
 * Each entry of `textContentItem` only needs a `length`; presumably these are
 * the strings of the page's text divs, in order (confirm against the caller).
 *
 * @param queryString Matched phrase; only its length is used.
 * @param matchIndex Start index of the match in the concatenated text.
 * @param textContentItem Ordered text items that make up the page text.
 * @returns `{ begin, end }`, each holding `divIdx` (index into
 *   `textContentItem`) and `offset` (character offset inside that item).
 *   `end.offset` is exclusive. If no item can hold the match start (empty
 *   item list, or `matchIndex` past the end of the text) an error is logged
 *   and the positions are clamped to the last available item (index 0 for an
 *   empty list).
 */
export const convertMatches = (
  queryString: string,
  matchIndex: number,
  textContentItem: any[],
): Record<string, any> => {
  const matchEnd = matchIndex + queryString.length;
  const lastIdx = textContentItem.length - 1;

  // Nothing to map onto: report it instead of dereferencing a missing item.
  if (lastIdx < 0) {
    console.error('Could not find a matching mapping');
    return {
      begin: { divIdx: 0, offset: matchIndex },
      end: { divIdx: 0, offset: matchEnd },
    };
  }

  let divIdx = 0;
  let consumed = 0; // Total length of the items before `divIdx`.

  // Walk to the item that contains the first character of the match.
  while (divIdx !== lastIdx && matchIndex >= consumed + textContentItem[divIdx].length) {
    consumed += textContentItem[divIdx].length;
    divIdx += 1;
  }
  if (matchIndex >= consumed + textContentItem[divIdx].length) {
    // Ran out of items before reaching the match start.
    console.error('Could not find a matching mapping');
  }
  const begin = { divIdx, offset: matchIndex - consumed };

  // Same walk for the end position, but with `>` instead of `>=` so a match
  // ending exactly on an item boundary stays in that item.
  while (divIdx !== lastIdx && matchEnd > consumed + textContentItem[divIdx].length) {
    consumed += textContentItem[divIdx].length;
    divIdx += 1;
  }

  return {
    begin,
    end: { divIdx, offset: matchEnd - consumed },
  };
};
/**
 * Retrieves a single page from a loaded PDF document.
 *
 * @param pdf Document object exposing `getPage()`.
 * @param pageNum Page number forwarded unchanged to `pdf.getPage`.
 * @returns Whatever `pdf.getPage(pageNum)` resolves to.
 */
export const getPdfPage = async (pdf: any, pageNum: number): Promise<any> => {
  return await pdf.getPage(pageNum);
};
|