Kaynağa Gözat

optimize search feature

RoyLiu 4 yıl önce
ebeveyn
işleme
57911da94f

+ 2 - 0
actions/pdf.ts

@@ -21,6 +21,8 @@ const actions: ActionType = dispatch => ({
     dispatch({ type: types.UPDATE_ANNOTS, payload: annotations }),
   updateWatermark: (watermark: WatermarkType): void =>
     dispatch({ type: types.UPDATE_WATERMARK, payload: watermark }),
+  setTextDivs: (pageNum: number, elements: HTMLElement[]): void =>
+    dispatch({ type: types.SET_TEXT_DIV, payload: { [pageNum]: elements } }),
 });
 
 export default actions;

+ 12 - 1
components/Page/index.tsx

@@ -21,6 +21,8 @@ type Props = {
   annotations?: React.ReactNode[];
   drawing?: React.ReactNode[];
   watermark?: WatermarkType;
+  setTextDivs: (pageNum: number, elements: HTMLElement[]) => void;
+  currentPage: number;
 };
 
 const PageView: React.FC<Props> = ({
@@ -32,6 +34,8 @@ const PageView: React.FC<Props> = ({
   scale,
   annotations = [],
   watermark = {},
+  currentPage,
+  setTextDivs,
 }: Props) => {
   const rootEle = useRef<HTMLDivElement | null>(null);
   const [pdfPage, setPdfPage] = useState<any>(null);
@@ -42,12 +46,19 @@ const PageView: React.FC<Props> = ({
       getPage().then(obj => {
         setPdfPage(obj);
 
+        const setTextDivsWithPage = (elements: HTMLElement[]) => {
+          if (currentPage === pageNum) {
+            setTextDivs(pageNum, elements);
+          }
+        };
+
         if (rootEle.current) {
           renderPdfPage({
             rootEle: rootEle.current,
             pdfPage: obj,
             viewport,
             setRenderTask,
+            setTextDivs: setTextDivsWithPage,
           });
         }
       });
@@ -65,7 +76,7 @@ const PageView: React.FC<Props> = ({
     if (renderingState === 'RENDERING') {
       renderPage();
     }
-  }, [renderingState, viewport]);
+  }, [currentPage, renderingState, viewport]);
 
   return (
     <PageWrapper

+ 2 - 0
constants/actionTypes.ts

@@ -16,3 +16,5 @@ export const CHANGE_ROTATE = 'CHANGE_ROTATE';
 export const ADD_ANNOTS = 'ADD_ANNOTS';
 export const UPDATE_ANNOTS = 'UPDATE_ANNOTS';
 export const UPDATE_WATERMARK = 'UPDATE_WATERMARK';
+
+export const SET_TEXT_DIV = 'SET_TEXT_DIV';

+ 6 - 1
containers/PdfPage.tsx

@@ -4,6 +4,7 @@ import Page from '../components/Page';
 import Annotation from './Annotation';
 import { getPdfPage } from '../helpers/pdf';
 
+import useActions from '../actions';
 import useStore from '../store';
 
 type Props = {
@@ -13,8 +14,10 @@ type Props = {
 
 const PdfPage: React.FC<Props> = ({ index, renderingState }: Props) => {
   const [
-    { viewport, pdf, rotation, annotations, scale, watermark },
+    { viewport, pdf, rotation, annotations, scale, watermark, currentPage },
+    dispatch,
   ] = useStore();
+  const { setTextDivs } = useActions(dispatch);
 
   const getAnnotationWithPage = (
     arr: AnnotationType[],
@@ -45,6 +48,8 @@ const PdfPage: React.FC<Props> = ({ index, renderingState }: Props) => {
       rotation={rotation}
       watermark={watermark}
       annotations={getAnnotationWithPage(annotations, index)}
+      setTextDivs={setTextDivs}
+      currentPage={currentPage}
     />
   );
 };

+ 162 - 21
containers/Search.tsx

@@ -3,7 +3,11 @@ import React, { useState, useEffect } from 'react';
 import useActions from '../actions';
 import useStore from '../store';
 import SearchComponent from '../components/Search';
-import { normalize, calcFindPhraseMatch } from '../helpers/pdf';
+import {
+  normalize,
+  calculatePhraseMatch,
+  convertMatches,
+} from '../helpers/pdf';
 import { scrollIntoView } from '../helpers/utility';
 
 type MatchType = {
@@ -12,34 +16,44 @@ type MatchType = {
 };
 
 const Search: React.FC = () => {
-  let queryString = '';
+  const [queryString, setQueryString] = useState('');
   const [matchesMap, setMatchesMap] = useState<MatchType[]>([]);
   const [matchTotal, setMatchTotal] = useState(0);
+  const [prevIndex, setPrevIndex] = useState(-1);
   const [currentIndex, setCurrentIndex] = useState(-1);
-  const [{ navbarState, pdf, totalPage }, dispatch] = useStore();
+  const [{ navbarState, pdf, totalPage, textDivs }, dispatch] = useStore();
   const { setNavbar } = useActions(dispatch);
 
-  const extractTextItems = async (pageNum: number): Promise<string[]> => {
+  const getTextContent = async (pageNum: number): Promise<any[]> => {
     const page = await pdf.getPage(pageNum);
     const textContent = await page.getTextContent({
       normalizeWhitespace: true,
     });
-    const { items } = textContent;
+
+    return textContent.items;
+  };
+
+  const extractTextItems = async (pageNum: number): Promise<string[]> => {
+    const textContent = await getTextContent(pageNum);
     const strBuf = [];
 
-    for (let j = 0, len = items.length; j < len; j += 1) {
-      if (items[j].str.match(/[^\s]/)) {
-        strBuf.push(items[j].str);
+    for (let j = 0, len = textContent.length; j < len; j += 1) {
+      if (textContent[j].str) {
+        strBuf.push(textContent[j].str);
       }
     }
 
     return strBuf;
   };
 
-  const getMatchTextIndex = async (pageNum: number): Promise<void> => {
+  const getMatchTextIndex = async (
+    pageNum: number,
+    queryStr: string
+  ): Promise<void> => {
     const contentItems = await extractTextItems(pageNum);
     const content = normalize(contentItems.join('').toLowerCase());
-    const matches = calcFindPhraseMatch(content, queryString);
+    const matches = calculatePhraseMatch(content, queryStr);
+
     if (matches.length) {
       matches.forEach(ele => {
         matchesMap.push({
@@ -53,40 +67,144 @@ const Search: React.FC = () => {
     }
 
     if (pageNum < totalPage) {
-      getMatchTextIndex(pageNum + 1);
+      await getMatchTextIndex(pageNum + 1, queryStr);
+    }
+  };
+
+  const startSearchPdf = async (queryStr: string): Promise<void> => {
+    getMatchTextIndex(1, queryStr);
+  };
+
+  const appendTextToDiv = async (
+    pageNum: number,
+    divIdx: number,
+    fromOffset: number,
+    toOffset: number | undefined,
+    highlight: boolean
+  ): Promise<any> => {
+    const textContentItem = await extractTextItems(pageNum);
+
+    if (textDivs[pageNum]) {
+      const domElements = textDivs[pageNum][divIdx];
+
+      const content = textContentItem[divIdx].substring(fromOffset, toOffset);
+      const node = document.createTextNode(content);
+      const span = document.createElement('span');
+      if (highlight) {
+        span.style.backgroundColor = 'rgba(255, 211, 0, 0.7)';
+        span.appendChild(node);
+        domElements.appendChild(span);
+        scrollIntoView(domElements, { top: -120 });
+      } else {
+        domElements.textContent = '';
+        domElements.appendChild(node);
+      }
     }
   };
 
-  const searchPdfPages = async (): Promise<void> => {
-    getMatchTextIndex(1);
+  /**
+   * Resets the text-layer div in which a match starts so that it holds only
+   * the text preceding the match (characters `0..begin.offset` of that item).
+   *
+   * Does nothing when the store has no divs for `pageNum` or no div at
+   * `begin.divIdx`.
+   *
+   * @param pageNum - Page number used as the key into `textDivs`.
+   * @param begin - Match start position; `divIdx` and `offset` are read.
+   * @returns A promise that settles once the prefix text has been written.
+   */
+  const beginText = async (
+    pageNum: number,
+    begin: Record<string, any>
+  ): Promise<any> => {
+    const { divIdx } = begin;
+    // `textDivs` may hold no entry for this page; indexing `undefined` would
+    // throw a TypeError, so resolve the page entry first.
+    const pageDivs = textDivs[pageNum];
+    const domElements = pageDivs ? pageDivs[divIdx] : undefined;
+    if (domElements) {
+      domElements.textContent = '';
+      // Awaited so the returned promise covers the DOM write and surfaces any
+      // rejection from it.
+      await appendTextToDiv(pageNum, divIdx, 0, begin.offset, false);
+    }
+  };
+
+  const cleanMatches = async (
+    pageNum: number,
+    matchIndex: number,
+    queryStr: string
+  ): Promise<any> => {
+    const textContentItem = await extractTextItems(pageNum);
+    const { begin, end } = convertMatches(
+      queryStr,
+      matchIndex,
+      textContentItem
+    );
+
+    for (let i = begin.divIdx; i <= end.divIdx; i += 1) {
+      appendTextToDiv(pageNum, i, 0, undefined, false);
+    }
   };
 
-  const cleanMatch = (): void => {
+  const reset = (): void => {
     setMatchTotal(0);
     setCurrentIndex(-1);
+    setPrevIndex(-1);
     setMatchesMap([]);
   };
 
+  /**
+   * Paints the highlight for one match on a page.
+   *
+   * Converts the match index into begin/end positions via `convertMatches`,
+   * rewrites the text preceding the match in the first div, then appends the
+   * matched text as a highlighted span to every div from `begin.divIdx` to
+   * `end.divIdx`.
+   *
+   * Each helper call is awaited in turn, so the returned promise settles only
+   * after the last step has finished and a rejection from any step reaches
+   * the caller instead of being dropped.
+   *
+   * @param pageNum - Page whose text items and divs are used.
+   * @param matchIndex - Match position passed through to `convertMatches`.
+   * @param queryStr - Query string passed through to `convertMatches`.
+   */
+  const renderMatches = async (
+    pageNum: number,
+    matchIndex: number,
+    queryStr: string
+  ): Promise<any> => {
+    const textContentItem = await extractTextItems(pageNum);
+    const { begin, end } = convertMatches(
+      queryStr,
+      matchIndex,
+      textContentItem
+    );
+
+    // Non-highlight writes clear the div first, so the prefix must be issued
+    // before any highlighted span is appended to the same div.
+    await beginText(pageNum, begin);
+
+    if (begin.divIdx === end.divIdx) {
+      // Match is contained in a single div.
+      await appendTextToDiv(
+        pageNum,
+        begin.divIdx,
+        begin.offset,
+        end.offset,
+        true
+      );
+      return;
+    }
+
+    for (let i = begin.divIdx; i <= end.divIdx; i += 1) {
+      if (i === begin.divIdx) {
+        // First div: highlight from the match start to the end of the item.
+        await appendTextToDiv(pageNum, i, begin.offset, undefined, true);
+      } else if (i === end.divIdx) {
+        // Last div: empty it, then highlight up to the match end.
+        await beginText(pageNum, { divIdx: i, offset: 0 });
+        await appendTextToDiv(pageNum, i, 0, end.offset, true);
+      } else {
+        // Middle div: empty it, then highlight the whole item.
+        await beginText(pageNum, { divIdx: i, offset: 0 });
+        await appendTextToDiv(pageNum, i, 0, undefined, true);
+      }
+    }
+  };
+
   const handleSearch = (val: string): void => {
-    cleanMatch();
+    if (!val) return;
 
-    if (val) {
-      queryString = val.toLowerCase();
-      searchPdfPages();
+    const queryStr = normalize(val.toLowerCase());
+    if (queryStr !== queryString) {
+      reset();
+      setQueryString(queryStr);
+      startSearchPdf(queryStr);
     }
   };
 
   const clickPrev = (): void => {
     setCurrentIndex(cur => {
+      setPrevIndex(cur);
       if (cur > 0) {
         return cur - 1;
       }
+      setPrevIndex(cur - 1);
       return cur;
     });
   };
 
   const clickNext = (): void => {
     setCurrentIndex(cur => {
+      setPrevIndex(cur);
       if (cur + 1 < matchTotal) {
         return cur + 1;
       }
@@ -96,14 +214,19 @@ const Search: React.FC = () => {
 
   const handleClose = (): void => {
     setNavbar('');
-    cleanMatch();
+    reset();
+
+    const currentMatches = matchesMap[currentIndex];
+    if (currentMatches) {
+      cleanMatches(currentMatches.page, currentMatches.index, queryString);
+    }
   };
 
   useEffect(() => {
-    if (matchTotal === 1) {
+    if (matchTotal >= 1 && currentIndex === -1) {
       clickNext();
     }
-  }, [matchTotal]);
+  }, [matchTotal, currentIndex]);
 
   useEffect(() => {
     if (currentIndex >= 0) {
@@ -111,10 +234,28 @@ const Search: React.FC = () => {
       const pageDiv: HTMLDivElement = document.getElementById(
         `page_${indexObj.page}`
       ) as HTMLDivElement;
+
       scrollIntoView(pageDiv);
     }
   }, [currentIndex, matchesMap]);
 
+  useEffect(() => {
+    if (currentIndex >= 0) {
+      const currentMatches = matchesMap[currentIndex];
+
+      if (textDivs[currentMatches.page]) {
+        renderMatches(currentMatches.page, currentMatches.index, queryString);
+      }
+
+      if (currentIndex !== prevIndex && prevIndex >= 0) {
+        const prevMatches = matchesMap[prevIndex];
+        if (prevMatches) {
+          cleanMatches(prevMatches.page, prevMatches.index, queryString);
+        }
+      }
+    }
+  }, [currentIndex, prevIndex, matchesMap, queryString, textDivs]);
+
   return (
     <SearchComponent
       matchesTotal={matchTotal}

+ 12 - 3
helpers/pdf.ts

@@ -50,18 +50,24 @@ export const renderTextLayer = async ({
   pdfPage,
   textLayer,
   viewport,
+  setTextDivs,
 }: {
   pdfPage: any;
   textLayer: HTMLElement;
   viewport: ViewportType;
+  setTextDivs?: (elements: HTMLElement[]) => void;
 }): Promise<any> => {
   const textContent = await pdfPage.getTextContent();
-  pdfjs.renderTextLayer({
+  const textDivs: any[] = [];
+  await pdfjs.renderTextLayer({
     textContent,
     container: textLayer,
     viewport,
-    textDivs: [],
+    textDivs,
   });
+  if (setTextDivs) {
+    setTextDivs(textDivs);
+  }
 };
 
 export const renderPdfPage = async ({
@@ -69,11 +75,13 @@ export const renderPdfPage = async ({
   pdfPage,
   viewport,
   setRenderTask,
+  setTextDivs,
 }: {
   rootEle: HTMLElement;
   pdfPage: any;
   viewport: ViewportType;
   setRenderTask: any;
+  setTextDivs: (elements: HTMLElement[]) => void;
 }): Promise<any> => {
   if (rootEle) {
     const canvas: HTMLCanvasElement = rootEle.querySelectorAll(
@@ -111,6 +119,7 @@ export const renderPdfPage = async ({
         pdfPage,
         textLayer,
         viewport,
+        setTextDivs,
       });
     }
   }
@@ -125,7 +134,7 @@ export const normalize = (text: string): string => {
   return text.replace(normalizationRegex, ch => CHARACTERS_TO_NORMALIZE[ch]);
 };
 
-export const calcFindPhraseMatch = (
+export const calculatePhraseMatch = (
   pageContent: string,
   query: string
 ): number[] => {

+ 1 - 0
reducers/index.ts

@@ -31,4 +31,5 @@ export default createReducer({
   [types.ADD_ANNOTS]: pdfActions.addAnnotation,
   [types.UPDATE_ANNOTS]: pdfActions.updateAnnotation,
   [types.UPDATE_WATERMARK]: pdfActions.updateWatermark,
+  [types.SET_TEXT_DIV]: pdfActions.setTextDivs,
 });

+ 5 - 0
reducers/pdf.ts

@@ -52,3 +52,8 @@ export const updateWatermark: ReducerFuncType = (state, { payload }) => ({
     ...payload,
   },
 });
+
+/**
+ * Reducer for the text-div action: returns a copy of the state whose
+ * `textDivs` field is the dispatched payload. The payload is stored as-is and
+ * replaces the previous `textDivs` value; all other fields are carried over.
+ */
+export const setTextDivs: ReducerFuncType = (state, action) => {
+  const { payload } = action;
+  return { ...state, textDivs: payload };
+};

+ 2 - 0
store/initialPdfState.ts

@@ -8,6 +8,7 @@ export type StateType = {
   rotation: number;
   annotations: AnnotationType[];
   watermark: WatermarkType;
+  textDivs: any;
 };
 
 export default {
@@ -36,4 +37,5 @@ export default {
     imagepath: '',
     isfront: 'yes',
   },
+  textDivs: {},
 };