search.ts 4.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191
  /**
   * Collects the string of every text item on a page, in item order.
   *
   * The page is obtained lazily through `getPage`; its `getTextContent` method
   * is called with `normalizeWhitespace: true` and the `str` field of each
   * entry of the returned `items` list is copied verbatim (no whitespace is
   * inserted between items).
   *
   * NOTE(review): the page object looks like a pdf.js page proxy — confirm
   * against the caller.
   *
   * @param getPage - Async factory resolving to the page object.
   * @returns One string per text item.
   */
  export const extractTextItems = async (
    getPage: () => Promise<any>
  ): Promise<string[]> => {
    const pdfPage = await getPage();
    const { items } = await pdfPage.getTextContent({
      normalizeWhitespace: true,
    });
    // One output entry per item so indices stay aligned with the item list.
    return Array.from(items, (item: any) => item.str);
  };
  /**
   * Maps a match, given as a character index into the concatenation of all
   * text items, onto (item index, offset inside that item) pairs for the
   * start and the end of the match.
   *
   * If the match cannot be located inside the items (empty item list,
   * negative index, or the match runs past the last item) an error is logged
   * and the positions are still returned, clamped to the last item (or to
   * item 0 when there are no items).
   *
   * @param queryString - The matched text; only its length is used.
   * @param matchIndex - Start of the match in the concatenated text.
   * @param textContentItem - The per-item strings, in order.
   * @returns `{ begin: { divIdx, offset }, end: { divIdx, offset } }`.
   */
  export const convertMatches = (
    queryString: string,
    matchIndex: number,
    textContentItem: any[]
  ): Record<string, any> => {
    const lastIdx = textContentItem.length - 1;
    const matchEnd = matchIndex + queryString.length;

    // No items at all: nothing to index into, so report and bail out early
    // instead of dereferencing textContentItem[0].
    if (lastIdx < 0) {
      console.error('Could not find a matching mapping');
      return {
        begin: { divIdx: 0, offset: matchIndex },
        end: { divIdx: 0, offset: matchEnd },
      };
    }

    let i = 0;
    // Number of characters contained in the items before item `i`.
    let iIndex = 0;

    // Advance to the item that contains the first matched character.
    while (i !== lastIdx && matchIndex >= iIndex + textContentItem[i].length) {
      iIndex += textContentItem[i].length;
      i += 1;
    }
    const begin = { divIdx: i, offset: matchIndex - iIndex };

    // Same walk for the end position, but with > instead of >= so that a
    // match ending exactly on an item boundary stays in that item.
    while (i !== lastIdx && matchEnd > iIndex + textContentItem[i].length) {
      iIndex += textContentItem[i].length;
      i += 1;
    }
    const end = { divIdx: i, offset: matchEnd - iIndex };

    // The walks stop at the last item, so an end offset beyond that item's
    // length means the match does not fit inside the text.
    if (matchIndex < 0 || end.offset > textContentItem[i].length) {
      console.error('Could not find a matching mapping');
    }

    return { begin, end };
  };
  /**
   * Appends a slice of one text item's string to the DOM element with the
   * same index, either as a plain text node or wrapped in a highlighted
   * `<span>`.
   *
   * The page text is re-extracted on every call. If there is no DOM element
   * or no text item at `divIdx`, the call does nothing (the same tolerance
   * `beginText` applies to a missing element).
   *
   * @param domElements - Elements indexed like the page's text items.
   * @param getPage - Async factory resolving to the page object.
   * @param divIdx - Index of both the text item and the target element.
   * @param fromOffset - Start of the slice inside the item's string.
   * @param toOffset - End of the slice (exclusive); `undefined` means "to the
   *   end of the string".
   * @param highlight - When true, wrap the text in a span with class `id` and
   *   a translucent yellow background.
   * @param id - Class name given to the highlight span.
   * @param queryStr - When non-empty, its first occurrence is removed from
   *   the slice before it is appended.
   *   NOTE(review): intent of this removal is not evident from this file.
   */
  const appendTextToDiv = async (
    domElements: HTMLElement[],
    getPage: () => Promise<any>,
    divIdx: number,
    fromOffset: number,
    toOffset: number | undefined,
    highlight: boolean,
    id: string,
    queryStr = ''
  ): Promise<any> => {
    const textContentItem = await extractTextItems(getPage);
    const domElement = domElements[divIdx];
    const itemText = textContentItem[divIdx];

    // Nothing to render into, or nothing to render: skip instead of throwing
    // a TypeError that callers would only see as a rejected promise.
    if (!domElement || typeof itemText !== 'string') {
      return;
    }

    const content = itemText.substring(fromOffset, toOffset);
    const node = document.createTextNode(
      queryStr ? content.replace(queryStr, '') : content
    );

    if (!highlight) {
      domElement.appendChild(node);
      return;
    }

    const span = document.createElement('span');
    span.setAttribute('class', id);
    span.style.backgroundColor = 'rgba(255, 211, 0, 0.7)';
    span.appendChild(node);
    domElement.appendChild(span);
  };
  /**
   * Resets the element at `begin.divIdx` and re-adds, unhighlighted, the part
   * of its text that precedes `begin.offset`.
   *
   * Does nothing when there is no element at that index. The returned promise
   * settles only after the text has been appended, so callers can sequence
   * further DOM updates after it and observe failures.
   *
   * @param domElements - Elements indexed like the page's text items.
   * @param getPage - Async factory resolving to the page object.
   * @param begin - Position with `divIdx` and `offset` fields.
   * @param queryStr - Forwarded to `appendTextToDiv`, which removes its first
   *   occurrence from the re-added text.
   */
  const beginText = async (
    domElements: HTMLElement[],
    getPage: () => Promise<any>,
    begin: Record<string, any>,
    queryStr: string
  ): Promise<any> => {
    const { divIdx } = begin;
    const domElement = domElements[divIdx];
    if (!domElement) {
      return;
    }

    // Clear whatever the element currently shows before rebuilding it.
    // eslint-disable-next-line no-param-reassign
    domElement.textContent = '';

    // Awaited so the prefix is in the DOM before this function resolves and
    // so a failure rejects this promise instead of going unhandled.
    await appendTextToDiv(
      domElements,
      getPage,
      divIdx,
      0,
      begin.offset,
      false,
      '',
      queryStr
    );
  };
  /**
   * Highlights one search match inside the given text elements.
   *
   * The match position (an index into the concatenated page text) is mapped
   * to begin/end item positions, then each affected element is rebuilt:
   * the element holding the start of the match gets its leading text plus the
   * highlighted part; every later element up to the end of the match is
   * cleared and filled with its highlighted part only.
   *
   * All DOM updates are awaited one after another so that nodes are appended
   * in a deterministic order and any failure rejects the returned promise.
   *
   * NOTE(review): text that follows the match inside the last affected
   * element is not re-appended after the element is cleared — confirm this is
   * intended.
   *
   * @param domElements - Elements indexed like the page's text items.
   * @param getPage - Async factory resolving to the page object.
   * @param matchIndex - Start of the match in the concatenated page text.
   * @param queryStr - The matched text.
   * @param id - Class name given to every highlight span of this match.
   */
  export const renderMatches = async (
    domElements: HTMLElement[],
    getPage: () => Promise<any>,
    matchIndex: number,
    queryStr: string,
    id: string
  ): Promise<any> => {
    const textContentItem = await extractTextItems(getPage);
    const { begin, end } = convertMatches(queryStr, matchIndex, textContentItem);

    // Rebuild the unhighlighted text in front of the match.
    await beginText(domElements, getPage, begin, queryStr);

    if (begin.divIdx === end.divIdx) {
      // Whole match lives in a single element.
      await appendTextToDiv(
        domElements,
        getPage,
        begin.divIdx,
        begin.offset,
        end.offset,
        true,
        id
      );
      return;
    }

    // First element: highlight from the match start to the end of its text.
    await appendTextToDiv(
      domElements,
      getPage,
      begin.divIdx,
      begin.offset,
      undefined,
      true,
      id
    );

    // Elements strictly between begin and end are highlighted entirely.
    // Sequential awaits are intentional: append order must be preserved.
    for (let i = begin.divIdx + 1; i < end.divIdx; i += 1) {
      // eslint-disable-next-line no-await-in-loop
      await beginText(domElements, getPage, { divIdx: i, offset: 0 }, queryStr);
      // eslint-disable-next-line no-await-in-loop
      await appendTextToDiv(domElements, getPage, i, 0, undefined, true, id);
    }

    // Last element: highlight from its start up to the match end.
    await beginText(
      domElements,
      getPage,
      { divIdx: end.divIdx, offset: 0 },
      queryStr
    );
    await appendTextToDiv(
      domElements,
      getPage,
      end.divIdx,
      0,
      end.offset,
      true,
      id
    );
  };