search.ts 4.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195
  /**
   * Collects the text of every text item reported for a page, in the order
   * the page reports them.
   *
   * @param getPage - Async accessor resolving to the page object, or to a
   *   falsy value when no page is available. The page is expected to expose
   *   `getTextContent` (presumably a pdf.js page proxy — confirm against
   *   `GetPageType`).
   * @returns One string per text item; an empty array when there is no page.
   */
  export const extractTextItems = async (
    getPage: GetPageType,
  ): Promise<string[]> => {
    const page = await getPage();
    if (!page) return [];

    const { items } = await page.getTextContent({
      normalizeWhitespace: true,
    });

    const texts: string[] = [];
    for (const item of items) {
      // Entries without a string `str` carry no text. Pushing them would put
      // `undefined` into a `string[]` and break callers that read `.length`
      // or call `.substring` on every element.
      if (item && typeof item.str === 'string') {
        texts.push(item.str);
      }
    }
    return texts;
  };
  /**
   * Maps a match given as a character index into the concatenation of
   * `textContentItem` onto per-item positions.
   *
   * @param queryString - The matched text; only its length is used.
   * @param matchIndex - Start of the match, as an index into the joined items.
   * @param textContentItem - The item strings, in order.
   * @returns An object with `begin` and `end` entries, each holding the item
   *   index (`divIdx`) and the character offset inside that item. `end.offset`
   *   is exclusive. Positions past the available text are clamped to the last
   *   item (index 0 for an empty list) and an error is logged.
   */
  export const convertMatches = (
    queryString: string,
    matchIndex: number,
    textContentItem: string[],
  ): Record<string, { divIdx: number; offset: number }> => {
    const lastIdx = textContentItem.length - 1;
    let divIdx = 0;
    // Number of characters contained in the items before `divIdx`.
    let consumed = 0;

    // Advance to the item containing the first matched character. `<` rather
    // than `!==` keeps the loop from indexing into an empty list.
    while (
      divIdx < lastIdx &&
      matchIndex >= consumed + textContentItem[divIdx].length
    ) {
      consumed += textContentItem[divIdx].length;
      divIdx += 1;
    }

    // The walk stops at the last item even when the match starts beyond it;
    // report that case instead of silently returning a clamped position.
    if (
      textContentItem.length === 0 ||
      matchIndex >= consumed + textContentItem[divIdx].length
    ) {
      console.error('Could not find a matching mapping');
    }

    const begin = { divIdx, offset: matchIndex - consumed };

    // Same walk for the end position, but with `>` so that a match ending
    // exactly on an item boundary stays in that item.
    const matchEnd = matchIndex + queryString.length;
    while (
      divIdx < lastIdx &&
      matchEnd > consumed + textContentItem[divIdx].length
    ) {
      consumed += textContentItem[divIdx].length;
      divIdx += 1;
    }

    return {
      begin,
      end: { divIdx, offset: matchEnd - consumed },
    };
  };
  /**
   * Appends a slice of one text item to its corresponding DOM element,
   * optionally wrapped in a highlighted `<span>`.
   *
   * @param domElements - Elements indexed in parallel with the text items.
   * @param getPage - Page accessor handed to `extractTextItems`.
   * @param divIdx - Index of both the text item and the target element.
   * @param fromOffset - Start of the slice within the item text.
   * @param toOffset - Exclusive end of the slice; `undefined` means "to the end".
   * @param highlight - When true the text is wrapped in a span carrying `id`
   *   as its class and a yellow background; otherwise a bare text node is added.
   * @param id - Class name given to the highlight span.
   * @param queryStr - When non-empty, its first occurrence is removed from the
   *   slice before it is appended.
   * @returns Resolves once the node has been appended. Does nothing when the
   *   element or the text item at `divIdx` does not exist.
   */
  const appendTextToDiv = async (
    domElements: HTMLElement[],
    getPage: GetPageType,
    divIdx: number,
    fromOffset: number,
    toOffset: number | undefined,
    highlight: boolean,
    id: string,
    queryStr = '',
  ): Promise<void> => {
    const textContentItem = await extractTextItems(getPage);
    const domElement = domElements[divIdx];
    const itemText = textContentItem[divIdx];
    // Mirror the guard in beginText: an index without a matching element or
    // text item is skipped instead of throwing inside a promise.
    if (!domElement || itemText === undefined) return;

    const content = itemText.substring(fromOffset, toOffset);
    const node = document.createTextNode(
      queryStr ? content.replace(queryStr, '') : content,
    );

    if (!highlight) {
      domElement.appendChild(node);
      return;
    }

    const span = document.createElement('span');
    span.setAttribute('class', id);
    span.style.backgroundColor = 'rgba(255, 211, 0, 0.7)';
    span.appendChild(node);
    domElement.appendChild(span);
  };
  /**
   * Resets the element at `begin.divIdx` and re-adds the unhighlighted text
   * that precedes `begin.offset`.
   *
   * @param domElements - Elements indexed in parallel with the text items.
   * @param getPage - Page accessor forwarded to `appendTextToDiv`.
   * @param begin - Target element index and the offset up to which text is restored.
   * @param queryStr - Forwarded to `appendTextToDiv`, which removes its first
   *   occurrence from the restored text.
   * @returns Resolves after the leading text has been appended. Does nothing
   *   when no element exists at `begin.divIdx`.
   */
  const beginText = async (
    domElements: HTMLElement[],
    getPage: GetPageType,
    begin: { divIdx: number; offset: number },
    queryStr: string,
  ): Promise<void> => {
    const { divIdx, offset } = begin;
    const domElement = domElements[divIdx];
    if (!domElement) return;

    // eslint-disable-next-line no-param-reassign
    domElement.textContent = '';
    // Awaited so the returned promise settles only after the text is in the
    // DOM and so a failure surfaces to the caller instead of being dropped.
    await appendTextToDiv(
      domElements,
      getPage,
      divIdx,
      0,
      offset,
      false,
      '',
      queryStr,
    );
  };
  /**
   * Highlights one match by rewriting the contents of the elements it covers.
   *
   * The match is located with `convertMatches`. The first element is reset to
   * its text before the match, then the matched text is appended as
   * highlighted spans, covering every element from the first to the last one
   * touched by the match.
   *
   * NOTE(review): text that follows the match inside the last element is not
   * re-appended after the element is cleared — confirm this is intended.
   *
   * @param domElements - Elements indexed in parallel with the page's text items.
   * @param getPage - Page accessor used to read the text items.
   * @param matchIndex - Start of the match as an index into the joined item text.
   * @param queryStr - The matched text.
   * @param id - Class name given to every highlight span.
   * @returns Resolves once all DOM updates have been applied.
   */
  export const renderMatches = async (
    domElements: HTMLElement[],
    getPage: GetPageType,
    matchIndex: number,
    queryStr: string,
    id: string,
  ): Promise<void> => {
    const textContentItem = await extractTextItems(getPage);
    const { begin, end } = convertMatches(queryStr, matchIndex, textContentItem);

    // Every step is awaited in turn: nodes are appended to an element in the
    // order the steps run, and a failing step rejects the returned promise
    // instead of becoming an unhandled rejection.
    await beginText(domElements, getPage, begin, queryStr);

    if (begin.divIdx === end.divIdx) {
      await appendTextToDiv(
        domElements,
        getPage,
        begin.divIdx,
        begin.offset,
        end.offset,
        true,
        id,
      );
      return;
    }

    // First element: highlight from the match start to the end of its text.
    await appendTextToDiv(
      domElements,
      getPage,
      begin.divIdx,
      begin.offset,
      undefined,
      true,
      id,
    );

    // Elements strictly between first and last are highlighted in full.
    /* eslint-disable no-await-in-loop */
    for (let i = begin.divIdx + 1; i < end.divIdx; i += 1) {
      await beginText(domElements, getPage, { divIdx: i, offset: 0 }, queryStr);
      await appendTextToDiv(domElements, getPage, i, 0, undefined, true, id);
    }
    /* eslint-enable no-await-in-loop */

    // Last element: highlight from its start up to the end of the match.
    await beginText(
      domElements,
      getPage,
      { divIdx: end.divIdx, offset: 0 },
      queryStr,
    );
    await appendTextToDiv(
      domElements,
      getPage,
      end.divIdx,
      0,
      end.offset,
      true,
      id,
    );
  };