// search.ts (4.2 KB)
  /**
   * Reads the text content of a page and returns one string per text item.
   *
   * An item whose text starts with an uppercase ASCII letter gets a single
   * leading space, unless it is the first item or the previous item's raw text
   * is exactly `' '`.
   *
   * @param getPage - Resolves to the page object. The page must expose
   *   `getTextContent(options)` resolving to an object with an `items` array.
   * @returns One string per item, in item order. An item without a string
   *   `str` yields `''` so that result indexes stay aligned with item indexes.
   */
  export const extractTextItems = async (
    getPage: () => Promise<any>
  ): Promise<string[]> => {
    const page = await getPage();
    const textContent = await page.getTextContent({
      normalizeWhitespace: true,
    });
    const items: any[] = textContent.items;
    const startsUppercase = /^[A-Z]/;

    // Keep exactly one entry per item, even when `str` is missing, so callers
    // can keep addressing items by index.
    const rawTexts: string[] = items.map((item: any) =>
      item != null && typeof item.str === 'string' ? item.str : ''
    );

    return rawTexts.map((text, index) => {
      const needsLeadingSpace =
        index > 0 && rawTexts[index - 1] !== ' ' && startsUppercase.test(text);
      return needsLeadingSpace ? ` ${text}` : text;
    });
  };
  /**
   * Converts a match position in the concatenated item text into begin/end
   * positions expressed as an item index plus an offset inside that item.
   *
   * @param queryString - The matched query; only its length is used.
   * @param matchIndex - Start of the match, counted over the concatenation of
   *   all entries of `textContentItem`.
   * @param textContentItem - Per-item text; each entry's `length` is read.
   * @returns `{ begin: { divIdx, offset }, end: { divIdx, offset } }`. A match
   *   that starts exactly on an item boundary begins in the following item; a
   *   match that ends exactly on a boundary ends in the preceding item. The
   *   index never advances past the last item. For an empty item list an error
   *   is logged and both positions use `divIdx` 0.
   */
  export const convertMatches = (
    queryString: string,
    matchIndex: number,
    textContentItem: any[]
  ): Record<string, any> => {
    const itemCount = textContentItem.length;
    const lastIdx = itemCount - 1;
    const matchEnd = matchIndex + queryString.length;

    if (itemCount === 0) {
      console.error('Could not find a matching mapping');
    }

    let divIdx = 0;
    // Combined length of every item before `divIdx`.
    let consumed = 0;

    // `<` (rather than `!==`) keeps both loops from reading past an empty list.
    while (
      divIdx < lastIdx &&
      matchIndex >= consumed + textContentItem[divIdx].length
    ) {
      consumed += textContentItem[divIdx].length;
      divIdx += 1;
    }
    const begin = { divIdx, offset: matchIndex - consumed };

    // Same walk for the end position, but with `>` instead of `>=` so a match
    // ending exactly on an item boundary stays in that item.
    while (
      divIdx < lastIdx &&
      matchEnd > consumed + textContentItem[divIdx].length
    ) {
      consumed += textContentItem[divIdx].length;
      divIdx += 1;
    }
    const end = { divIdx, offset: matchEnd - consumed };

    return { begin, end };
  };
  /**
   * Writes a slice of one text item into the DOM element with the same index.
   *
   * With `highlight` set, the slice is wrapped in a `<span>` whose class is
   * `id`, given a yellow background, and appended to the element. Otherwise
   * the element's content is replaced by the plain slice.
   *
   * The page text is re-read through `getPage` on every call.
   *
   * @param domElements - Elements addressed by text item index.
   * @param getPage - Resolves to the page object used for text extraction.
   * @param divIdx - Index of both the text item and the target element.
   * @param fromOffset - Start of the slice within the item text.
   * @param toOffset - End of the slice (exclusive); `undefined` means the end
   *   of the item text.
   * @param highlight - Whether to append a highlighted span instead of
   *   replacing the element's content.
   * @param id - Class name put on the highlight span.
   * @returns A promise that settles once the element has been updated. Nothing
   *   is written when there is no element or no text item at `divIdx`.
   */
  const appendTextToDiv = async (
    domElements: HTMLElement[],
    getPage: () => Promise<any>,
    divIdx: number,
    fromOffset: number,
    toOffset: number | undefined,
    highlight: boolean,
    id: string
  ): Promise<any> => {
    const target = domElements[divIdx];
    if (!target) {
      // Nothing to write into; skip the text extraction entirely.
      return;
    }

    const pageTexts = await extractTextItems(getPage);
    const itemText = pageTexts[divIdx];
    if (typeof itemText !== 'string') {
      return;
    }

    const textNode = document.createTextNode(
      itemText.substring(fromOffset, toOffset)
    );

    if (!highlight) {
      target.textContent = '';
      target.appendChild(textNode);
      return;
    }

    const span = document.createElement('span');
    span.setAttribute('class', id);
    span.style.backgroundColor = 'rgba(255, 211, 0, 0.7)';
    span.appendChild(textNode);
    target.appendChild(span);
  };
  /**
   * Resets the element at `begin.divIdx` and fills it with the unhighlighted
   * text that precedes `begin.offset` in the matching text item.
   *
   * @param domElements - Elements addressed by text item index.
   * @param getPage - Resolves to the page object used for text extraction.
   * @param begin - Position with `divIdx` (element/item index) and `offset`
   *   (end of the plain-text prefix).
   * @returns A promise that settles after the prefix has been written, so a
   *   caller that awaits it can safely append after the prefix. Does nothing
   *   when there is no element at `begin.divIdx`.
   */
  const beginText = async (
    domElements: HTMLElement[],
    getPage: () => Promise<any>,
    begin: Record<string, any>
  ): Promise<any> => {
    const { divIdx, offset } = begin;
    const target = domElements[divIdx];
    if (!target) {
      return;
    }

    // Clear synchronously, before the asynchronous text extraction starts.
    target.textContent = '';
    // Awaited so failures propagate to the caller instead of being dropped.
    await appendTextToDiv(domElements, getPage, divIdx, 0, offset, false, '');
  };
  /**
   * Highlights one search match across the elements that hold the page text.
   *
   * The match is mapped to a begin/end position with `convertMatches`. The
   * first element is rewritten as plain prefix text followed by a highlighted
   * span. Every further element up to the end position is cleared and given a
   * highlighted span covering the matched part of its text.
   *
   * Text that follows the match in the last element is not written back by
   * this function.
   *
   * @param domElements - Elements addressed by text item index.
   * @param getPage - Resolves to the page object used for text extraction.
   * @param matchIndex - Start of the match in the concatenated page text.
   * @param queryStr - The matched query string.
   * @param id - Class name put on every highlight span of this match.
   * @returns A promise that settles after every awaited rendering step has
   *   settled; a failing step rejects it.
   */
  export const renderMatches = async (
    domElements: HTMLElement[],
    getPage: () => Promise<any>,
    matchIndex: number,
    queryStr: string,
    id: string
  ): Promise<any> => {
    const pageTexts = await extractTextItems(getPage);
    const { begin, end } = convertMatches(queryStr, matchIndex, pageTexts);

    // Plain text before the match in the first element.
    await beginText(domElements, getPage, begin);

    // Steps are awaited one by one: each step writes to the DOM, and a
    // highlight must be appended only after its element has been reset.
    for (let divIdx = begin.divIdx; divIdx <= end.divIdx; divIdx += 1) {
      const isFirst = divIdx === begin.divIdx;
      const isLast = divIdx === end.divIdx;

      if (!isFirst) {
        // Later elements are matched from their start: clear them first.
        // eslint-disable-next-line no-await-in-loop
        await beginText(domElements, getPage, { divIdx, offset: 0 });
      }

      // eslint-disable-next-line no-await-in-loop
      await appendTextToDiv(
        domElements,
        getPage,
        divIdx,
        isFirst ? begin.offset : 0,
        // `undefined` highlights through the end of the item text.
        isLast ? end.offset : undefined,
        true,
        id
      );
    }
  };