annotation.ts 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225
  1. import { v4 as uuidv4 } from 'uuid';
  2. import { ANNOTATION_TYPE } from '../constants';
  3. import { getPdfPage, renderTextLayer } from './pdf';
  4. import { getPosition, parsePositionForBackend } from './position';
  5. import { normalizeRound, floatToHex } from './utility';
  6. import { xmlParser, getElementsByTagName } from './dom';
  /**
   * Signature shared by font-attribute extractors: receives the annotation
   * type name and the annotation element, and returns a bag of attributes
   * (possibly empty) to be merged into the annotation's `obj_attr`.
   */
  type GetFontAttributeFunc = (type: string, element: Record<string, any>) => Record<string, any>;
  /**
   * Reads the textual content of a `Text` or `FreeText` annotation element.
   *
   * Scans the element's direct children for nodes whose `tagName` is
   * `contents` and returns that node's `innerHTML`, falling back to its
   * `textContent`, then to an empty string. When several `contents` children
   * exist, the last one wins. Any other annotation type yields `''`.
   */
  const getContent = (type: string, element: Record<string, any>): string => {
    const carriesText = type === 'Text' || type === 'FreeText';
    if (!carriesText) return '';

    const children: HTMLElement[] = Array.prototype.slice.call(element.childNodes);
    return children.reduce(
      (latest: string, child: HTMLElement) =>
        child.tagName === 'contents' ? child.innerHTML || child.textContent || '' : latest,
      ''
    );
  };
  /**
   * Extracts font settings for a `FreeText` annotation.
   *
   * The element's second child node is read (`innerHTML`, falling back to
   * `textContent`) as a space-separated appearance string. Tokens 0-2 are
   * parsed as floats and passed to `floatToHex` to build `textcolor`, token 4
   * minus its first character becomes `fontname`, and token 5 is parsed as the
   * integer `fontsize`.
   * NOTE(review): this matches a PDF default-appearance string such as
   * "0 0 0 rg /Helvetica 12 Tf" — confirm against the exporter.
   *
   * @returns `{ fontsize, fontname, textcolor }`, or `{}` when `type` is not
   *   `FreeText` or the appearance string is missing or has fewer than six
   *   tokens (previously these cases threw a TypeError).
   */
  const getFontAttribute: GetFontAttributeFunc = (type, element) => {
    if (type !== 'FreeText') return {};

    // Guard every hop: a FreeText element without a second child, or with an
    // empty/short appearance string, must not crash the caller.
    const appearanceNode = element.childNodes ? element.childNodes[1] : undefined;
    const appearanceString = appearanceNode
      ? appearanceNode.innerHTML || appearanceNode.textContent
      : undefined;
    if (typeof appearanceString !== 'string') return {};

    const arr = appearanceString.split(' ');
    if (arr.length < 6) return {};

    return {
      fontsize: parseInt(arr[5], 10),
      // Drop the one-character prefix of the font token; `slice` replaces the
      // deprecated `substr`.
      fontname: arr[4].slice(1),
      textcolor: floatToHex(parseFloat(arr[0]), parseFloat(arr[1]), parseFloat(arr[2])),
    };
  };
  /**
   * Converts an annotation XML string into a list of annotation objects.
   *
   * The string is parsed with `xmlParser`, the nodes under `annots` are
   * collected via `getElementsByTagName`, and every node whose `tagName` has an
   * entry in `ANNOTATION_TYPE` becomes one annotation with a fresh UUID. Nodes
   * with an unmapped tag are skipped. An empty input yields `[]`.
   *
   * Attribute defaults: `bdwidth` is 0 and `transparency` is 1 when the
   * corresponding attribute is absent; colour and interior attributes default
   * to `undefined`. `is_arrow` receives the raw `tail` attribute node.
   */
  export const parseAnnotationFromXml = (xmlString: string): AnnotationType[] => {
    if (!xmlString) return [];

    const xmlDoc = xmlParser(xmlString);
    const root = xmlDoc.firstElementChild || xmlDoc.firstChild;
    const annotNodes: any[] = Array.prototype.slice.call(
      getElementsByTagName(root as ChildNode, 'annots') || []
    );

    const parsed: any[] = [];
    annotNodes.forEach((node: any) => {
      const type = ANNOTATION_TYPE[node.tagName];
      if (!type) return;

      const attrs = node.attributes;
      // Value of a named attribute, or `fallback` when the attribute is absent.
      const readAttr = (name: string, fallback?: any): any => {
        const attr = attrs[name];
        return attr ? attr.value : fallback;
      };

      const page = parseInt(attrs.page.value, 10);
      parsed.push({
        id: uuidv4(),
        obj_type: type,
        obj_attr: {
          page,
          position: getPosition(type, node),
          bdcolor: readAttr('color'),
          bdwidth: attrs.width ? parseInt(attrs.width.value, 10) : 0,
          transparency: readAttr('opacity', 1),
          content: getContent(type, node) || undefined,
          fcolor: readAttr('interior-color'),
          ftransparency: readAttr('interior-opacity'),
          is_arrow: attrs.tail,
          ...getFontAttribute(type, node),
        },
      });
    });
    return parsed;
  };
  /**
   * Returns the text of the first text-layer element that overlaps `coord`,
   * prefixed with a single space, or `''` when nothing overlaps.
   *
   * `coord` is scaled by `scale`; its vertical edges are subtracted from
   * `viewport.height` before comparison with each element's `offsetTop`
   * (presumably converting bottom-origin page coordinates to top-origin DOM
   * coordinates — confirm with the producer of `coord`). All compared values
   * pass through `normalizeRound`.
   *
   * An element qualifies when its top lies within [top, bottom]. When it
   * sticks out past the left and/or right edge of `coord`, the text is trimmed
   * proportionally: the fraction of the element's width that lies outside is
   * applied to the character count to pick slice indices.
   *
   * @param coord object with `top`, `bottom`, `left`, `right`
   * @param elements array-like of elements exposing `offsetTop`, `offsetLeft`,
   *   `offsetWidth` and `innerText`; falsy entries are skipped
   * @param viewport object with `height`
   * @param scale multiplier applied to `coord` values
   */
  const getEleText = (
    coord: any,
    elements: any,
    viewport: any,
    scale: any
  ): string => {
    const top = normalizeRound(viewport.height - coord.top * scale);
    const left = normalizeRound(coord.left * scale);
    const bottom = normalizeRound(viewport.height - coord.bottom * scale);
    const right = normalizeRound(coord.right * scale);

    // Strictly less than `len`: the previous `<=` bound read one slot past the
    // end and relied on the falsy check below to ignore it.
    for (let i = 0, len = elements.length; i < len; i += 1) {
      const element = elements[i];
      if (element) {
        const eleTop = normalizeRound(element.offsetTop);
        const eleLeft = normalizeRound(element.offsetLeft);
        const eleRight = normalizeRound(element.offsetLeft + element.offsetWidth);

        if (eleTop >= top && eleTop <= bottom) {
          const textLength = element.innerText.length;
          const width = element.offsetWidth;

          // Element overhangs both sides: trim both ends.
          if (eleLeft < left && eleRight > right) {
            const start = Math.floor(textLength * ((left - eleLeft) / width));
            const end = Math.floor(textLength - textLength * ((eleRight - right) / width));
            return ` ${element.innerText.slice(start, end)}`;
          }
          // Element overhangs the left edge only: trim the beginning.
          if (eleLeft < left && eleRight > left) {
            const start = Math.floor(textLength * ((left - eleLeft) / width));
            return ` ${element.innerText.slice(start)}`;
          }
          // Element overhangs the right edge only: trim the end.
          if (eleRight > right && eleLeft < right) {
            const end = Math.floor(textLength - textLength * ((eleRight - right) / width));
            return ` ${element.innerText.slice(0, end)}`;
          }
          // Element lies fully inside the horizontal range: take it whole.
          if (eleLeft >= left && eleRight <= right) {
            return ` ${element.innerText}`;
          }
        }
      }
    }
    return '';
  };
  /**
   * Collects the text covered by an annotation on a rendered page.
   *
   * Looks up the `#page_<page>` container and its `[data-id="text-layer"]`
   * child, fetches the PDF page, and renders the text layer first if it has no
   * child nodes yet. Each entry of `coords` is then resolved against the text
   * layer's children with `getEleText`, and the pieces are concatenated in
   * order.
   *
   * @returns a promise resolving to the concatenated text (empty string when
   *   `coords` is empty or nothing matches)
   */
  export const getAnnotationText = async ({
    viewport,
    scale,
    page,
    coords,
    pdf,
  }: {
    viewport: ViewportType;
    scale: number;
    page: number;
    coords: PositionType[];
    pdf: any;
  }): Promise<any> => {
    const pageContainer = document.getElementById(`page_${page}`) as HTMLElement;
    const textLayer = pageContainer.querySelector('[data-id="text-layer"]') as HTMLElement;
    const pdfPage = await getPdfPage(pdf, page);

    const alreadyRendered = textLayer.childNodes.length > 0;
    if (!alreadyRendered) {
      await renderTextLayer({ textLayer, pdfPage, viewport });
    }

    // Snapshot the live node list into a plain array before matching.
    const textElements = Array.from(textLayer.childNodes);

    let text = '';
    for (const coord of coords) {
      text += getEleText(coord, textElements, viewport, scale);
    }
    return text;
  };
  /**
   * Builds the backend representation of an annotation.
   *
   * Compared with the input: `page` is decremented by one, `position` is
   * converted with `parsePositionForBackend` (a missing position is treated as
   * `''`), `transparency` and `ftransparency` are multiplied by 0.01 (falsy
   * values become 0), and an empty `content` becomes `undefined`. All other
   * attributes are copied through unchanged.
   *
   * @param annotation annotation to convert (first positional argument)
   * @param pageHeight forwarded to `parsePositionForBackend`
   * @param scale forwarded to `parsePositionForBackend`
   */
  export const parseAnnotationObject = (
    annotation: AnnotationType,
    pageHeight: number,
    scale: number
  ): AnnotationType => {
    const { id, obj_type, obj_attr: attr } = annotation;
    const position = attr.position === undefined ? '' : attr.position;

    return {
      id,
      obj_type,
      obj_attr: {
        page: attr.page - 1,
        bdcolor: attr.bdcolor,
        position: parsePositionForBackend(obj_type, position, pageHeight, scale),
        transparency: attr.transparency ? attr.transparency * 0.01 : 0,
        content: attr.content || undefined,
        style: attr.style,
        fcolor: attr.fcolor,
        ftransparency: attr.ftransparency ? attr.ftransparency * 0.01 : 0,
        bdwidth: attr.bdwidth,
        fontname: attr.fontname,
        fontsize: attr.fontsize,
        textcolor: attr.textcolor,
        is_arrow: attr.is_arrow,
      },
    };
  };