annotation.ts 6.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. import { v4 as uuidv4 } from 'uuid';
  2. import { ANNOTATION_TYPE } from '../constants';
  3. import { AnnotationType, PositionType, ViewportType } from '../constants/type';
  4. import { getPdfPage, renderTextLayer } from './pdf';
  5. import { getPosition, parsePositionForBackend } from './position';
  6. import { normalizeRound, floatToHex } from './utility';
  7. import { xmlParser, getElementsByTagName } from './dom';
  /**
   * Shape of a helper that derives font-related attributes from an annotation.
   *
   * It receives the annotation type name together with the annotation element
   * and returns a plain record of attributes (which may be empty).
   */
  type GetFontAttributeFunc = (type: string, element: Record<string, any>) => Record<string, any>;
  /**
   * Reads the textual content of a `Text` or `FreeText` annotation element.
   *
   * Only direct children whose tag name is exactly `contents` are considered.
   * When several children match, the last one wins, even if it is empty.
   * For each match the markup (`innerHTML`) is preferred over `textContent`.
   *
   * @param type - Annotation type name; anything other than `Text` or
   *   `FreeText` yields an empty string without touching the element.
   * @param element - Annotation element exposing `childNodes`.
   * @returns The content of the last `contents` child, or `''` when none exists.
   */
  const getContent = (type: string, element: Record<string, any>): string => {
    const carriesContent = type === 'Text' || type === 'FreeText';
    if (!carriesContent) return '';

    // Copy the (possibly live) node collection into a real array first.
    const children: any[] = Array.prototype.slice.call(element.childNodes);

    return children.reduce(
      (latest: string, child: any) =>
        child.tagName === 'contents'
          ? child.innerHTML || child.textContent || ''
          : latest,
      ''
    );
  };
  /**
   * Extracts font attributes from a `FreeText` annotation element.
   *
   * The values come from the element's default-appearance string, whose
   * whitespace-separated tokens are read positionally: tokens 0-2 are the
   * colour components handed to `floatToHex`, token 4 is the font name
   * preceded by one prefix character, and token 5 is the font size.
   *
   * The appearance node is looked up by its `defaultappearance` tag name (the
   * XFDF element name) so that whitespace text nodes between children do not
   * shift it; if no such child exists the second child node is used, as before.
   *
   * @param type - Annotation type name; only `FreeText` is processed.
   * @param element - Annotation element exposing `childNodes`.
   * @returns `{ fontsize, fontname, textcolor }`, or `{}` when the type is not
   *   `FreeText` or no usable appearance string is present.
   */
  const getFontAttribute: GetFontAttributeFunc = (type, element) => {
    if (type !== 'FreeText') return {};

    const children: any[] = Array.prototype.slice.call(element.childNodes || []);
    const appearanceNode =
      children.find((child) => child && child.tagName === 'defaultappearance') ||
      children[1];
    if (!appearanceNode) return {};

    const appearance: string =
      appearanceNode.innerHTML || appearanceNode.textContent || '';
    // Split on any run of whitespace so stray spaces or newlines cannot shift
    // the positional tokens.
    const tokens = appearance.trim().split(/\s+/);
    // A shorter string cannot carry colour, font name and size; bail out
    // instead of throwing so one odd annotation does not abort the whole parse.
    if (tokens.length < 6) return {};

    const [red, green, blue, , fontToken, sizeToken] = tokens;
    return {
      fontsize: parseInt(sizeToken, 10),
      // Drop the single prefix character in front of the font name.
      fontname: fontToken.slice(1),
      textcolor: floatToHex(parseFloat(red), parseFloat(green), parseFloat(blue)),
    };
  };
  /**
   * Converts an annotation XML string into annotation objects.
   *
   * The string is parsed with `xmlParser`, the nodes returned by
   * `getElementsByTagName(root, 'annots')` are walked in order, and every node
   * whose tag name has a truthy entry in `ANNOTATION_TYPE` becomes one
   * annotation with a freshly generated id. Nodes with an unmapped tag name are
   * skipped.
   *
   * Defaults applied when an attribute is absent: `bdwidth` 0, `transparency` 1,
   * everything else `undefined`.
   *
   * NOTE(review): `ftransparency` keeps the raw attribute string while
   * `transparency` is parsed to a number, and `is_arrow` receives the raw
   * `tail` attribute entry rather than its value. Confirm both are intended.
   *
   * @param xmlString - XML source; an empty value yields an empty array.
   * @returns The annotations in document order.
   */
  export const parseAnnotationFromXml = (xmlString: string): AnnotationType[] => {
    if (!xmlString) return [];

    const xmlDoc = xmlParser(xmlString);
    const root = xmlDoc.firstElementChild || xmlDoc.firstChild;
    const annotNodes = getElementsByTagName(root as ChildNode, 'annots') || [];
    const candidates: any[] = Array.prototype.slice.call(annotNodes);

    const parsed: any[] = [];
    candidates.forEach((node: any) => {
      const type = ANNOTATION_TYPE[node.tagName];
      if (!type) return;

      const attrs = node.attributes;
      // Value of an optional attribute, or undefined when it is not set.
      const valueOf = (name: string) => (attrs[name] ? attrs[name].value : undefined);
      const page = parseInt(attrs.page.value, 10);

      parsed.push({
        id: uuidv4(),
        obj_type: type,
        obj_attr: {
          page,
          position: getPosition(type, node),
          bdcolor: valueOf('color'),
          bdwidth: attrs.width ? parseInt(attrs.width.value, 10) : 0,
          transparency: attrs.opacity ? parseFloat(attrs.opacity.value) : 1,
          content: getContent(type, node) || undefined,
          fcolor: valueOf('interior-color'),
          ftransparency: valueOf('interior-opacity'),
          is_arrow: attrs.tail,
          ...getFontAttribute(type, node),
        },
      });
    });

    return parsed;
  };
  /**
   * Returns the text of the first text-layer element that falls inside one
   * annotation rectangle, prefixed with a single space.
   *
   * The rectangle's vertical coordinates are flipped against the viewport
   * height (`viewport.height - value * scale`) before being compared with the
   * element's `offsetTop`; horizontal coordinates are only scaled. All
   * compared values go through `normalizeRound`.
   *
   * For the first element whose top edge lies within the rectangle's vertical
   * range, the part of its text covered horizontally is estimated
   * proportionally: the overlap width relative to the element width is mapped
   * onto character indices. Elements inside the vertical range with no
   * horizontal overlap are skipped.
   *
   * @param coord - Rectangle with `top`, `bottom`, `left` and `right`.
   * @param elements - Array-like list of text-layer elements.
   * @param viewport - Object exposing the page `height`.
   * @param scale - Factor applied to the rectangle coordinates.
   * @returns The matched text with a leading space, or `''` when nothing matches.
   */
  const getEleText = (
    coord: any,
    elements: any,
    viewport: any,
    scale: any
  ): string => {
    const top = normalizeRound(viewport.height - coord.top * scale);
    const left = normalizeRound(coord.left * scale);
    const bottom = normalizeRound(viewport.height - coord.bottom * scale);
    const right = normalizeRound(coord.right * scale);

    // Strictly less than the length: the previous `<=` bound read one slot
    // past the end of the list on every miss.
    for (let i = 0, len = elements.length; i < len; i += 1) {
      const element = elements[i];
      if (element) {
        const eleTop = normalizeRound(element.offsetTop);
        const eleLeft = normalizeRound(element.offsetLeft);
        const eleRight = normalizeRound(element.offsetLeft + element.offsetWidth);

        if (eleTop >= top && eleTop <= bottom) {
          const textLength = element.innerText.length;
          const width = element.offsetWidth;

          // Element sticks out on both sides: trim the text at both ends.
          if (eleLeft < left && eleRight > right) {
            const start = Math.floor(textLength * ((left - eleLeft) / width));
            const end = Math.floor(
              textLength - textLength * ((eleRight - right) / width)
            );
            return ` ${element.innerText.slice(start, end)}`;
          }
          // Element crosses the left edge only: drop the leading part.
          if (eleLeft < left && eleRight > left) {
            const start = Math.floor(textLength * ((left - eleLeft) / width));
            return ` ${element.innerText.slice(start)}`;
          }
          // Element crosses the right edge only: drop the trailing part.
          if (eleRight > right && eleLeft < right) {
            const end = Math.floor(
              textLength - textLength * ((eleRight - right) / width)
            );
            return ` ${element.innerText.slice(0, end)}`;
          }
          // Element lies completely inside the rectangle.
          if (eleLeft >= left && eleRight <= right) {
            return ` ${element.innerText}`;
          }
        }
      }
    }
    return '';
  };
  /**
   * Collects the text covered by an annotation's rectangles on one page.
   *
   * The page container is looked up by the id `page_<page>` and its text layer
   * by the `[data-id="text-layer"]` selector. If the text layer has no child
   * nodes yet, it is rendered first via `renderTextLayer`. Each rectangle in
   * `coords` is then resolved with `getEleText` and the pieces are
   * concatenated in order.
   *
   * @param viewport - Viewport passed on to the text-layer renderer and to
   *   the rectangle lookup.
   * @param scale - Scale factor applied to the rectangle coordinates.
   * @param page - Page number used for the container id and the page fetch.
   * @param coords - Rectangles to resolve, in reading order.
   * @param pdf - Document handle passed to `getPdfPage`.
   * @returns The concatenated text; `''` when the page container or its text
   *   layer is not in the DOM, or when nothing matches.
   */
  export const getAnnotationText = async ({
    viewport,
    scale,
    page,
    coords,
    pdf,
  }: {
    viewport: ViewportType;
    scale: number;
    page: number;
    coords: PositionType[];
    pdf: any;
  }): Promise<any> => {
    const pageContainer = document.getElementById(`page_${page}`);
    const textLayer = pageContainer
      ? pageContainer.querySelector<HTMLElement>('[data-id="text-layer"]')
      : null;
    // The page may not be mounted; there is no text to read in that case.
    if (!textLayer) return '';

    const pdfPage = await getPdfPage(pdf, page);
    if (!textLayer.childNodes.length) {
      await renderTextLayer({
        textLayer,
        pdfPage,
        viewport,
      });
    }

    // Array.from copies the NodeList without needing a @ts-ignore for spread.
    const textElements = Array.from(textLayer.childNodes);
    return coords.reduce(
      (text: string, coord) => text + getEleText(coord, textElements, viewport, scale),
      ''
    );
  };
  /**
   * Builds the backend representation of an annotation.
   *
   * Compared with the input, the result has its page number reduced by one,
   * its position converted by `parsePositionForBackend`, and both transparency
   * values multiplied by 0.01 (falsy values become 0). Empty content becomes
   * `undefined`. Only the attributes listed below are carried over; any other
   * attribute on the input is dropped.
   *
   * @param annotation - Annotation to convert; a missing `position` is treated
   *   as an empty string.
   * @param pageHeight - Page height forwarded to the position conversion.
   * @param scale - Scale factor forwarded to the position conversion.
   * @returns A new annotation object; the input is not modified.
   */
  export const parseAnnotationObject = (
    annotation: AnnotationType,
    pageHeight: number,
    scale: number
  ): AnnotationType => {
    const { id, obj_type: objType, obj_attr: attr } = annotation;
    const position = attr.position === undefined ? '' : attr.position;

    // Percent value to fraction; anything falsy collapses to 0.
    const toFraction = (percent: number | null | undefined): number =>
      percent ? percent * 0.01 : 0;

    return {
      id,
      obj_type: objType,
      obj_attr: {
        page: attr.page - 1,
        bdcolor: attr.bdcolor,
        position: parsePositionForBackend(objType, position, pageHeight, scale),
        transparency: toFraction(attr.transparency),
        content: attr.content || undefined,
        style: attr.style,
        fcolor: attr.fcolor,
        ftransparency: toFraction(attr.ftransparency),
        bdwidth: attr.bdwidth,
        fontname: attr.fontname,
        fontsize: attr.fontsize,
        textcolor: attr.textcolor,
        is_arrow: attr.is_arrow,
      },
    };
  };