# table_mv3.yml
  # Global: run-wide settings read by the training / evaluation entry points.
  Global:
    use_gpu: true
    epoch_num: 400
    log_smooth_window: 20
    print_batch_step: 5
    save_model_dir: ./output/table_mv3/
    save_epoch_step: 400
    # evaluation is run every 400 iterations after the 0th iteration
    eval_batch_step: [0, 400]
    cal_metric_during_train: true
    # No value is given for the next three keys; an explicit null parses the
    # same as a bare `key:` but cannot be mistaken for the start of a nested map.
    pretrained_model: null
    checkpoints: null
    save_inference_dir: null
    use_visualdl: false
    infer_img: ppstructure/docs/table/table.jpg
    save_res_path: output/table_mv3
    # for data or label process
    character_dict_path: ppocr/utils/dict/table_structure_dict.txt
    character_type: en
    # Anchored: aliased as *max_text_length by Architecture.Head and by the
    # TableLabelEncode transforms in Train and Eval.
    max_text_length: &max_text_length 500
    box_format: &box_format 'xyxy'  # 'xywh', 'xyxy', 'xyxyxyxy'
    infer_mode: false
    amp_custom_black_list: ['matmul_v2', 'elementwise_add']
  # Optimizer: Adam with a nested learning-rate block and an L2 regularizer.
  # NOTE(review): `lr` and `regularizer` are reconstructed as nested mappings
  # because each is an empty key followed by keys that only make sense beneath
  # it; confirm against the consumer's schema.
  Optimizer:
    name: Adam
    beta1: 0.9
    beta2: 0.999
    clip_norm: 5.0
    lr:
      learning_rate: 0.001
    regularizer:
      name: 'L2'
      # Loads as the float 0.0.
      factor: 0.00000
  # Architecture: model definition, split into a Backbone and a Head mapping.
  Architecture:
    model_type: table
    algorithm: TableAttn
    Backbone:
      name: MobileNetV3
      scale: 1.0
      model_name: small
      disable_se: true
    Head:
      name: TableAttentionHead
      hidden_size: 256
      # Alias of the value anchored at Global.max_text_length (500).
      max_text_length: *max_text_length
      # Anchored: aliased as *loc_reg_num by the TableLabelEncode transforms.
      loc_reg_num: &loc_reg_num 4
  # Loss: loss class name plus the weights for its structure and location terms.
  Loss:
    name: TableAttentionLoss
    structure_weight: 100.0
    loc_weight: 10000.0
  # PostProcess: decoder selected by name; no further parameters are set here.
  PostProcess:
    name: TableLabelDecode
  # Metric: evaluation metric and the indicator treated as the main score.
  Metric:
    name: TableMetric
    main_indicator: acc
    # Computing the bbox metric is time-consuming; keep it false for training.
    compute_bbox_metric: false
  # Train: training dataset, its ordered transform pipeline, and loader options.
  # Each transform is a single-key mapping: the key names the operator and the
  # nested mapping (or null) holds its parameters.
  Train:
    dataset:
      name: PubTabDataSet
      data_dir: train_data/table/pubtabnet/train/
      label_file_list: [train_data/table/pubtabnet/PubTabNet_2.0.0_train.jsonl]
      transforms:
        - DecodeImage:  # load image
            img_mode: BGR
            channel_first: false
        - TableLabelEncode:
            learn_empty_box: false
            merge_no_span_structure: false
            replace_empty_cell_token: false
            # Aliases of the anchors set at Architecture.Head.loc_reg_num and
            # Global.max_text_length.
            loc_reg_num: *loc_reg_num
            max_text_length: *max_text_length
        - TableBoxEncode: null
        - ResizeTableImage:
            max_len: 488
        - NormalizeImage:
            # `1./255.` is not a YAML number, so it loads as the string
            # "1./255." (presumably evaluated by the consumer; keep verbatim).
            scale: 1./255.
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: 'hwc'
        - PaddingTableImage:
            size: [488, 488]
        - ToCHWImage: null
        - KeepKeys:
            keep_keys: ['image', 'structure', 'bboxes', 'bbox_masks', 'shape']
    loader:
      shuffle: true
      batch_size_per_card: 48
      drop_last: true
      num_workers: 1
  # Eval: validation dataset, its ordered transform pipeline, and loader options.
  # The transform list mirrors the Train pipeline; the loader does not shuffle
  # and keeps the last partial batch.
  Eval:
    dataset:
      name: PubTabDataSet
      data_dir: train_data/table/pubtabnet/val/
      label_file_list: [train_data/table/pubtabnet/PubTabNet_2.0.0_val.jsonl]
      transforms:
        - DecodeImage:  # load image
            img_mode: BGR
            channel_first: false
        - TableLabelEncode:
            learn_empty_box: false
            merge_no_span_structure: false
            replace_empty_cell_token: false
            # Aliases of the anchors set at Architecture.Head.loc_reg_num and
            # Global.max_text_length.
            loc_reg_num: *loc_reg_num
            max_text_length: *max_text_length
        - TableBoxEncode: null
        - ResizeTableImage:
            max_len: 488
        - NormalizeImage:
            # `1./255.` is not a YAML number, so it loads as the string
            # "1./255." (presumably evaluated by the consumer; keep verbatim).
            scale: 1./255.
            mean: [0.485, 0.456, 0.406]
            std: [0.229, 0.224, 0.225]
            order: 'hwc'
        - PaddingTableImage:
            size: [488, 488]
        - ToCHWImage: null
        - KeepKeys:
            keep_keys: ['image', 'structure', 'bboxes', 'bbox_masks', 'shape']
    loader:
      shuffle: false
      drop_last: false
      batch_size_per_card: 48
      num_workers: 1