# det_mv3_pse.yml
# PSE text-detection config: MobileNetV3 backbone, FPN neck, PSEHead,
# trained and evaluated on the ICDAR2015 text-localization label files.
# NOTE(review): nesting was restored from a flattened copy of this file;
# confirm it against the schema the consuming tool expects.

# Run-wide settings: device, schedule, logging, checkpoint and output paths.
Global:
  use_gpu: true
  epoch_num: 600
  log_smooth_window: 20
  print_batch_step: 10
  save_model_dir: ./output/det_mv3_pse/
  save_epoch_step: 600
  # evaluation is run every 63 iterations
  eval_batch_step: [0, 63]
  cal_metric_during_train: false
  pretrained_model: ./pretrain_models/MobileNetV3_large_x0_5_pretrained
  # Unset; example value: ./output/det_r50_vd_pse_batch8_ColorJitter/best_accuracy
  checkpoints: null
  save_inference_dir: null
  use_visualdl: false
  infer_img: doc/imgs_en/img_10.jpg
  # NOTE(review): this path is under ./output/det_pse/, not the
  # ./output/det_mv3_pse/ directory used by save_model_dir; confirm intended.
  save_res_path: ./output/det_pse/predicts_pse.txt

# Model definition: backbone -> neck -> head, with no input transform.
Architecture:
  model_type: det
  algorithm: PSE
  Transform: null
  Backbone:
    name: MobileNetV3
    scale: 0.5
    model_name: large
  Neck:
    name: FPN
    out_channels: 96
  Head:
    name: PSEHead
    hidden_dim: 96
    # Same value as Train MakePseGt.kernel_num (7); presumably the two must
    # be changed together. TODO confirm.
    out_channels: 7

Loss:
  name: PSELoss
  alpha: 0.7
  ohem_ratio: 3
  kernel_sample_mask: pred
  reduction: none

# Adam with a step learning-rate schedule and L2 regularization.
Optimizer:
  name: Adam
  beta1: 0.9
  beta2: 0.999
  lr:
    name: Step
    learning_rate: 0.001
    step_size: 200
    gamma: 0.1
  regularizer:
    name: 'L2'
    factor: 0.0005

PostProcess:
  name: PSEPostProcess
  thresh: 0
  box_thresh: 0.85
  min_area: 16
  box_type: quad  # 'quad' or 'poly'
  scale: 1

Metric:
  name: DetMetric
  main_indicator: hmean

# Training data pipeline; transforms are listed in the order they appear here.
Train:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data/icdar2015/text_localization/
    label_file_list:
      - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
    ratio_list: [1.0]
    transforms:
      - DecodeImage:  # load image
          img_mode: BGR
          channel_first: false
      - DetLabelEncode: null  # Class handling label
      - ColorJitter:
          brightness: 0.12549019607843137
          saturation: 0.5
      - IaaAugment:
          augmenter_args:
            - {'type': Resize, 'args': {'size': [0.5, 3]}}
            - {'type': Fliplr, 'args': {'p': 0.5}}
            - {'type': Affine, 'args': {'rotate': [-10, 10]}}
      - MakePseGt:
          kernel_num: 7
          min_shrink_ratio: 0.4
          size: 640
      - RandomCropImgMask:
          size: [640, 640]
          main_key: gt_text
          crop_keys: ['image', 'gt_text', 'gt_kernels', 'mask']
      - NormalizeImage:
          # Quoted so it is unambiguously a string; presumably the consumer
          # evaluates the expression. TODO confirm.
          scale: '1./255.'
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
          order: 'hwc'
      - ToCHWImage: null
      - KeepKeys:
          # the order of the dataloader list
          keep_keys: ['image', 'gt_text', 'gt_kernels', 'mask']
  loader:
    shuffle: true
    drop_last: false
    batch_size_per_card: 16
    num_workers: 8

# Evaluation data pipeline; no augmentation, resize for test instead of crop.
Eval:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data/icdar2015/text_localization/
    label_file_list:
      - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
    ratio_list: [1.0]
    transforms:
      - DecodeImage:  # load image
          img_mode: BGR
          channel_first: false
      - DetLabelEncode: null  # Class handling label
      - DetResizeForTest:
          limit_side_len: 736
          limit_type: min
      - NormalizeImage:
          # Quoted so it is unambiguously a string; presumably the consumer
          # evaluates the expression. TODO confirm.
          scale: '1./255.'
          mean: [0.485, 0.456, 0.406]
          std: [0.229, 0.224, 0.225]
          order: 'hwc'
      - ToCHWImage: null
      - KeepKeys:
          keep_keys: ['image', 'shape', 'polys', 'ignore_tags']
  loader:
    shuffle: false
    drop_last: false
    batch_size_per_card: 1  # must be 1
    num_workers: 8