---
# det_r50_db++_icdar15.yml
# Text-detection config: DB++ algorithm, ResNet-50 backbone, ICDAR 2015 data paths.

# Run-wide settings: epoch count, logging cadence, model/checkpoint/output paths.
Global:
  debug: false
  use_gpu: true
  epoch_num: 1000
  log_smooth_window: 20
  print_batch_step: 10
  save_model_dir: ./output/det_r50_icdar15/
  save_epoch_step: 200
  eval_batch_step:
    - 0
    - 2000
  cal_metric_during_train: false
  pretrained_model: ./pretrain_models/ResNet50_dcn_asf_synthtext_pretrained
  checkpoints: null
  save_inference_dir: null
  use_visualdl: false
  infer_img: doc/imgs_en/img_10.jpg
  save_res_path: ./checkpoints/det_db/predicts_db.txt

# Model definition: backbone -> neck -> head, no Transform stage.
Architecture:
  model_type: det
  algorithm: DB++
  Transform: null
  Backbone:
    name: ResNet
    layers: 50
    # One flag per backbone stage.
    dcn_stage: [false, true, true, true]
  Neck:
    name: DBFPN
    out_channels: 256
    use_asf: true
  Head:
    name: DBHead
    k: 50

Loss:
  name: DBLoss
  balance_loss: true
  main_loss_type: BCELoss
  alpha: 5
  beta: 10
  ohem_ratio: 3

Optimizer:
  name: Momentum
  momentum: 0.9
  # Learning-rate schedule; `epochs` here matches Global.epoch_num (1000).
  lr:
    name: DecayLearningRate
    learning_rate: 0.007
    epochs: 1000
    factor: 0.9
    end_lr: 0
  weight_decay: 0.0001

PostProcess:
  name: DBPostProcess
  thresh: 0.3
  box_thresh: 0.6
  max_candidates: 1000
  unclip_ratio: 1.5
  det_box_type: 'quad'  # 'quad' or 'poly'

Metric:
  name: DetMetric
  main_indicator: hmean

# Training data: dataset source, ordered transform pipeline, loader settings.
Train:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data/icdar2015/text_localization/
    label_file_list:
      - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
    ratio_list:
      - 1.0
    # Transforms are listed in the order given in the original file.
    transforms:
      - DecodeImage:
          img_mode: BGR
          channel_first: false
      - DetLabelEncode: null
      - IaaAugment:
          augmenter_args:
            - type: Fliplr
              args:
                p: 0.5
            - type: Affine
              args:
                rotate:
                  - -10
                  - 10
            - type: Resize
              args:
                size:
                  - 0.5
                  - 3
      - EastRandomCropData:
          size:
            - 640
            - 640
          max_tries: 10
          keep_ratio: true
      - MakeShrinkMap:
          shrink_ratio: 0.4
          min_text_size: 8
      - MakeBorderMap:
          shrink_ratio: 0.4
          thresh_min: 0.3
          thresh_max: 0.7
      - NormalizeImage:
          # Quoted so the value is unambiguously a string expression.
          # NOTE(review): presumably evaluated by the consumer - confirm.
          scale: '1./255.'
          mean:
            - 0.48109378172549
            - 0.45752457890196
            - 0.40787054090196
          std:
            - 1.0
            - 1.0
            - 1.0
          order: hwc
      - ToCHWImage: null
      - KeepKeys:
          keep_keys:
            - image
            - threshold_map
            - threshold_mask
            - shrink_map
            - shrink_mask
  loader:
    shuffle: true
    drop_last: false
    batch_size_per_card: 4
    num_workers: 8

# Evaluation data: no augmentation; images resized to a fixed 1152 x 2048 shape.
Eval:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data/icdar2015/text_localization
    label_file_list:
      - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
    transforms:
      - DecodeImage:
          img_mode: BGR
          channel_first: false
      - DetLabelEncode: null
      - DetResizeForTest:
          image_shape:
            - 1152
            - 2048
      - NormalizeImage:
          # Quoted so the value is unambiguously a string expression.
          # NOTE(review): presumably evaluated by the consumer - confirm.
          scale: '1./255.'
          mean:
            - 0.48109378172549
            - 0.45752457890196
            - 0.40787054090196
          std:
            - 1.0
            - 1.0
            - 1.0
          order: hwc
      - ToCHWImage: null
      - KeepKeys:
          keep_keys:
            - image
            - shape
            - polys
            - ignore_tags
  loader:
    shuffle: false
    drop_last: false
    batch_size_per_card: 1
    num_workers: 2

profiler_options: null