# Others / DocumentAIKit
							Global:
  debug: false
  use_gpu: true
  epoch_num: 1000
  log_smooth_window: 20
  print_batch_step: 10
  save_model_dir: ./output/det_r50_icdar15/
  save_epoch_step: 200
  eval_batch_step:
  - 0
  - 2000
  cal_metric_during_train: false
  pretrained_model: ./pretrain_models/ResNet50_dcn_asf_synthtext_pretrained
  checkpoints: null
  save_inference_dir: null
  use_visualdl: false
  infer_img: doc/imgs_en/img_10.jpg
  save_res_path: ./checkpoints/det_db/predicts_db.txt
# Model definition: a detection model (model_type: det) using algorithm DB++,
# composed of a Backbone, a Neck and a Head; no Transform stage is configured.
Architecture:
  model_type: det
  algorithm: DB++
  Transform: null
  Backbone:
    name: ResNet
    layers: 50
    # Four boolean flags; presumably one per backbone stage, enabling
    # deformable convolution in that stage — TODO confirm.
    dcn_stage: [false, true, true, true]
  Neck:
    name: DBFPN
    out_channels: 256
    use_asf: true
  Head:
    name: DBHead
    k: 50
# Loss configuration: selects DBLoss and sets its weights.
# NOTE(review): alpha/beta are presumably per-term loss weights and ohem_ratio
# a hard-negative sampling ratio — confirm against the DBLoss implementation.
Loss:
  name: DBLoss
  balance_loss: true
  main_loss_type: BCELoss
  alpha: 5
  beta: 10
  ohem_ratio: 3
# Optimizer (Momentum) and its learning-rate schedule (DecayLearningRate).
Optimizer:
  name: Momentum
  momentum: 0.9
  lr:
    name: DecayLearningRate
    learning_rate: 0.007
    # Same value as Global.epoch_num (1000); presumably the two must stay in
    # sync — confirm before changing either.
    epochs: 1000
    factor: 0.9
    end_lr: 0
  weight_decay: 0.0001
# Post-processing of the model output, handled by DBPostProcess.
# NOTE(review): thresh/box_thresh look like score cut-offs and unclip_ratio a
# box expansion factor — confirm against the DBPostProcess implementation.
PostProcess:
  name: DBPostProcess
  thresh: 0.3
  box_thresh: 0.6
  max_candidates: 1000
  unclip_ratio: 1.5
  det_box_type: 'quad'  # 'quad' or 'poly'
# Evaluation metric: DetMetric, with hmean named as the main indicator
# (presumably the score used to pick the best model — confirm).
Metric:
  name: DetMetric
  main_indicator: hmean
# Training data pipeline (dataset + transforms) and data-loader settings.
Train:
  dataset:
    name: SimpleDataSet
    data_dir: ./train_data/icdar2015/text_localization/
    label_file_list:
    - ./train_data/icdar2015/text_localization/train_icdar2015_label.txt
    # One entry, matching the single label file above; presumably a sampling
    # ratio per label file — confirm.
    ratio_list:
    - 1.0
    # Ordered sequence of single-key mappings: op name -> arguments
    # (null means no arguments are given for that op).
    transforms:
    - DecodeImage:
        img_mode: BGR
        channel_first: false
    - DetLabelEncode: null
    # Augmentation list: horizontal flip (p=0.5), rotation in [-10, 10],
    # and resize with size range [0.5, 3].
    - IaaAugment:
        augmenter_args:
        - type: Fliplr
          args:
            p: 0.5
        - type: Affine
          args:
            rotate:
            - -10
            - 10
        - type: Resize
          args:
            size:
            - 0.5
            - 3
    - EastRandomCropData:
        size:
        - 640
        - 640
        max_tries: 10
        keep_ratio: true
    - MakeShrinkMap:
        shrink_ratio: 0.4
        min_text_size: 8
    - MakeBorderMap:
        shrink_ratio: 0.4
        thresh_min: 0.3
        thresh_max: 0.7
    - NormalizeImage:
        # YAML resolves `1./255.` as a string, not a number; presumably the
        # consumer evaluates it — do not "fix" it into a float without checking.
        scale: 1./255.
        mean:
        - 0.48109378172549
        - 0.45752457890196
        - 0.40787054090196
        std:
        - 1.0
        - 1.0
        - 1.0
        order: hwc
    - ToCHWImage: null
    # Names of the fields kept after the transform pipeline.
    - KeepKeys:
        keep_keys:
        - image
        - threshold_map
        - threshold_mask
        - shrink_map
        - shrink_mask
  loader:
    shuffle: true
    drop_last: false
    batch_size_per_card: 4
    num_workers: 8
# Evaluation data pipeline (dataset + transforms) and data-loader settings.
Eval:
  dataset:
    name: SimpleDataSet
    # NOTE(review): Train.dataset.data_dir ends with '/', this one does not —
    # harmless if the consumer joins paths, but confirm.
    data_dir: ./train_data/icdar2015/text_localization
    label_file_list:
    - ./train_data/icdar2015/text_localization/test_icdar2015_label.txt
    # Ordered sequence of single-key mappings: op name -> arguments
    # (null means no arguments are given for that op).
    transforms:
    - DecodeImage:
        img_mode: BGR
        channel_first: false
    - DetLabelEncode: null
    # Fixed test-time shape; presumably [height, width] — confirm.
    - DetResizeForTest:
        image_shape:
        - 1152
        - 2048
    - NormalizeImage:
        # YAML resolves `1./255.` as a string, not a number; presumably the
        # consumer evaluates it — do not "fix" it into a float without checking.
        scale: 1./255.
        mean:
        - 0.48109378172549
        - 0.45752457890196
        - 0.40787054090196
        std:
        - 1.0
        - 1.0
        - 1.0
        order: hwc
    - ToCHWImage: null
    # Names of the fields kept after the transform pipeline.
    - KeepKeys:
        keep_keys:
        - image
        - shape
        - polys
        - ignore_tags
  loader:
    shuffle: false
    drop_last: false
    batch_size_per_card: 1
    num_workers: 2
# No profiler options are set (explicit null).
profiler_options: null