epoch: 12

LearningRate:
  base_lr: 0.0001
  schedulers:
  - !PiecewiseDecay
    gamma: 0.1
    milestones: [9, 11]
  - !LinearWarmup
    start_factor: 0.001
    steps: 1000

OptimizerBuilder:
  optimizer:
    type: AdamWDL
    betas: [0.9, 0.999]
    layer_decay: 0.75
    weight_decay: 0.02
    num_layers: 12
    filter_bias_and_bn: True
    skip_decay_names: ['pos_embed', 'cls_token']
    set_param_lr_func: 'layerwise_lr_decay'
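
# Schedule notes (assumed behaviour, following typical PaddleDetection semantics):
# - LinearWarmup ramps the rate from base_lr * start_factor up to base_lr over the
#   first 1000 iterations; PiecewiseDecay then multiplies it by gamma (0.1) at
#   epochs 9 and 11, for 12 training epochs in total.
# - With set_param_lr_func: 'layerwise_lr_decay', each of the num_layers (12)
#   backbone blocks typically has its rate scaled by roughly
#   layer_decay ** (num_layers - layer_id), so earlier layers train with smaller
#   learning rates than later ones.
# - skip_decay_names excludes pos_embed and cls_token from weight decay, and
#   filter_bias_and_bn additionally skips decay for bias and normalization
#   parameters.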