epoch: 12 LearningRate: base_lr: 0.0001 schedulers: - !PiecewiseDecay gamma: 0.1 milestones: [11] use_warmup: false OptimizerBuilder: clip_grad_by_norm: 0.1 regularizer: false optimizer: type: AdamW weight_decay: 0.0001