# Base configs this file builds on: dataset, runtime, data reader and
# optimizer settings. Paths are relative to this file.
_BASE_:
  - '../datasets/coco_detection.yml'
  - '../runtime.yml'
  - './_base_/ppyoloe_reader.yml'
  - './_base_/optimizer_base_36e.yml'
# Location of the final model weights for this config.
weights: output/ppyoloe_vit_base_csppan_cae_36e_coco/model_final

# Snapshot and logging intervals.
snapshot_epoch: 2
log_iter: 100

# Exponential-moving-average settings; the listed parameter names are
# skipped by the EMA.
use_ema: true
ema_decay: 0.9999
ema_skip_names: ['yolo_head.proj_conv.weight', 'backbone.pos_embed']

# NOTE(review): presumably ops kept out of mixed-precision casting - confirm
# against the trainer.
custom_black_list: ['reduce_mean']

# The &use_checkpoint anchor on this value is aliased by
# VisionTransformer.use_checkpoint, so both keys always carry the same
# boolean. Change it here only.
use_fused_allreduce_gradients: &use_checkpoint false
architecture: YOLOv3
norm_type: sync_bn

# Components of the YOLOv3 architecture. The backbone, neck and head names
# each match a top-level section of this file that holds their options.
YOLOv3:
  backbone: VisionTransformer
  neck: YOLOCSPPAN
  yolo_head: PPYOLOEHead
  # Explicit null: no post_process component is configured here.
  post_process: null
# Options for the VisionTransformer backbone.
VisionTransformer:
  patch_size: 16
  embed_dim: 768
  depth: 12
  num_heads: 12
  mlp_ratio: 4
  qkv_bias: true
  drop_rate: 0.0
  drop_path_rate: 0.2
  init_values: 0.1
  final_norm: false
  use_rel_pos_bias: false
  use_sincos_pos_emb: true
  # 1e-6, spelled out in decimal so every YAML parser reads it as a float.
  epsilon: 0.000001
  # NOTE(review): with depth 12 this is presumably the last block
  # (zero-based) - confirm.
  out_indices: [11]
  with_fpn: true
  num_fpn_levels: 3
  out_with_norm: false
  # Alias of the &use_checkpoint anchor defined on
  # use_fused_allreduce_gradients; the two values are always identical.
  use_checkpoint: *use_checkpoint
  pretrained: https://bj.bcebos.com/v1/paddledet/models/pretrained/vit_base_cae_pretrained.pdparams
# Options for the YOLOCSPPAN neck.
YOLOCSPPAN:
  # Three entries of 768, the same number as VisionTransformer.embed_dim.
  # NOTE(review): presumably one entry per FPN level - confirm.
  in_channels: [768, 768, 768]
  act: 'silu'
# Options for the PPYOLOEHead detection head.
PPYOLOEHead:
  fpn_strides: [8, 16, 32]
  in_channels: [768, 768, 768]
  # NOTE(review): -1 presumably disables the static-assigner warm-up
  # phase - confirm against the head implementation.
  static_assigner_epoch: -1
  grid_cell_scale: 5.0
  grid_cell_offset: 0.5
  use_varifocal_loss: true
  loss_weight: {class: 1.0, iou: 2.5, dfl: 0.5}

  static_assigner:
    name: ATSSAssigner
    topk: 9

  assigner:
    name: TaskAlignedAssigner
    topk: 13
    alpha: 1.0
    beta: 6.0

  nms:
    name: MultiClassNMS
    nms_top_k: 1000
    keep_top_k: 300
    score_threshold: 0.01
    nms_threshold: 0.7