# Faster R-CNN detector with a VisionTransformer backbone and an FPN neck.
# The entries under _BASE_ are the config files this one builds on; the keys
# below are presumably merged over them by the config loader -- confirm
# against the loader before relying on override order.
_BASE_:
  - '../datasets/coco_detection.yml'
  - '../runtime.yml'
  - './_base_/faster_rcnn_reader.yml'
  - './_base_/optimizer_base_1x.yml'

weights: output/faster_rcnn_vit_base_fpn_cae_1x_coco/model_final

# runtime
log_iter: 100
snapshot_epoch: 1
find_unused_parameters: true

use_gpu: true
# Top-level normalization type; XConvNormHead below sets its own norm_type.
norm_type: sync_bn

OptimizerBuilder:
  optimizer:
    weight_decay: 0.05

# reader
worker_num: 2
TrainReader:
  batch_size: 1

# model
architecture: FasterRCNN

# Each value names the component section (defined further down) to use.
FasterRCNN:
  backbone: VisionTransformer
  neck: FPN
  rpn_head: RPNHead
  bbox_head: BBoxHead
  bbox_post_process: BBoxPostProcess

VisionTransformer:
  patch_size: 16
  embed_dim: 768
  depth: 12
  num_heads: 12
  mlp_ratio: 4
  qkv_bias: true
  drop_rate: 0.0
  drop_path_rate: 0.2
  init_values: 0.1
  final_norm: false
  use_rel_pos_bias: false
  use_sincos_pos_emb: true
  # Kept in plain decimal form: YAML 1.1 parsers such as PyYAML read a bare
  # `1e-6` (no decimal point) as a string rather than a float.
  epsilon: 0.000001  # 1e-6
  out_indices: [3, 5, 7, 11]
  with_fpn: true
  pretrained: https://bj.bcebos.com/v1/paddledet/models/pretrained/vit_base_cae_pretrained.pdparams

FPN:
  out_channel: 256

RPNHead:
  anchor_generator:
    aspect_ratios: [0.5, 1.0, 2.0]
    # One anchor size per entry in `strides` (five of each).
    anchor_sizes: [[32], [64], [128], [256], [512]]
    strides: [4, 8, 16, 32, 64]
  rpn_target_assign:
    batch_size_per_im: 256
    fg_fraction: 0.5
    negative_overlap: 0.3
    positive_overlap: 0.7
    use_random: true
  train_proposal:
    min_size: 0.0
    nms_thresh: 0.7
    pre_nms_top_n: 2000
    post_nms_top_n: 1000
    topk_after_collect: true
  test_proposal:
    min_size: 0.0
    nms_thresh: 0.7
    pre_nms_top_n: 1000
    post_nms_top_n: 1000
  loss_rpn_bbox: SmoothL1Loss

SmoothL1Loss:
  beta: 0.1111111111111111  # 1/9

BBoxHead:
  # head: TwoFCHead
  head: XConvNormHead
  roi_extractor:
    resolution: 7
    sampling_ratio: 0
    aligned: true
  bbox_assigner: BBoxAssigner
  loss_normalize_pos: true
  bbox_loss: GIoULoss

GIoULoss:
  loss_weight: 10.0
  reduction: 'none'
  # Decimal form for the same reason as VisionTransformer.epsilon.
  eps: 0.000001  # 1e-6

BBoxAssigner:
  batch_size_per_im: 512
  bg_thresh: 0.5
  fg_thresh: 0.5
  fg_fraction: 0.25
  use_random: true

# Disabled alternative to XConvNormHead (see the commented-out `head` above).
# TwoFCHead:
#   out_channel: 1024

XConvNormHead:
  num_convs: 4
  norm_type: bn

BBoxPostProcess:
  decode: RCNNBox
  nms:
    name: MultiClassNMS
    keep_top_k: 100
    score_threshold: 0.05
    nms_threshold: 0.5