# faster_rcnn_vit_base_fpn_cae_1x_coco.yml
#
# Faster R-CNN detector config: VisionTransformer backbone, FPN neck,
# RPNHead proposals and BBoxHead box head, as wired in the FasterRCNN
# section below.
#
# NOTE(review): the nesting in this file was reconstructed from a copy whose
# indentation had been stripped and whose lines carried viewer line numbers.
# Parent/child relationships follow from which keys have empty values.
# Please confirm against the upstream file the depth of keys that directly
# follow a nested mapping: worker_num, loss_rpn_bbox, bbox_assigner,
# loss_normalize_pos and bbox_loss.

_BASE_:
  - '../datasets/coco_detection.yml'
  - '../runtime.yml'
  - './_base_/faster_rcnn_reader.yml'
  - './_base_/optimizer_base_1x.yml'

weights: output/faster_rcnn_vit_base_fpn_cae_1x_coco/model_final

# runtime
log_iter: 100
snapshot_epoch: 1
find_unused_parameters: true
use_gpu: true
norm_type: sync_bn

# Presumably layered on top of the optimizer settings pulled in through
# _BASE_ (optimizer_base_1x.yml) - TODO confirm merge behaviour.
OptimizerBuilder:
  optimizer:
    weight_decay: 0.05

# reader
worker_num: 2
TrainReader:
  batch_size: 1

# model
architecture: FasterRCNN

# Names on the right refer to the component sections defined further down.
FasterRCNN:
  backbone: VisionTransformer
  neck: FPN
  rpn_head: RPNHead
  bbox_head: BBoxHead
  bbox_post_process: BBoxPostProcess

VisionTransformer:
  patch_size: 16
  embed_dim: 768
  depth: 12
  num_heads: 12
  mlp_ratio: 4
  qkv_bias: true
  drop_rate: 0.0
  drop_path_rate: 0.2
  init_values: 0.1
  final_norm: false
  use_rel_pos_bias: false
  use_sincos_pos_emb: true
  epsilon: 0.000001  # 1e-6
  out_indices: [3, 5, 7, 11]
  with_fpn: true
  pretrained: https://bj.bcebos.com/v1/paddledet/models/pretrained/vit_base_cae_pretrained.pdparams

FPN:
  out_channel: 256

RPNHead:
  anchor_generator:
    aspect_ratios: [0.5, 1.0, 2.0]
    anchor_sizes: [[32], [64], [128], [256], [512]]
    strides: [4, 8, 16, 32, 64]
  rpn_target_assign:
    batch_size_per_im: 256
    fg_fraction: 0.5
    negative_overlap: 0.3
    positive_overlap: 0.7
    use_random: true
  train_proposal:
    min_size: 0.0
    nms_thresh: 0.7
    pre_nms_top_n: 2000
    post_nms_top_n: 1000
    topk_after_collect: true
  test_proposal:
    min_size: 0.0
    nms_thresh: 0.7
    pre_nms_top_n: 1000
    post_nms_top_n: 1000
  loss_rpn_bbox: SmoothL1Loss

SmoothL1Loss:
  beta: 0.1111111111111111

BBoxHead:
  # head: TwoFCHead
  head: XConvNormHead
  roi_extractor:
    resolution: 7
    sampling_ratio: 0
    aligned: true
  bbox_assigner: BBoxAssigner
  loss_normalize_pos: true
  bbox_loss: GIoULoss

GIoULoss:
  loss_weight: 10.0
  reduction: 'none'
  eps: 0.000001  # 1e-6

BBoxAssigner:
  batch_size_per_im: 512
  bg_thresh: 0.5
  fg_thresh: 0.5
  fg_fraction: 0.25
  use_random: true

# TwoFCHead:
#   out_channel: 1024

XConvNormHead:
  num_convs: 4
  norm_type: bn

BBoxPostProcess:
  decode: RCNNBox
  nms:
    name: MultiClassNMS
    keep_top_k: 100
    score_threshold: 0.05
    nms_threshold: 0.5