# cascade_rcnn_vit_base_hrfpn_cae_1x_coco.yml
#
# Cascade R-CNN detector configuration using a VisionTransformer backbone and
# an HRFPN neck (see the `CascadeRCNN` mapping below for the component wiring).
#
# NOTE(review): the nesting in this file was reconstructed from a flattened
# copy (indentation had been lost). Confirm the placement of nested keys
# against the consuming framework's schema before relying on it.

# Other config files referenced by relative path.
# Presumably these are loaded as base configs and the keys in this file
# override them -- TODO confirm merge order with the config loader.
_BASE_:
  - '../datasets/coco_detection.yml'
  - '../runtime.yml'
  - './_base_/faster_rcnn_reader.yml'
  - './_base_/optimizer_base_1x.yml'

# Weights path; the directory name mirrors this config's file name.
weights: output/cascade_rcnn_vit_base_hrfpn_cae_1x_coco/model_final

# runtime
log_iter: 100
snapshot_epoch: 1
find_unused_parameters: true
use_gpu: true
norm_type: sync_bn

# reader
worker_num: 2
TrainReader:
  batch_size: 1

# model
architecture: CascadeRCNN

# Component wiring: each value names a top-level key configured further down.
CascadeRCNN:
  backbone: VisionTransformer
  neck: HRFPN
  rpn_head: RPNHead
  bbox_head: CascadeHead
  # post process
  bbox_post_process: BBoxPostProcess

VisionTransformer:
  patch_size: 16
  embed_dim: 768
  depth: 12
  num_heads: 12
  mlp_ratio: 4
  qkv_bias: true
  drop_rate: 0.0
  drop_path_rate: 0.2
  init_values: 0.1
  final_norm: false
  use_rel_pos_bias: false
  use_sincos_pos_emb: true
  # Written in decimal form rather than exponent form: a dot-less `1e-6` is
  # read as a string by some YAML 1.1 parsers.
  epsilon: 0.000001  # 1e-6
  # Four entries, all below `depth: 12`; presumably zero-based block
  # indices -- TODO confirm.
  out_indices: [3, 5, 7, 11]
  with_fpn: true
  pretrained: https://bj.bcebos.com/v1/paddledet/models/pretrained/vit_base_cae_pretrained.pdparams

HRFPN:
  out_channel: 256
  use_bias: true

RPNHead:
  anchor_generator:
    aspect_ratios: [0.5, 1.0, 2.0]
    # Five anchor-size groups, matching the five entries in `strides`.
    anchor_sizes: [[32], [64], [128], [256], [512]]
    strides: [4, 8, 16, 32, 64]
  rpn_target_assign:
    batch_size_per_im: 256
    fg_fraction: 0.5
    negative_overlap: 0.3
    positive_overlap: 0.7
    use_random: true
  train_proposal:
    min_size: 0.0
    nms_thresh: 0.7
    pre_nms_top_n: 2000
    post_nms_top_n: 2000
    topk_after_collect: true
  test_proposal:
    min_size: 0.0
    nms_thresh: 0.7
    pre_nms_top_n: 1000
    post_nms_top_n: 1000
  # Names the top-level `SmoothL1Loss` key configured below.
  loss_rpn_bbox: SmoothL1Loss

SmoothL1Loss:
  beta: 0.1111111111111111

CascadeHead:
  # `head`, `bbox_assigner` and `bbox_loss` each name a top-level key
  # configured below.
  head: CascadeXConvNormHead
  roi_extractor:
    resolution: 7
    sampling_ratio: 0
    aligned: true
  bbox_assigner: BBoxAssigner
  bbox_loss: GIoULoss
  num_cascade_stages: 3
  reg_class_agnostic: false
  # The per-stage lists here and `cascade_iou` under `BBoxAssigner` all have
  # three entries, matching `num_cascade_stages: 3`.
  stage_loss_weights: [1, 0.5, 0.25]
  loss_normalize_pos: true
  add_gt_as_proposals: [true, true, true]

BBoxAssigner:
  batch_size_per_im: 512
  bg_thresh: 0.5
  fg_thresh: 0.5
  fg_fraction: 0.25
  cascade_iou: [0.5, 0.6, 0.7]
  use_random: true

CascadeXConvNormHead:
  norm_type: bn

GIoULoss:
  loss_weight: 10.0
  reduction: 'none'
  eps: 0.000001

BBoxPostProcess:
  decode:
    name: RCNNBox
    prior_box_var: [30.0, 30.0, 15.0, 15.0]
  nms:
    name: MultiClassNMS
    keep_top_k: 100
    score_threshold: 0.05
    nms_threshold: 0.5