# mask_rcnn_vit_base_hrfpn_cae_1x_coco.yml
  # Base configuration files, referenced by relative path.
  # NOTE(review): presumably loaded first and then overridden by the keys in
  # this file; confirm against the config loader.
  _BASE_:
    - '../datasets/coco_instance.yml'
    - '../runtime.yml'
    - './_base_/mask_rcnn_reader.yml'
    - './_base_/optimizer_base_1x.yml'
  # Path prefix of the final model weights for this experiment.
  weights: output/mask_rcnn_vit_base_hrfpn_cae_1x_coco/model_final

  # runtime
  log_iter: 100
  snapshot_epoch: 1
  norm_type: sync_bn
  # The anchor name is deliberate: VisionTransformer.use_checkpoint aliases this
  # value (*use_checkpoint), so the two settings always hold the same boolean.
  # Do not rename or remove the anchor without updating that alias.
  use_fused_allreduce_gradients: &use_checkpoint False

  # Names the model stanza to build; a stanza with the same name follows.
  architecture: MaskRCNN
  # Model composition. Each value is the name of another top-level stanza in
  # this file that configures the corresponding component.
  MaskRCNN:
    backbone: VisionTransformer
    neck: HRFPN
    rpn_head: RPNHead
    bbox_head: BBoxHead
    mask_head: MaskHead
    # post process
    bbox_post_process: BBoxPostProcess
    mask_post_process: MaskPostProcess
  # Backbone settings (referenced by MaskRCNN.backbone).
  VisionTransformer:
    patch_size: 16
    embed_dim: 768
    depth: 12
    num_heads: 12
    mlp_ratio: 4
    qkv_bias: True
    drop_rate: 0.0
    drop_path_rate: 0.2
    init_values: 0.1
    final_norm: False
    use_rel_pos_bias: False
    use_sincos_pos_emb: True
    epsilon: 0.000001  # 1e-6
    # Four indices are listed; all are below depth (12).
    out_indices: [3, 5, 7, 11]
    with_fpn: True
    # Alias of the &use_checkpoint anchor defined on
    # use_fused_allreduce_gradients, so both keys share one value.
    use_checkpoint: *use_checkpoint
    pretrained: https://bj.bcebos.com/v1/paddledet/models/pretrained/vit_base_cae_pretrained.pdparams
  # Neck settings (referenced by MaskRCNN.neck).
  HRFPN:
    out_channel: 256
    use_bias: True
  # Region proposal head settings (referenced by MaskRCNN.rpn_head).
  RPNHead:
    anchor_generator:
      aspect_ratios: [0.5, 1.0, 2.0]
      # anchor_sizes and strides each have five entries.
      # NOTE(review): presumably one entry per feature level; confirm.
      anchor_sizes: [[32], [64], [128], [256], [512]]
      strides: [4, 8, 16, 32, 64]
    rpn_target_assign:
      batch_size_per_im: 256
      fg_fraction: 0.5
      negative_overlap: 0.3
      positive_overlap: 0.7
      use_random: True
    # Proposal settings used for training.
    train_proposal:
      min_size: 0.0
      nms_thresh: 0.7
      pre_nms_top_n: 2000
      post_nms_top_n: 1000
      topk_after_collect: True
    # Proposal settings used for testing; pre_nms_top_n is lower than in
    # train_proposal and topk_after_collect is not set here.
    test_proposal:
      min_size: 0.0
      nms_thresh: 0.7
      pre_nms_top_n: 1000
      post_nms_top_n: 1000
    # Names the SmoothL1Loss stanza defined at top level in this file.
    # NOTE(review): placed at head level (not under test_proposal) when the
    # flattened nesting was restored; confirm against the upstream config.
    loss_rpn_bbox: SmoothL1Loss
  # Loss settings (referenced by RPNHead.loss_rpn_bbox).
  SmoothL1Loss:
    beta: 0.1111111111111111  # 1/9 written out as a decimal
  # Box head settings (referenced by MaskRCNN.bbox_head). head, bbox_assigner
  # and bbox_loss each name another top-level stanza in this file.
  BBoxHead:
    head: XConvNormHead
    roi_extractor:
      resolution: 7
      sampling_ratio: 0
      aligned: True
    bbox_assigner: BBoxAssigner
    loss_normalize_pos: True
    bbox_loss: GIoULoss
  # Box sample assignment settings (referenced by BBoxHead.bbox_assigner).
  BBoxAssigner:
    batch_size_per_im: 512
    # bg_thresh and fg_thresh are set to the same value.
    bg_thresh: 0.5
    fg_thresh: 0.5
    fg_fraction: 0.25
    use_random: True
  # Feature head settings (referenced by BBoxHead.head).
  XConvNormHead:
    num_convs: 4
    # Set to bn here, whereas the top-level norm_type is sync_bn.
    norm_type: bn
  # Box regression loss settings (referenced by BBoxHead.bbox_loss).
  GIoULoss:
    loss_weight: 10.0
    # Quoted so the value is unambiguously the string 'none'.
    reduction: 'none'
    eps: 0.000001  # 1e-6
  # Box post-processing settings (referenced by MaskRCNN.bbox_post_process).
  BBoxPostProcess:
    decode: RCNNBox
    nms:
      name: MultiClassNMS
      keep_top_k: 100
      score_threshold: 0.05
      nms_threshold: 0.5
  # Mask head settings (referenced by MaskRCNN.mask_head). head and
  # mask_assigner each name another top-level stanza in this file.
  MaskHead:
    head: MaskFeat
    roi_extractor:
      # 14 here, versus 7 in BBoxHead.roi_extractor.
      resolution: 14
      sampling_ratio: 0
      aligned: True
    mask_assigner: MaskAssigner
    share_bbox_feat: False
  # Mask feature head settings (referenced by MaskHead.head).
  MaskFeat:
    num_convs: 4
    out_channel: 256
    # Explicit YAML null.
    norm_type: null
  # Mask target assignment settings (referenced by MaskHead.mask_assigner).
  MaskAssigner:
    mask_resolution: 28
  # Mask post-processing settings (referenced by MaskRCNN.mask_post_process).
  MaskPostProcess:
    binary_thresh: 0.5