
Commit to PaddleDetection develop branch, d56cf3f7c294a7138013dac21f87da4ea6bee829

yangjun 1 year ago
commit
dfa27afb39
100 changed files with 4922 additions and 0 deletions
  1. .gitignore (+88, -0)
  2. LICENSE (+201, -0)
  3. Pipfile (+26, -0)
  4. README.md (+1, -0)
  5. README_cn.md (+825, -0)
  6. README_en.md (+541, -0)
  7. activity/直播答疑第一期.md (+125, -0)
  8. benchmark/README.md (+47, -0)
  9. benchmark/configs/faster_rcnn_r50_fpn_1x_coco.yml (+48, -0)
  10. benchmark/prepare.sh (+17, -0)
  11. benchmark/run_all.sh (+47, -0)
  12. benchmark/run_benchmark.sh (+92, -0)
  13. configs/cascade_rcnn/README.md (+28, -0)
  14. configs/cascade_rcnn/_base_/cascade_fpn_reader.yml (+40, -0)
  15. configs/cascade_rcnn/_base_/cascade_mask_fpn_reader.yml (+40, -0)
  16. configs/cascade_rcnn/_base_/cascade_mask_rcnn_r50_fpn.yml (+97, -0)
  17. configs/cascade_rcnn/_base_/cascade_rcnn_r50_fpn.yml (+75, -0)
  18. configs/cascade_rcnn/_base_/optimizer_1x.yml (+19, -0)
  19. configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.yml (+8, -0)
  20. configs/cascade_rcnn/cascade_mask_rcnn_r50_vd_fpn_ssld_1x_coco.yml (+18, -0)
  21. configs/cascade_rcnn/cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco.yml (+29, -0)
  22. configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.yml (+8, -0)
  23. configs/cascade_rcnn/cascade_rcnn_r50_vd_fpn_ssld_1x_coco.yml (+18, -0)
  24. configs/cascade_rcnn/cascade_rcnn_r50_vd_fpn_ssld_2x_coco.yml (+29, -0)
  25. configs/centernet/README.md (+37, -0)
  26. configs/centernet/README_cn.md (+36, -0)
  27. configs/centernet/_base_/centernet_dla34.yml (+22, -0)
  28. configs/centernet/_base_/centernet_r50.yml (+34, -0)
  29. configs/centernet/_base_/centernet_reader.yml (+35, -0)
  30. configs/centernet/_base_/optimizer_140e.yml (+14, -0)
  31. configs/centernet/centernet_dla34_140e_coco.yml (+9, -0)
  32. configs/centernet/centernet_mbv1_140e_coco.yml (+21, -0)
  33. configs/centernet/centernet_mbv3_large_140e_coco.yml (+22, -0)
  34. configs/centernet/centernet_mbv3_small_140e_coco.yml (+28, -0)
  35. configs/centernet/centernet_r50_140e_coco.yml (+9, -0)
  36. configs/centernet/centernet_shufflenetv2_140e_coco.yml (+33, -0)
  37. configs/convnext/README.md (+20, -0)
  38. configs/convnext/ppyoloe_convnext_tiny_36e_coco.yml (+55, -0)
  39. configs/convnext/yolox_convnext_s_36e_coco.yml (+58, -0)
  40. configs/datasets/coco_detection.yml (+21, -0)
  41. configs/datasets/coco_instance.yml (+20, -0)
  42. configs/datasets/dota.yml (+21, -0)
  43. configs/datasets/dota_ms.yml (+21, -0)
  44. configs/datasets/mcmot.yml (+25, -0)
  45. configs/datasets/mot.yml (+23, -0)
  46. configs/datasets/objects365_detection.yml (+21, -0)
  47. configs/datasets/roadsign_voc.yml (+21, -0)
  48. configs/datasets/sniper_coco_detection.yml (+47, -0)
  49. configs/datasets/sniper_visdrone_detection.yml (+47, -0)
  50. configs/datasets/spine_coco.yml (+21, -0)
  51. configs/datasets/visdrone_detection.yml (+22, -0)
  52. configs/datasets/voc.yml (+21, -0)
  53. configs/datasets/wider_face.yml (+20, -0)
  54. configs/dcn/README.md (+37, -0)
  55. configs/dcn/cascade_rcnn_dcn_r50_fpn_1x_coco.yml (+16, -0)
  56. configs/dcn/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml (+16, -0)
  57. configs/dcn/faster_rcnn_dcn_r101_vd_fpn_1x_coco.yml (+15, -0)
  58. configs/dcn/faster_rcnn_dcn_r50_fpn_1x_coco.yml (+16, -0)
  59. configs/dcn/faster_rcnn_dcn_r50_vd_fpn_1x_coco.yml (+15, -0)
  60. configs/dcn/faster_rcnn_dcn_r50_vd_fpn_2x_coco.yml (+26, -0)
  61. configs/dcn/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml (+17, -0)
  62. configs/dcn/mask_rcnn_dcn_r101_vd_fpn_1x_coco.yml (+15, -0)
  63. configs/dcn/mask_rcnn_dcn_r50_fpn_1x_coco.yml (+16, -0)
  64. configs/dcn/mask_rcnn_dcn_r50_vd_fpn_2x_coco.yml (+26, -0)
  65. configs/dcn/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml (+17, -0)
  66. configs/deformable_detr/README.md (+36, -0)
  67. configs/deformable_detr/_base_/deformable_detr_r50.yml (+48, -0)
  68. configs/deformable_detr/_base_/deformable_detr_reader.yml (+48, -0)
  69. configs/deformable_detr/_base_/deformable_optimizer_1x.yml (+16, -0)
  70. configs/deformable_detr/deformable_detr_r50_1x_coco.yml (+9, -0)
  71. configs/detr/README.md (+39, -0)
  72. configs/detr/_base_/detr_r50.yml (+44, -0)
  73. configs/detr/_base_/detr_reader.yml (+48, -0)
  74. configs/detr/_base_/optimizer_1x.yml (+16, -0)
  75. configs/detr/detr_r50_1x_coco.yml (+9, -0)
  76. configs/dino/README.md (+39, -0)
  77. configs/dino/_base_/dino_r50.yml (+49, -0)
  78. configs/dino/_base_/dino_reader.yml (+48, -0)
  79. configs/dino/_base_/optimizer_1x.yml (+16, -0)
  80. configs/dino/_base_/optimizer_2x.yml (+16, -0)
  81. configs/dino/dino_r50_4scale_1x_coco.yml (+11, -0)
  82. configs/dino/dino_r50_4scale_2x_coco.yml (+11, -0)
  83. configs/face_detection/README.md (+176, -0)
  84. configs/face_detection/README_en.md (+176, -0)
  85. configs/face_detection/_base_/blazeface.yml (+45, -0)
  86. configs/face_detection/_base_/blazeface_fpn.yml (+45, -0)
  87. configs/face_detection/_base_/face_reader.yml (+44, -0)
  88. configs/face_detection/_base_/optimizer_1000e.yml (+21, -0)
  89. configs/face_detection/blazeface_1000e.yml (+9, -0)
  90. configs/face_detection/blazeface_fpn_ssh_1000e.yml (+9, -0)
  91. configs/faster_rcnn/README.md (+38, -0)
  92. configs/faster_rcnn/_base_/faster_fpn_reader.yml (+40, -0)
  93. configs/faster_rcnn/_base_/faster_rcnn_r50.yml (+66, -0)
  94. configs/faster_rcnn/_base_/faster_rcnn_r50_fpn.yml (+73, -0)
  95. configs/faster_rcnn/_base_/faster_rcnn_swin_reader.yml (+41, -0)
  96. configs/faster_rcnn/_base_/faster_rcnn_swin_tiny_fpn.yml (+72, -0)
  97. configs/faster_rcnn/_base_/faster_reader.yml (+40, -0)
  98. configs/faster_rcnn/_base_/optimizer_1x.yml (+19, -0)
  99. configs/faster_rcnn/_base_/optimizer_swin_1x.yml (+22, -0)
  100. configs/faster_rcnn/faster_rcnn_r101_1x_coco.yml (+0, -0)

+ 88 - 0
.gitignore

@@ -0,0 +1,88 @@
+# Virtualenv
+/.venv/
+/venv/
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+.ipynb_checkpoints/
+*.py[cod]
+
+# C extensions
+*.so
+
+# json file
+*.json
+
+# log file
+*.log
+
+# Distribution / packaging
+/bin/
+/build/
+/develop-eggs/
+/dist/
+/eggs/
+/lib/
+/lib64/
+/output/
+/inference_model/
+/output_inference/
+/parts/
+/sdist/
+/var/
+/*.egg-info/
+/.installed.cfg
+/*.egg
+/.eggs
+
+# AUTHORS and ChangeLog will be generated while packaging
+/AUTHORS
+/ChangeLog
+
+# BCloud / BuildSubmitter
+/build_submitter.*
+/logger_client_log
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+.tox/
+.coverage
+.cache
+.pytest_cache
+nosetests.xml
+coverage.xml
+
+# Translations
+*.mo
+
+# Sphinx documentation
+/docs/_build/
+
+*.tar
+*.pyc
+
+.idea/
+
+dataset/coco/annotations
+dataset/coco/train2017
+dataset/coco/val2017
+dataset/voc/VOCdevkit
+dataset/fruit/fruit-detection/
+dataset/voc/test.txt
+dataset/voc/trainval.txt
+dataset/wider_face/WIDER_test
+dataset/wider_face/WIDER_train
+dataset/wider_face/WIDER_val
+dataset/wider_face/wider_face_split
+
+ppdet/version.py
+
+# NPU meta folder
+kernel_meta/
+
+# MAC
+*.DS_Store
+

+ 201 - 0
LICENSE

@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

+ 26 - 0
Pipfile

@@ -0,0 +1,26 @@
+[[source]]
+url = "https://pypi.org/simple"
+verify_ssl = true
+name = "pypi"
+
+[packages]
+tqdm = "*"
+typeguard = "*"
+visualdl = ">=2.1.0"
+opencv-python = "*"
+pyyaml = "*"
+shapely = "*"
+scipy = "*"
+terminaltables = "*"
+cython = "*"
+pycocotools = "*"
+lap = "*"
+sklearn = "*"
+motmetrics = "*"
+openpyxl = "*"
+cython-bbox = "*"
+
+[dev-packages]
+
+[requires]
+python_version = "3.8"

+ 1 - 0
README.md

@@ -0,0 +1 @@
+README_cn.md

File diff suppressed because it is too large
+ 825 - 0
README_cn.md


File diff suppressed because it is too large
+ 541 - 0
README_en.md


File diff suppressed because it is too large
+ 125 - 0
activity/直播答疑第一期.md


+ 47 - 0
benchmark/README.md

@@ -0,0 +1,47 @@
+# General object detection benchmark scripts
+
+```
+├── benchmark
+│   ├── analysis_log.py
+│   ├── prepare.sh
+│   ├── README.md
+│   ├── run_all.sh
+│   ├── run_benchmark.sh
+```
+
+## Scripts
+
+### prepare.sh
+Data preparation script; downloads the required datasets and models automatically.
+### run_all.sh
+Main entry script; runs the benchmark plans for all supported models.
+### run_benchmark.sh
+Single-model script; runs the benchmark plan for one specified model.
+
+## Docker environment
+* docker image: registry.baidubce.com/paddlepaddle/paddle:2.1.2-gpu-cuda10.2-cudnn7
+* paddle = 2.1.2
+* python = 3.7
+
+## Running the benchmark
+
+### Run all models
+```
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection
+bash benchmark/run_all.sh
+```
+
+### Run a specific model
+* Usage: bash run_benchmark.sh ${run_mode} ${batch_size} ${fp_item} ${max_epoch} ${model_name}
+* model_name: faster_rcnn, fcos, deformable_detr, gfl, hrnet, higherhrnet, solov2, jde, fairmot
+```
+git clone https://github.com/PaddlePaddle/PaddleDetection.git
+cd PaddleDetection
+bash benchmark/prepare.sh
+
+# single GPU
+CUDA_VISIBLE_DEVICES=0 bash benchmark/run_benchmark.sh sp 2 fp32 1 faster_rcnn
+# multiple GPUs
+CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash benchmark/run_benchmark.sh mp 2 fp32 1 faster_rcnn
+```

+ 48 - 0
benchmark/configs/faster_rcnn_r50_fpn_1x_coco.yml

@@ -0,0 +1,48 @@
+_BASE_: [
+  '../../configs/datasets/coco_detection.yml',
+  '../../configs/runtime.yml',
+  '../../configs/faster_rcnn/_base_/optimizer_1x.yml',
+  '../../configs/faster_rcnn/_base_/faster_rcnn_r50_fpn.yml',
+]
+weights: output/faster_rcnn_r50_fpn_1x_coco/model_final
+
+worker_num: 2
+TrainReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - RandomFlip: {prob: 0.5}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: true
+  drop_last: true
+  collate_batch: false
+
+
+EvalReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+
+TestReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: false
+  drop_last: false

+ 17 - 0
benchmark/prepare.sh

@@ -0,0 +1,17 @@
+#!/usr/bin/env bash
+
+pip install -U pip Cython
+pip install -r requirements.txt
+
+mv ./dataset/coco/download_coco.py . && rm -rf ./dataset/coco/* && mv ./download_coco.py ./dataset/coco/
+# prepare lite train data
+wget -nc -P ./dataset/coco/ https://paddledet.bj.bcebos.com/data/coco_benchmark.tar
+cd ./dataset/coco/ && tar -xvf coco_benchmark.tar && mv -u coco_benchmark/* .
+rm -rf coco_benchmark/
+
+cd ../../
+rm -rf ./dataset/mot/*
+# prepare mot mini train data
+wget -nc -P ./dataset/mot/ https://paddledet.bj.bcebos.com/data/mot_benchmark.tar
+cd ./dataset/mot/ && tar -xvf mot_benchmark.tar && mv -u mot_benchmark/* .
+rm -rf mot_benchmark/

+ 47 - 0
benchmark/run_all.sh

@@ -0,0 +1,47 @@
+# Use docker: paddlepaddle/paddle:latest-gpu-cuda10.1-cudnn7  paddle=2.1.2  python3.7
+#
+# Usage:
+#   git clone https://github.com/PaddlePaddle/PaddleDetection.git
+#   cd PaddleDetection
+#   bash benchmark/run_all.sh
+log_path=${LOG_PATH_INDEX_DIR:-$(pwd)}  # set by the benchmark system; when profiling is not run, log_path points to the directory that stores speed logs
+
+# run prepare.sh
+bash benchmark/prepare.sh
+
+model_name_list=(faster_rcnn fcos deformable_detr gfl hrnet higherhrnet solov2 jde fairmot)
+fp_item_list=(fp32)
+max_epoch=2
+
+for model_item in ${model_name_list[@]}; do
+      for fp_item in ${fp_item_list[@]}; do
+          case ${model_item} in
+              faster_rcnn) bs_list=(1 8) ;;
+              fcos) bs_list=(2) ;;
+              deformable_detr) bs_list=(2) ;;
+              gfl) bs_list=(2) ;;
+              hrnet) bs_list=(64) ;;
+              higherhrnet) bs_list=(20) ;;
+              solov2) bs_list=(2) ;;
+              jde) bs_list=(4) ;;
+              fairmot) bs_list=(6) ;;
+              *) echo "wrong model_name"; exit 1;
+          esac
+          for bs_item in ${bs_list[@]}
+            do
+            run_mode=sp
+            log_name=detection_${model_item}_bs${bs_item}_${fp_item}   # e.g. clas_MobileNetv1_mp_bs32_fp32_8
+            echo "index is speed, 1gpus, begin, ${log_name}"
+            CUDA_VISIBLE_DEVICES=0 bash benchmark/run_benchmark.sh ${run_mode} ${bs_item} \
+             ${fp_item} ${max_epoch} ${model_item} | tee ${log_path}/${log_name}_speed_1gpus 2>&1
+            sleep 60
+
+            run_mode=mp
+            log_name=detection_${model_item}_bs${bs_item}_${fp_item}   # e.g. clas_MobileNetv1_mp_bs32_fp32_8
+            echo "index is speed, 8gpus, run_mode is multi_process, begin, ${log_name}"
+            CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash benchmark/run_benchmark.sh ${run_mode} \
+             ${bs_item} ${fp_item} ${max_epoch} ${model_item}| tee ${log_path}/${log_name}_speed_8gpus8p 2>&1
+            sleep 60
+            done
+      done
+done

+ 92 - 0
benchmark/run_benchmark.sh

@@ -0,0 +1,92 @@
+#!/usr/bin/env bash
+set -xe
+# Usage: CUDA_VISIBLE_DEVICES=0 bash benchmark/run_benchmark.sh ${run_mode} ${batch_size} ${fp_item} ${max_epoch} ${model_name}
+python="python3.7"
+# Parameter description
+function _set_params(){
+    run_mode=${1:-"sp"}            # sp|mp
+    batch_size=${2:-"2"}
+    fp_item=${3:-"fp32"}           # fp32|fp16
+    max_epoch=${4:-"1"}
+    model_item=${5:-"model_item"}
+    run_log_path=${TRAIN_LOG_DIR:-$(pwd)}
+# parameters required by the benchmark log parser
+    base_batch_size=${batch_size}
+    mission_name="目标检测"
+    direction_id="0"
+    ips_unit="images/s"
+    skip_steps=10                  # log parsing: skip the first steps, which are unusually slow for some models (required)
+    keyword="ips:"                 # log parsing: keyword that marks lines containing throughput data (required)
+    index="1"
+    model_name=${model_item}_bs${batch_size}_${fp_item}
+
+    device=${CUDA_VISIBLE_DEVICES//,/ }
+    arr=(${device})
+    num_gpu_devices=${#arr[*]}
+    log_file=${run_log_path}/${model_item}_${run_mode}_bs${batch_size}_${fp_item}_${num_gpu_devices}
+}
+function _train(){
+    echo "Train on ${num_gpu_devices} GPUs"
+    echo "current CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES, gpus=$num_gpu_devices, batch_size=$batch_size"
+
+    # set runtime params
+    set_optimizer_lr_sp=" "
+    set_optimizer_lr_mp=" "
+    # parse model_item
+    case ${model_item} in
+        faster_rcnn) model_yml="benchmark/configs/faster_rcnn_r50_fpn_1x_coco.yml"
+            set_optimizer_lr_sp="LearningRate.base_lr=0.001" ;;
+        fcos) model_yml="configs/fcos/fcos_r50_fpn_1x_coco.yml"
+            set_optimizer_lr_sp="LearningRate.base_lr=0.001" ;;
+        deformable_detr) model_yml="configs/deformable_detr/deformable_detr_r50_1x_coco.yml" ;;
+        gfl) model_yml="configs/gfl/gfl_r50_fpn_1x_coco.yml"
+            set_optimizer_lr_sp="LearningRate.base_lr=0.001" ;;
+        hrnet) model_yml="configs/keypoint/hrnet/hrnet_w32_256x192.yml" ;;
+        higherhrnet) model_yml="configs/keypoint/higherhrnet/higherhrnet_hrnet_w32_512.yml" ;;
+        solov2) model_yml="configs/solov2/solov2_r50_fpn_1x_coco.yml" ;;
+        jde) model_yml="configs/mot/jde/jde_darknet53_30e_1088x608.yml" ;;
+        fairmot) model_yml="configs/mot/fairmot/fairmot_dla34_30e_1088x608.yml" ;;
+        *) echo "Undefined model_item"; exit 1;
+    esac
+
+    set_batch_size="TrainReader.batch_size=${batch_size}"
+    set_max_epoch="epoch=${max_epoch}"
+    set_log_iter="log_iter=1"
+    if [ ${fp_item} = "fp16" ]; then
+        set_fp_item="--fp16"
+    else
+        set_fp_item=" "
+    fi
+
+    case ${run_mode} in
+        sp) train_cmd="${python} -u tools/train.py -c ${model_yml} ${set_fp_item} \
+            -o ${set_batch_size} ${set_max_epoch} ${set_log_iter} ${set_optimizer_lr_sp}" ;;
+        mp) rm -rf mylog
+            train_cmd="${python} -m paddle.distributed.launch --log_dir=./mylog \
+            --gpus=${CUDA_VISIBLE_DEVICES} tools/train.py -c ${model_yml} ${set_fp_item} \
+            -o ${set_batch_size} ${set_max_epoch} ${set_log_iter} ${set_optimizer_lr_mp}"
+            log_parse_file="mylog/workerlog.0" ;;
+        *) echo "choose run_mode(sp or mp)"; exit 1;
+    esac
+
+    timeout 15m ${train_cmd} > ${log_file} 2>&1
+    if [ $? -ne 0 ];then
+        echo -e "${train_cmd}, FAIL"
+        export job_fail_flag=1
+    else
+        echo -e "${train_cmd}, SUCCESS"
+        export job_fail_flag=0
+    fi
+    kill -9 `ps -ef|grep 'python'|awk '{print $2}'`
+
+    if [ $run_mode = "mp" -a -d mylog ]; then
+        rm ${log_file}
+        cp mylog/workerlog.0 ${log_file}
+    fi
+}
+
+source ${BENCHMARK_ROOT}/scripts/run_model.sh   # run_model.sh parses benchmark-formatted logs with analysis.py; download it from https://github.com/PaddlePaddle/benchmark/blob/master/scripts/run_model.sh when integrating. Comment this line out to only produce training logs without parsing; re-enable it before submitting.
+_set_params $@
+# _train       # uncomment to only produce training logs without parsing
+_run     # defined in run_model.sh and calls _train internally; comment it out to only produce training logs, and re-enable it before submitting
+
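
The skip_steps=10 and keyword="ips:" parameters above are consumed by analysis.py from the benchmark repo. As a rough illustration of what that parsing does (the log-line format here, e.g. `ips: 8.23 images/s`, is an assumption, not taken from this diff), the same average throughput could be extracted by hand with:
```
# hypothetical sketch: average the value after "ips:" over a training log,
# skipping the first 10 matches as warm-up steps
grep "ips:" train.log \
  | awk '{for (i = 1; i < NF; i++) if ($i == "ips:") print $(i + 1)}' \
  | tail -n +11 \
  | awk '{s += $1; n++} END {if (n) printf "avg ips: %.2f images/s\n", s / n}'
```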

+ 28 - 0
configs/cascade_rcnn/README.md

@@ -0,0 +1,28 @@
+# Cascade R-CNN: High Quality Object Detection and Instance Segmentation
+
+## Model Zoo
+
+| Backbone             | Type           | Images/GPU | LR schedule | Inference time (fps) | Box AP | Mask AP |                           Download                          | Config |
+| :------------------- | :------------- | :-----: | :-----: | :------------: | :-----: | :-----: | :-----------------------------------------------------: | :-----: |
+| ResNet50-FPN         | Cascade Faster         |    1    |   1x    |     ----     |  41.1  |    -    | [model](https://paddledet.bj.bcebos.com/models/cascade_rcnn_r50_fpn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.yml) |
+| ResNet50-FPN         | Cascade Mask         |    1    |   1x    |     ----     |  41.8  |    36.3    | [model](https://paddledet.bj.bcebos.com/models/cascade_mask_rcnn_r50_fpn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.yml) |
+| ResNet50-vd-SSLDv2-FPN | Cascade Faster         |    1    |   1x    |     ----     |  44.4  |    -    | [model](https://paddledet.bj.bcebos.com/models/cascade_rcnn_r50_vd_fpn_ssld_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/cascade_rcnn/cascade_rcnn_r50_vd_fpn_ssld_1x_coco.yml) |
+| ResNet50-vd-SSLDv2-FPN | Cascade Faster         |    1    |   2x    |     ----     |  45.0  |    -    | [model](https://paddledet.bj.bcebos.com/models/cascade_rcnn_r50_vd_fpn_ssld_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/cascade_rcnn/cascade_rcnn_r50_vd_fpn_ssld_2x_coco.yml) |
+| ResNet50-vd-SSLDv2-FPN | Cascade Mask         |    1    |   1x    |     ----     |  44.9  |    39.1    | [model](https://paddledet.bj.bcebos.com/models/cascade_mask_rcnn_r50_vd_fpn_ssld_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/cascade_rcnn/cascade_mask_rcnn_r50_vd_fpn_ssld_1x_coco.yml) |
+| ResNet50-vd-SSLDv2-FPN | Cascade Mask         |    1    |   2x    |     ----     |  45.7  |    39.7    | [model](https://paddledet.bj.bcebos.com/models/cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/cascade_rcnn/cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco.yml) |
+
+
+## Citations
+```
+@article{Cai_2019,
+   title={Cascade R-CNN: High Quality Object Detection and Instance Segmentation},
+   ISSN={1939-3539},
+   url={http://dx.doi.org/10.1109/tpami.2019.2956516},
+   DOI={10.1109/tpami.2019.2956516},
+   journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
+   publisher={Institute of Electrical and Electronics Engineers (IEEE)},
+   author={Cai, Zhaowei and Vasconcelos, Nuno},
+   year={2019},
+   pages={1–1}
+}
+```
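
The configs in the table above plug into the standard PaddleDetection entry points. A typical workflow with the 1x Cascade R-CNN config, assuming the usual tools/train.py and tools/eval.py scripts (the weights URL is the one from the table):
```
# train on COCO, evaluating at each snapshot
python tools/train.py -c configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.yml --eval
# evaluate a released checkpoint
python tools/eval.py -c configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.yml \
  -o weights=https://paddledet.bj.bcebos.com/models/cascade_rcnn_r50_fpn_1x_coco.pdparams
```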

+ 40 - 0
configs/cascade_rcnn/_base_/cascade_fpn_reader.yml

@@ -0,0 +1,40 @@
+worker_num: 2
+TrainReader:
+  sample_transforms:
+  - Decode: {}
+  - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
+  - RandomFlip: {prob: 0.5}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: true
+  drop_last: true
+  collate_batch: false
+
+
+EvalReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+
+TestReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: false
+  drop_last: false

+ 40 - 0
configs/cascade_rcnn/_base_/cascade_mask_fpn_reader.yml

@@ -0,0 +1,40 @@
+worker_num: 2
+TrainReader:
+  sample_transforms:
+  - Decode: {}
+  - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
+  - RandomFlip: {prob: 0.5}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: true
+  drop_last: true
+  collate_batch: false
+
+
+EvalReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+
+TestReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: false
+  drop_last: false

+ 97 - 0
configs/cascade_rcnn/_base_/cascade_mask_rcnn_r50_fpn.yml

@@ -0,0 +1,97 @@
+architecture: CascadeRCNN
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
+
+
+CascadeRCNN:
+  backbone: ResNet
+  neck: FPN
+  rpn_head: RPNHead
+  bbox_head: CascadeHead
+  mask_head: MaskHead
+  # post process
+  bbox_post_process: BBoxPostProcess
+  mask_post_process: MaskPostProcess
+
+ResNet:
+  # index 0 stands for res2
+  depth: 50
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+
+FPN:
+  out_channel: 256
+
+RPNHead:
+  anchor_generator:
+    aspect_ratios: [0.5, 1.0, 2.0]
+    anchor_sizes: [[32], [64], [128], [256], [512]]
+    strides: [4, 8, 16, 32, 64]
+  rpn_target_assign:
+    batch_size_per_im: 256
+    fg_fraction: 0.5
+    negative_overlap: 0.3
+    positive_overlap: 0.7
+    use_random: True
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 2000
+    post_nms_top_n: 2000
+    topk_after_collect: True
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 1000
+    post_nms_top_n: 1000
+
+
+CascadeHead:
+  head: CascadeTwoFCHead
+  roi_extractor:
+    resolution: 7
+    sampling_ratio: 0
+    aligned: True
+  bbox_assigner: BBoxAssigner
+
+BBoxAssigner:
+  batch_size_per_im: 512
+  bg_thresh: 0.5
+  fg_thresh: 0.5
+  fg_fraction: 0.25
+  cascade_iou: [0.5, 0.6, 0.7]
+  use_random: True
+
+CascadeTwoFCHead:
+  out_channel: 1024
+
+BBoxPostProcess:
+  decode:
+    name: RCNNBox
+    prior_box_var: [30.0, 30.0, 15.0, 15.0]
+  nms:
+    name: MultiClassNMS
+    keep_top_k: 100
+    score_threshold: 0.05
+    nms_threshold: 0.5
+
+
+MaskHead:
+  head: MaskFeat
+  roi_extractor:
+    resolution: 14
+    sampling_ratio: 0
+    aligned: True
+  mask_assigner: MaskAssigner
+  share_bbox_feat: False
+
+MaskFeat:
+  num_convs: 4
+  out_channel: 256
+
+MaskAssigner:
+  mask_resolution: 28
+
+MaskPostProcess:
+  binary_thresh: 0.5

+ 75 - 0
configs/cascade_rcnn/_base_/cascade_rcnn_r50_fpn.yml

@@ -0,0 +1,75 @@
+architecture: CascadeRCNN
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
+
+
+CascadeRCNN:
+  backbone: ResNet
+  neck: FPN
+  rpn_head: RPNHead
+  bbox_head: CascadeHead
+  # post process
+  bbox_post_process: BBoxPostProcess
+
+ResNet:
+  # index 0 stands for res2
+  depth: 50
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+
+FPN:
+  out_channel: 256
+
+RPNHead:
+  anchor_generator:
+    aspect_ratios: [0.5, 1.0, 2.0]
+    anchor_sizes: [[32], [64], [128], [256], [512]]
+    strides: [4, 8, 16, 32, 64]
+  rpn_target_assign:
+    batch_size_per_im: 256
+    fg_fraction: 0.5
+    negative_overlap: 0.3
+    positive_overlap: 0.7
+    use_random: True
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 2000
+    post_nms_top_n: 2000
+    topk_after_collect: True
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 1000
+    post_nms_top_n: 1000
+
+
+CascadeHead:
+  head: CascadeTwoFCHead
+  roi_extractor:
+    resolution: 7
+    sampling_ratio: 0
+    aligned: True
+  bbox_assigner: BBoxAssigner
+
+BBoxAssigner:
+  batch_size_per_im: 512
+  bg_thresh: 0.5
+  fg_thresh: 0.5
+  fg_fraction: 0.25
+  cascade_iou: [0.5, 0.6, 0.7]
+  use_random: True
+
+CascadeTwoFCHead:
+  out_channel: 1024
+
+BBoxPostProcess:
+  decode:
+    name: RCNNBox
+    prior_box_var: [30.0, 30.0, 15.0, 15.0]
+  nms:
+    name: MultiClassNMS
+    keep_top_k: 100
+    score_threshold: 0.05
+    nms_threshold: 0.5

+ 19 - 0
configs/cascade_rcnn/_base_/optimizer_1x.yml

@@ -0,0 +1,19 @@
+epoch: 12
+
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [8, 11]
+  - !LinearWarmup
+    start_factor: 0.001
+    steps: 1000
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
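
Combining the two schedulers above: LinearWarmup ramps the rate from base_lr × start_factor up to base_lr over the first 1000 iterations, after which PiecewiseDecay multiplies it by gamma = 0.1 at epochs 8 and 11. Under that reading of the scheduler semantics (my interpretation, not stated in this diff), the effective rate is

$$
\mathrm{lr}(t)=
\begin{cases}
0.01\left(0.001+0.999\,\tfrac{t}{1000}\right) & t\le 1000\ \text{iterations}\\
0.01 & \text{after warmup, epoch} < 8\\
0.001 & 8 \le \text{epoch} < 11\\
0.0001 & 11 \le \text{epoch} < 12
\end{cases}
$$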

+ 8 - 0
configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.yml

@@ -0,0 +1,8 @@
+_BASE_: [
+  '../datasets/coco_instance.yml',
+  '../runtime.yml',
+  '_base_/optimizer_1x.yml',
+  '_base_/cascade_mask_rcnn_r50_fpn.yml',
+  '_base_/cascade_mask_fpn_reader.yml',
+]
+weights: output/cascade_mask_rcnn_r50_fpn_1x_coco/model_final
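
Fields set in a leaf config like this one override the files listed in _BASE_, and any field can be overridden once more on the command line via -o, exactly as run_benchmark.sh does above. For example (the override values here are hypothetical):
```
# override reader batch size and base learning rate without editing the yml
python tools/train.py -c configs/cascade_rcnn/cascade_mask_rcnn_r50_fpn_1x_coco.yml \
  -o TrainReader.batch_size=2 LearningRate.base_lr=0.02
```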

+ 18 - 0
configs/cascade_rcnn/cascade_mask_rcnn_r50_vd_fpn_ssld_1x_coco.yml

@@ -0,0 +1,18 @@
+_BASE_: [
+  '../datasets/coco_instance.yml',
+  '../runtime.yml',
+  '_base_/optimizer_1x.yml',
+  '_base_/cascade_mask_rcnn_r50_fpn.yml',
+  '_base_/cascade_mask_fpn_reader.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
+weights: output/cascade_mask_rcnn_r50_vd_fpn_ssld_1x_coco/model_final
+
+ResNet:
+  depth: 50
+  variant: d
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+  lr_mult_list: [0.05, 0.05, 0.1, 0.15]

+ 29 - 0
configs/cascade_rcnn/cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco.yml

@@ -0,0 +1,29 @@
+_BASE_: [
+  '../datasets/coco_instance.yml',
+  '../runtime.yml',
+  '_base_/optimizer_1x.yml',
+  '_base_/cascade_mask_rcnn_r50_fpn.yml',
+  '_base_/cascade_mask_fpn_reader.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
+weights: output/cascade_mask_rcnn_r50_vd_fpn_ssld_2x_coco/model_final
+
+ResNet:
+  depth: 50
+  variant: d
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+  lr_mult_list: [0.05, 0.05, 0.1, 0.15]
+
+epoch: 24
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [12, 22]
+  - !LinearWarmup
+    start_factor: 0.1
+    steps: 1000

+ 8 - 0
configs/cascade_rcnn/cascade_rcnn_r50_fpn_1x_coco.yml

@@ -0,0 +1,8 @@
+_BASE_: [
+  '../datasets/coco_detection.yml',
+  '../runtime.yml',
+  '_base_/optimizer_1x.yml',
+  '_base_/cascade_rcnn_r50_fpn.yml',
+  '_base_/cascade_fpn_reader.yml',
+]
+weights: output/cascade_rcnn_r50_fpn_1x_coco/model_final

+ 18 - 0
configs/cascade_rcnn/cascade_rcnn_r50_vd_fpn_ssld_1x_coco.yml

@@ -0,0 +1,18 @@
+_BASE_: [
+  '../datasets/coco_detection.yml',
+  '../runtime.yml',
+  '_base_/optimizer_1x.yml',
+  '_base_/cascade_rcnn_r50_fpn.yml',
+  '_base_/cascade_fpn_reader.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
+weights: output/cascade_rcnn_r50_vd_fpn_ssld_1x_coco/model_final
+
+ResNet:
+  depth: 50
+  variant: d
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+  lr_mult_list: [0.05, 0.05, 0.1, 0.15]

+ 29 - 0
configs/cascade_rcnn/cascade_rcnn_r50_vd_fpn_ssld_2x_coco.yml

@@ -0,0 +1,29 @@
+_BASE_: [
+  '../datasets/coco_detection.yml',
+  '../runtime.yml',
+  '_base_/optimizer_1x.yml',
+  '_base_/cascade_rcnn_r50_fpn.yml',
+  '_base_/cascade_fpn_reader.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_v2_pretrained.pdparams
+weights: output/cascade_rcnn_r50_vd_fpn_ssld_2x_coco/model_final
+
+ResNet:
+  depth: 50
+  variant: d
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+  lr_mult_list: [0.05, 0.05, 0.1, 0.15]
+
+epoch: 24
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [12, 22]
+  - !LinearWarmup
+    start_factor: 0.1
+    steps: 1000

+ 37 - 0
configs/centernet/README.md

@@ -0,0 +1,37 @@
+English | [简体中文](README_cn.md)
+
+# CenterNet (CenterNet: Objects as Points)
+
+## Table of Contents
+- [Introduction](#Introduction)
+- [Model Zoo](#Model_Zoo)
+- [Citations](#Citations)
+
+## Introduction
+
+[CenterNet](http://arxiv.org/abs/1904.07850) is an anchor-free detector that models an object as a single point -- the center point of its bounding box. The detector uses keypoint estimation to find center points and regresses all other object properties. This center-point-based approach is end-to-end differentiable, simpler, faster, and more accurate than corresponding bounding-box-based detectors.
+
+## Model Zoo
+
+### CenterNet Results on COCO-val 2017
+
+| backbone       | input shape | mAP   |    FPS    | download | config |
+| :--------------| :------- |  :----: | :------: | :----: |:-----: |
+| DLA-34(paper)  | 512x512 |  37.4  |     -   |    -   |   -    |
+| DLA-34         | 512x512 |  37.6  |     -   | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_dla34_140e_coco.pdparams) | [config](./centernet_dla34_140e_coco.yml) |
+| ResNet50 + DLAUp  | 512x512 |  38.9  |     -   | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_r50_140e_coco.pdparams) | [config](./centernet_r50_140e_coco.yml) |
+| MobileNetV1 + DLAUp  | 512x512 |  28.2  |     -   | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_mbv1_140e_coco.pdparams) | [config](./centernet_mbv1_140e_coco.yml) |
+| MobileNetV3_small + DLAUp  | 512x512 | 17  |     -   | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_mbv3_small_140e_coco.pdparams) | [config](./centernet_mbv3_small_140e_coco.yml) |
+| MobileNetV3_large + DLAUp  | 512x512 |  27.1  |     -   | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_mbv3_large_140e_coco.pdparams) | [config](./centernet_mbv3_large_140e_coco.yml) |
+| ShuffleNetV2 + DLAUp  | 512x512 | 23.8  |     -   | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_shufflenetv2_140e_coco.pdparams) | [config](./centernet_shufflenetv2_140e_coco.yml) |
+
+
+## Citations
+```
+@article{zhou2019objects,
+  title={Objects as points},
+  author={Zhou, Xingyi and Wang, Dequan and Kr{\"a}henb{\"u}hl, Philipp},
+  journal={arXiv preprint arXiv:1904.07850},
+  year={2019}
+}
+```

+ 36 - 0
configs/centernet/README_cn.md

@@ -0,0 +1,36 @@
+简体中文 | [English](README.md)
+
+# CenterNet (CenterNet: Objects as Points)
+
+## Table of Contents
+- [Introduction](#Introduction)
+- [Model Zoo](#Model_Zoo)
+- [Citations](#Citations)
+
+## Introduction
+
+[CenterNet](http://arxiv.org/abs/1904.07850) is an anchor-free detector that represents an object as the center point of its bounding box. CenterNet locates the center point via keypoint estimation and regresses the remaining object properties. Being center-point based, it is end-to-end trainable and detects more efficiently than anchor-based detectors.
+
+## Model Zoo
+
+### CenterNet results on COCO-val 2017
+
+| Backbone       | Input size | mAP   |    FPS    | Download | Config |
+| :--------------| :------- |  :----: | :------: | :----: |:-----: |
+| DLA-34(paper)  | 512x512 |  37.4  |     -   |    -   |   -    |
+| DLA-34         | 512x512 |  37.6  |     -   | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_dla34_140e_coco.pdparams) | [config](./centernet_dla34_140e_coco.yml) |
+| ResNet50 + DLAUp  | 512x512 |  38.9  |     -   | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_r50_140e_coco.pdparams) | [config](./centernet_r50_140e_coco.yml) |
+| MobileNetV1 + DLAUp  | 512x512 |  28.2  |     -   | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_mbv1_140e_coco.pdparams) | [config](./centernet_mbv1_140e_coco.yml) |
+| MobileNetV3_small + DLAUp  | 512x512 | 17  |     -   | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_mbv3_small_140e_coco.pdparams) | [config](./centernet_mbv3_small_140e_coco.yml) |
+| MobileNetV3_large + DLAUp  | 512x512 |  27.1  |     -   | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_mbv3_large_140e_coco.pdparams) | [config](./centernet_mbv3_large_140e_coco.yml) |
+| ShuffleNetV2 + DLAUp  | 512x512 | 23.8  |     -   | [model](https://bj.bcebos.com/v1/paddledet/models/centernet_shufflenetv2_140e_coco.pdparams) | [config](./centernet_shufflenetv2_140e_coco.yml) |
+
+## Citations
+```
+@article{zhou2019objects,
+  title={Objects as points},
+  author={Zhou, Xingyi and Wang, Dequan and Kr{\"a}henb{\"u}hl, Philipp},
+  journal={arXiv preprint arXiv:1904.07850},
+  year={2019}
+}
+```

+ 22 - 0
configs/centernet/_base_/centernet_dla34.yml

@@ -0,0 +1,22 @@
+architecture: CenterNet
+pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/DLA34_pretrain.pdparams
+
+CenterNet:
+  backbone: DLA
+  neck: CenterNetDLAFPN
+  head: CenterNetHead
+  post_process: CenterNetPostProcess
+
+DLA:
+  depth: 34
+
+CenterNetDLAFPN:
+  down_ratio: 4
+
+CenterNetHead:
+  head_planes: 256
+  regress_ltrb: False
+
+CenterNetPostProcess:
+  max_per_img: 100
+  regress_ltrb: False

+ 34 - 0
configs/centernet/_base_/centernet_r50.yml

@@ -0,0 +1,34 @@
+architecture: CenterNet
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_ssld_pretrained.pdparams
+norm_type: sync_bn
+use_ema: true
+ema_decay: 0.9998
+
+CenterNet:
+  backbone: ResNet
+  neck: CenterNetDLAFPN
+  head: CenterNetHead
+  post_process: CenterNetPostProcess
+
+ResNet:
+  depth: 50
+  variant: d
+  return_idx: [0, 1, 2, 3]
+  freeze_at: -1
+  norm_decay: 0.
+  dcn_v2_stages: [3]
+
+
+CenterNetDLAFPN:
+  first_level: 0
+  last_level: 4
+  down_ratio: 4
+  dcn_v2: False
+
+CenterNetHead:
+  head_planes: 256
+  regress_ltrb: False
+
+CenterNetPostProcess:
+  max_per_img: 100
+  regress_ltrb: False

+ 35 - 0
configs/centernet/_base_/centernet_reader.yml

@@ -0,0 +1,35 @@
+worker_num: 4
+TrainReader:
+  inputs_def:
+    image_shape: [3, 512, 512]
+  sample_transforms:
+    - Decode: {}
+    - FlipWarpAffine: {keep_res: False, input_h: 512, input_w: 512, use_random: True}
+    - CenterRandColor: {}
+    - Lighting: {eigval: [0.2141788, 0.01817699, 0.00341571], eigvec: [[-0.58752847, -0.69563484, 0.41340352], [-0.5832747, 0.00994535, -0.81221408], [-0.56089297, 0.71832671, 0.41158938]]}
+    - NormalizeImage: {mean: [0.40789655, 0.44719303, 0.47026116], std: [0.2886383 , 0.27408165, 0.27809834], is_scale: False}
+    - Permute: {}
+    - Gt2CenterNetTarget: {down_ratio: 4, max_objs: 128}
+  batch_size: 16
+  shuffle: True
+  drop_last: True
+  use_shared_memory: True
+
+EvalReader:
+  sample_transforms:
+    - Decode: {}
+    - WarpAffine: {keep_res: True, input_h: 512, input_w: 512}
+    - NormalizeImage: {mean: [0.40789655, 0.44719303, 0.47026116], std: [0.2886383 , 0.27408165, 0.27809834]}
+    - Permute: {}
+  batch_size: 1
+
+
+TestReader:
+  inputs_def:
+    image_shape: [3, 512, 512]
+  sample_transforms:
+    - Decode: {}
+    - WarpAffine: {keep_res: True, input_h: 512, input_w: 512}
+    - NormalizeImage: {mean: [0.40789655, 0.44719303, 0.47026116], std: [0.2886383 , 0.27408165, 0.27809834], is_scale: True}
+    - Permute: {}
+  batch_size: 1
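
Gt2CenterNetTarget above encodes the ground truth at 1/down_ratio of the input resolution (here 512/4 = 128). In the CenterNet paper, each ground-truth center $p$ is mapped to its low-resolution equivalent $\tilde p = \lfloor p/R \rfloor$, with $R$ the down_ratio, and splatted onto the class-$c$ heatmap with a size-adaptive Gaussian:

$$
Y_{xyc}=\exp\!\left(-\frac{(x-\tilde p_x)^2+(y-\tilde p_y)^2}{2\sigma_p^2}\right)
$$

where $\sigma_p$ scales with the object size; max_objs: 128 caps how many such centers are encoded per image.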

+ 14 - 0
configs/centernet/_base_/optimizer_140e.yml

@@ -0,0 +1,14 @@
+epoch: 140
+
+LearningRate:
+  base_lr: 0.0005
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [90, 120]
+    use_warmup: False
+
+OptimizerBuilder:
+  optimizer:
+    type: Adam
+  regularizer: NULL

+ 9 - 0
configs/centernet/centernet_dla34_140e_coco.yml

@@ -0,0 +1,9 @@
+_BASE_: [
+  '../datasets/coco_detection.yml',
+  '../runtime.yml',
+  '_base_/optimizer_140e.yml',
+  '_base_/centernet_dla34.yml',
+  '_base_/centernet_reader.yml',
+]
+
+weights: output/centernet_dla34_140e_coco/model_final
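
A quick way to sanity-check this model is single-image inference with the released weights from the Model Zoo table above, assuming the standard tools/infer.py entry point (the demo image is a sample shipped in the repository's demo folder; substitute any local image):
```
python tools/infer.py -c configs/centernet/centernet_dla34_140e_coco.yml \
  -o weights=https://bj.bcebos.com/v1/paddledet/models/centernet_dla34_140e_coco.pdparams \
  --infer_img=demo/000000014439.jpg
```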

+ 21 - 0
configs/centernet/centernet_mbv1_140e_coco.yml

@@ -0,0 +1,21 @@
+_BASE_: [
+  'centernet_r50_140e_coco.yml'
+]
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV1_pretrained.pdparams
+weights: output/centernet_mbv1_140e_coco/model_final
+
+CenterNet:
+  backbone: MobileNet
+  neck: CenterNetDLAFPN
+  head: CenterNetHead
+  post_process: CenterNetPostProcess
+
+MobileNet:
+  scale: 1.
+  with_extra_blocks: false
+  extra_block_filters: []
+  feature_maps: [3, 5, 11, 13]
+
+TrainReader:
+  batch_size: 32

+ 22 - 0
configs/centernet/centernet_mbv3_large_140e_coco.yml

@@ -0,0 +1,22 @@
+_BASE_: [
+  'centernet_r50_140e_coco.yml'
+]
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_large_x1_0_ssld_pretrained.pdparams
+weights: output/centernet_mbv3_large_140e_coco/model_final
+
+CenterNet:
+  backbone: MobileNetV3
+  neck: CenterNetDLAFPN
+  head: CenterNetHead
+  post_process: CenterNetPostProcess
+
+MobileNetV3:
+  model_name: large
+  scale: 1.
+  with_extra_blocks: false
+  extra_block_filters: []
+  feature_maps: [4, 7, 13, 16]
+
+TrainReader:
+  batch_size: 32

+ 28 - 0
configs/centernet/centernet_mbv3_small_140e_coco.yml

@@ -0,0 +1,28 @@
+_BASE_: [
+  'centernet_r50_140e_coco.yml'
+]
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/MobileNetV3_small_x1_0_ssld_pretrained.pdparams
+weights: output/centernet_mbv3_small_140e_coco/model_final
+
+CenterNet:
+  backbone: MobileNetV3
+  neck: CenterNetDLAFPN
+  head: CenterNetHead
+  post_process: CenterNetPostProcess
+
+MobileNetV3:
+  model_name: small
+  scale: 1.
+  with_extra_blocks: false
+  extra_block_filters: []
+  feature_maps: [4, 9, 12]
+
+CenterNetDLAFPN:
+  first_level: 0
+  last_level: 3
+  down_ratio: 8
+  dcn_v2: False
+
+TrainReader:
+  batch_size: 32

+ 9 - 0
configs/centernet/centernet_r50_140e_coco.yml

@@ -0,0 +1,9 @@
+_BASE_: [
+  '../datasets/coco_detection.yml',
+  '../runtime.yml',
+  '_base_/optimizer_140e.yml',
+  '_base_/centernet_r50.yml',
+  '_base_/centernet_reader.yml',
+]
+
+weights: output/centernet_r50_140e_coco/model_final

+ 33 - 0
configs/centernet/centernet_shufflenetv2_140e_coco.yml

@@ -0,0 +1,33 @@
+_BASE_: [
+  'centernet_r50_140e_coco.yml'
+]
+
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ShuffleNetV2_x1_0_pretrained.pdparams
+weights: output/centernet_shufflenetv2_140e_coco/model_final
+
+CenterNet:
+  backbone: ShuffleNetV2
+  neck: CenterNetDLAFPN
+  head: CenterNetHead
+  post_process: CenterNetPostProcess
+
+ShuffleNetV2:
+  scale: 1.0
+  feature_maps: [5, 13, 17]
+  act: leaky_relu
+
+CenterNetDLAFPN:
+  first_level: 0
+  last_level: 3
+  down_ratio: 8
+  dcn_v2: False
+
+TrainReader:
+  batch_size: 32
+
+TestReader:
+  sample_transforms:
+    - Decode: {}
+    - WarpAffine: {keep_res: False, input_h: 512, input_w: 512}
+    - NormalizeImage: {mean: [0.40789655, 0.44719303, 0.47026116], std: [0.2886383 , 0.27408165, 0.27809834]}
+    - Permute: {}

+ 20 - 0
configs/convnext/README.md

@@ -0,0 +1,20 @@
+# ConvNeXt (A ConvNet for the 2020s)
+
+## Model Zoo
+### ConvNeXt on COCO
+
+| Model                  | Input size | Images/GPU | LR schedule | mAP<sup>val<br>0.5:0.95 | mAP<sup>val<br>0.5 | Params(M) | FLOPs(G) |    Download       | Config |
+| :------------- | :------- | :-------: | :------: | :------------: | :---------------------: | :----------------: |:---------: | :------: |:---------------: |
+| PP-YOLOE-ConvNeXt-tiny | 640 |    16      |   36e    |  44.6  |  63.3 |  33.04  |  13.87 | [model](https://paddledet.bj.bcebos.com/models/ppyoloe_convnext_tiny_36e_coco.pdparams) | [config](./ppyoloe_convnext_tiny_36e_coco.yml) |
+| YOLOX-ConvNeXt-s       | 640 |    8       |   36e    |  44.6  |  65.3 |  36.20  |  27.52 | [model](https://paddledet.bj.bcebos.com/models/yolox_convnext_s_36e_coco.pdparams) | [config](./yolox_convnext_s_36e_coco.yml) |
+
+
+## Citations
+```
+@Article{liu2022convnet,
+  author  = {Zhuang Liu and Hanzi Mao and Chao-Yuan Wu and Christoph Feichtenhofer and Trevor Darrell and Saining Xie},
+  title   = {A ConvNet for the 2020s},
+  journal = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+  year    = {2022},
+}
+```

+ 55 - 0
configs/convnext/ppyoloe_convnext_tiny_36e_coco.yml

@@ -0,0 +1,55 @@
+_BASE_: [
+  '../datasets/coco_detection.yml',
+  '../runtime.yml',
+  '../ppyoloe/_base_/ppyoloe_crn.yml',
+  '../ppyoloe/_base_/ppyoloe_reader.yml',
+]
+depth_mult: 0.25
+width_mult: 0.50
+
+log_iter: 100
+snapshot_epoch: 5
+weights: output/ppyoloe_convnext_tiny_36e_coco/model_final
+pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/convnext_tiny_22k_224.pdparams
+
+
+YOLOv3:
+  backbone: ConvNeXt
+  neck: CustomCSPPAN
+  yolo_head: PPYOLOEHead
+  post_process: ~
+
+ConvNeXt:
+  arch: 'tiny'
+  drop_path_rate: 0.4
+  layer_scale_init_value: 1.0
+  return_idx: [1, 2, 3]
+
+
+PPYOLOEHead:
+  static_assigner_epoch: 12
+  nms:
+    nms_top_k: 10000
+    keep_top_k: 300
+    score_threshold: 0.01
+    nms_threshold: 0.7
+
+
+TrainReader:
+  batch_size: 16
+
+
+epoch: 36
+LearningRate:
+  base_lr: 0.0002
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [36]
+    use_warmup: false
+
+OptimizerBuilder:
+  regularizer: false
+  optimizer:
+    type: AdamW
+    weight_decay: 0.0005

+ 58 - 0
configs/convnext/yolox_convnext_s_36e_coco.yml

@@ -0,0 +1,58 @@
+_BASE_: [
+  '../datasets/coco_detection.yml',
+  '../runtime.yml',
+  '../yolox/_base_/yolox_cspdarknet.yml',
+  '../yolox/_base_/yolox_reader.yml'
+]
+depth_mult: 0.33
+width_mult: 0.50
+
+log_iter: 100
+snapshot_epoch: 5
+weights: output/yolox_convnext_s_36e_coco/model_final
+pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/pretrained/convnext_tiny_22k_224.pdparams
+
+
+YOLOX:
+  backbone: ConvNeXt
+  neck: YOLOCSPPAN
+  head: YOLOXHead
+  size_stride: 32
+  size_range: [15, 25] # multi-scale range [480*480 ~ 800*800]
+
+ConvNeXt:
+  arch: 'tiny'
+  drop_path_rate: 0.4
+  layer_scale_init_value: 1.0
+  return_idx: [1, 2, 3]
+
+
+TrainReader:
+  batch_size: 8
+  mosaic_epoch: 30
+
+
+YOLOXHead:
+  l1_epoch: 30
+  nms:
+    name: MultiClassNMS
+    nms_top_k: 10000
+    keep_top_k: 1000
+    score_threshold: 0.001
+    nms_threshold: 0.65
+
+
+epoch: 36
+LearningRate:
+  base_lr: 0.0002
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [36]
+    use_warmup: false
+
+OptimizerBuilder:
+  regularizer: false
+  optimizer:
+    type: AdamW
+    weight_decay: 0.0005

+ 21 - 0
configs/datasets/coco_detection.yml

@@ -0,0 +1,21 @@
+metric: COCO
+num_classes: 80
+
+TrainDataset:
+  !COCODataSet
+    image_dir: train2017
+    anno_path: annotations/instances_train2017.json
+    dataset_dir: dataset/coco
+    data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+
+EvalDataset:
+  !COCODataSet
+    image_dir: val2017
+    anno_path: annotations/instances_val2017.json
+    dataset_dir: dataset/coco
+    allow_empty: true
+
+TestDataset:
+  !ImageFolder
+    anno_path: annotations/instances_val2017.json # a txt file is also supported (like VOC's label_list.txt)
+    dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'
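
All paths in this dataset config are relative to dataset_dir, so a standard COCO 2017 layout is expected under dataset/coco. prepare.sh above keeps dataset/coco/download_coco.py for fetching the full dataset; a sketch of the expected setup:
```
# download and unpack COCO 2017 into the layout this config consumes
python dataset/coco/download_coco.py
# resulting layout:
#   dataset/coco/annotations/instances_train2017.json
#   dataset/coco/annotations/instances_val2017.json
#   dataset/coco/train2017/   dataset/coco/val2017/
```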

+ 20 - 0
configs/datasets/coco_instance.yml

@@ -0,0 +1,20 @@
+metric: COCO
+num_classes: 80
+
+TrainDataset:
+  !COCODataSet
+    image_dir: train2017
+    anno_path: annotations/instances_train2017.json
+    dataset_dir: dataset/coco
+    data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_poly', 'is_crowd']
+
+EvalDataset:
+  !COCODataSet
+    image_dir: val2017
+    anno_path: annotations/instances_val2017.json
+    dataset_dir: dataset/coco
+
+TestDataset:
+  !ImageFolder
+    anno_path: annotations/instances_val2017.json # also support txt (like VOC's label_list.txt)
+    dataset_dir: dataset/coco # if set, anno_path will be 'dataset_dir/anno_path'

+ 21 - 0
configs/datasets/dota.yml

@@ -0,0 +1,21 @@
+metric: RBOX
+num_classes: 15
+
+TrainDataset:
+  !COCODataSet
+    image_dir: trainval1024/images
+    anno_path: trainval1024/DOTA_trainval1024.json
+    dataset_dir: dataset/dota/
+    data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']
+
+EvalDataset:
+  !COCODataSet
+    image_dir: trainval1024/images
+    anno_path: trainval1024/DOTA_trainval1024.json
+    dataset_dir: dataset/dota/
+    data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']
+
+TestDataset:
+  !ImageFolder
+    anno_path: test1024/DOTA_test1024.json
+    dataset_dir: dataset/dota/

+ 21 - 0
configs/datasets/dota_ms.yml

@@ -0,0 +1,21 @@
+metric: RBOX
+num_classes: 15
+
+TrainDataset:
+  !COCODataSet
+    image_dir: trainval1024/images
+    anno_path: trainval1024/DOTA_trainval1024.json
+    dataset_dir: dataset/dota_ms/
+    data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']
+
+EvalDataset:
+  !COCODataSet
+    image_dir: trainval1024/images
+    anno_path: trainval1024/DOTA_trainval1024.json
+    dataset_dir: dataset/dota_ms/
+    data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']
+
+TestDataset:
+  !ImageFolder
+    anno_path: test1024/DOTA_test1024.json
+    dataset_dir: dataset/dota_ms/

+ 25 - 0
configs/datasets/mcmot.yml

@@ -0,0 +1,25 @@
+metric: MCMOT
+num_classes: 10
+# using VisDrone2019 MOT dataset with 10 classes as default, you can modify it for your needs.
+
+# for MCMOT training
+TrainDataset:
+  !MCMOTDataSet
+    dataset_dir: dataset/mot
+    image_lists: ['visdrone_mcmot.train']
+    data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_ide']
+    label_list: label_list.txt
+
+# for MCMOT evaluation
+# If you want to change the MCMOT evaluation dataset, please modify 'data_root'
+EvalMOTDataset:
+  !MOTImageFolder
+    dataset_dir: dataset/mot
+    data_root: visdrone_mcmot/images/val
+    keep_ori_im: False # set True if save visualization images or video, or used in DeepSORT
+
+# for MCMOT video inference
+TestMOTDataset:
+  !MOTImageFolder
+    dataset_dir: dataset/mot
+    keep_ori_im: True # set True if save visualization images or video

+ 23 - 0
configs/datasets/mot.yml

@@ -0,0 +1,23 @@
+metric: MOT
+num_classes: 1
+
+# for MOT training
+TrainDataset:
+  !MOTDataSet
+    dataset_dir: dataset/mot
+    image_lists: ['mot17.train', 'caltech.all', 'cuhksysu.train', 'prw.train', 'citypersons.train', 'eth.train']
+    data_fields: ['image', 'gt_bbox', 'gt_class', 'gt_ide']
+
+# for MOT evaluation
+# If you want to change the MOT evaluation dataset, please modify 'data_root'
+EvalMOTDataset:
+  !MOTImageFolder
+    dataset_dir: dataset/mot
+    data_root: MOT16/images/train
+    keep_ori_im: False # set True if save visualization images or video, or used in DeepSORT
+
+# for MOT video inference
+TestMOTDataset:
+  !MOTImageFolder
+    dataset_dir: dataset/mot
+    keep_ori_im: True # set True if save visualization images or video

+ 21 - 0
configs/datasets/objects365_detection.yml

@@ -0,0 +1,21 @@
+metric: COCO
+num_classes: 365
+
+TrainDataset:
+  !COCODataSet
+    image_dir: train
+    anno_path: annotations/zhiyuan_objv2_train.json
+    dataset_dir: dataset/objects365
+    data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+
+EvalDataset:
+  !COCODataSet
+    image_dir: val
+    anno_path: annotations/zhiyuan_objv2_val.json
+    dataset_dir: dataset/objects365
+    allow_empty: true
+
+TestDataset:
+  !ImageFolder
+    anno_path: annotations/zhiyuan_objv2_val.json
+    dataset_dir: dataset/objects365/

+ 21 - 0
configs/datasets/roadsign_voc.yml

@@ -0,0 +1,21 @@
+metric: VOC
+map_type: integral
+num_classes: 4
+
+TrainDataset:
+  name: VOCDataSet
+  dataset_dir: dataset/roadsign_voc
+  anno_path: train.txt
+  label_list: label_list.txt
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
+
+EvalDataset:
+  name: VOCDataSet
+  dataset_dir: dataset/roadsign_voc
+  anno_path: valid.txt
+  label_list: label_list.txt
+  data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
+
+TestDataset:
+  name: ImageFolder
+  anno_path: dataset/roadsign_voc/label_list.txt

+ 47 - 0
configs/datasets/sniper_coco_detection.yml

@@ -0,0 +1,47 @@
+metric: SNIPERCOCO
+num_classes: 80
+
+TrainDataset:
+  !SniperCOCODataSet
+    image_dir: train2017
+    anno_path: annotations/instances_train2017.json
+    dataset_dir: dataset/coco
+    data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+    allow_empty: true
+    is_trainset: true
+    image_target_sizes: [2000, 1000]
+    valid_box_ratio_ranges: [[-1, 0.1], [0.08, -1]]
+    chip_target_size: 512
+    chip_target_stride: 200
+    use_neg_chip: false
+    max_neg_num_per_im: 8
+
+
+EvalDataset:
+  !SniperCOCODataSet
+    image_dir: val2017
+    anno_path: annotations/instances_val2017.json
+    dataset_dir: dataset/coco
+    data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+    allow_empty: true
+    is_trainset: false
+    image_target_sizes: [2000, 1000]
+    valid_box_ratio_ranges: [[-1, 0.1], [0.08, -1]]
+    chip_target_size: 512
+    chip_target_stride: 200
+    max_per_img: -1
+    nms_thresh: 0.5
+
+TestDataset:
+  !SniperCOCODataSet
+    image_dir: val2017
+    dataset_dir: dataset/coco
+    is_trainset: false
+    image_target_sizes: [2000, 1000]
+    valid_box_ratio_ranges: [[-1, 0.1], [0.08, -1]]
+    chip_target_size: 500
+    chip_target_stride: 200
+    max_per_img: -1
+    nms_thresh: 0.5
+
+

+ 47 - 0
configs/datasets/sniper_visdrone_detection.yml

@@ -0,0 +1,47 @@
+metric: SNIPERCOCO
+num_classes: 9
+
+TrainDataset:
+  !SniperCOCODataSet
+    image_dir: train
+    anno_path: annotations/train.json
+    dataset_dir: dataset/VisDrone2019_coco
+    data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+    allow_empty: true
+    is_trainset: true
+    image_target_sizes: [8145, 2742]
+    valid_box_ratio_ranges: [[-1, 0.03142857142857144], [0.02333211853008726, -1]]
+    chip_target_size: 1536
+    chip_target_stride: 1184
+    use_neg_chip: false
+    max_neg_num_per_im: 8
+
+
+EvalDataset:
+  !SniperCOCODataSet
+    image_dir: val
+    anno_path: annotations/val.json
+    dataset_dir: dataset/VisDrone2019_coco
+    data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+    allow_empty: true
+    is_trainset: false
+    image_target_sizes: [8145, 2742]
+    valid_box_ratio_ranges: [[-1, 0.03142857142857144], [0.02333211853008726, -1]]
+    chip_target_size: 1536
+    chip_target_stride: 1184
+    max_per_img: -1
+    nms_thresh: 0.5
+
+TestDataset:
+  !SniperCOCODataSet
+    image_dir: val
+    dataset_dir: dataset/VisDrone2019_coco
+    is_trainset: false
+    image_target_sizes: [8145, 2742]
+    valid_box_ratio_ranges: [[-1, 0.03142857142857144], [0.02333211853008726, -1]]
+    chip_target_size: 1536
+    chip_target_stride: 1184
+    max_per_img: -1
+    nms_thresh: 0.5
+
+

+ 21 - 0
configs/datasets/spine_coco.yml

@@ -0,0 +1,21 @@
+metric: RBOX
+num_classes: 9
+
+TrainDataset:
+  !COCODataSet
+    image_dir: images
+    anno_path: annotations/train.json
+    dataset_dir: dataset/spine_coco
+    data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']
+
+EvalDataset:
+  !COCODataSet
+    image_dir: images
+    anno_path: annotations/valid.json
+    dataset_dir: dataset/spine_coco
+    data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']
+
+TestDataset:
+  !ImageFolder
+    anno_path: annotations/valid.json
+    dataset_dir: dataset/spine_coco

+ 22 - 0
configs/datasets/visdrone_detection.yml

@@ -0,0 +1,22 @@
+metric: COCO
+num_classes: 10
+
+TrainDataset:
+  !COCODataSet
+    image_dir: VisDrone2019-DET-train
+    anno_path: train.json
+    dataset_dir: dataset/visdrone
+    data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
+
+EvalDataset:
+  !COCODataSet
+    image_dir: VisDrone2019-DET-val
+    anno_path: val.json
+    # image_dir: test_dev
+    # anno_path: test_dev.json
+    dataset_dir: dataset/visdrone
+
+TestDataset:
+  !ImageFolder
+    anno_path: val.json
+    dataset_dir: dataset/visdrone

+ 21 - 0
configs/datasets/voc.yml

@@ -0,0 +1,21 @@
+metric: VOC
+map_type: 11point
+num_classes: 20
+
+TrainDataset:
+  !VOCDataSet
+    dataset_dir: dataset/voc
+    anno_path: trainval.txt
+    label_list: label_list.txt
+    data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
+
+EvalDataset:
+  !VOCDataSet
+    dataset_dir: dataset/voc
+    anno_path: test.txt
+    label_list: label_list.txt
+    data_fields: ['image', 'gt_bbox', 'gt_class', 'difficult']
+
+TestDataset:
+  !ImageFolder
+    anno_path: dataset/voc/label_list.txt

+ 20 - 0
configs/datasets/wider_face.yml

@@ -0,0 +1,20 @@
+metric: WiderFace
+num_classes: 1
+
+TrainDataset:
+  !WIDERFaceDataSet
+    dataset_dir: dataset/wider_face
+    anno_path: wider_face_split/wider_face_train_bbx_gt.txt
+    image_dir: WIDER_train/images
+    data_fields: ['image', 'gt_bbox', 'gt_class']
+
+EvalDataset:
+  !WIDERFaceDataSet
+    dataset_dir: dataset/wider_face
+    anno_path: wider_face_split/wider_face_val_bbx_gt.txt
+    image_dir: WIDER_val/images
+    data_fields: ['image']
+
+TestDataset:
+  !ImageFolder
+    use_default_label: true

+ 37 - 0
configs/dcn/README.md

@@ -0,0 +1,37 @@
+### Deformable ConvNets v2
+
+| Backbone             | Model          | Conv    | Images/GPU | Lr schd | Inf time (fps) | Box AP | Mask AP |                           Download                           | Config |
+| :------------------- | :------------- | :-----: |:--------: | :-----: | :-----------: |:----: | :-----: | :----------------------------------------------------------: | :----: |
+| ResNet50-FPN         | Faster         | c3-c5   |    1      |   1x    |    -     |  42.1  |    -    | [model](https://paddledet.bj.bcebos.com/models/faster_rcnn_dcn_r50_fpn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/faster_rcnn_dcn_r50_fpn_1x_coco.yml) |
+| ResNet50-vd-FPN      | Faster         | c3-c5   |    1      |   1x    |    -     |  42.7  |    -    | [model](https://paddledet.bj.bcebos.com/models/faster_rcnn_dcn_r50_vd_fpn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/faster_rcnn_dcn_r50_vd_fpn_1x_coco.yml) |
+| ResNet50-vd-FPN      | Faster         | c3-c5   |    1      |   2x    |    -     |  43.7  |    -    | [model](https://paddledet.bj.bcebos.com/models/faster_rcnn_dcn_r50_vd_fpn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/faster_rcnn_dcn_r50_vd_fpn_2x_coco.yml) |
+| ResNet101-vd-FPN     | Faster         | c3-c5   |    1      |   1x    |    -     |  45.1  |    -    | [model](https://paddledet.bj.bcebos.com/models/faster_rcnn_dcn_r101_vd_fpn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/faster_rcnn_dcn_r101_vd_fpn_1x_coco.yml) |
+| ResNeXt101-vd-FPN    | Faster         | c3-c5   |    1      |   1x    |    -     |  46.5  |    -    | [model](https://paddledet.bj.bcebos.com/models/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.pdparams) |[config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml) |
+| ResNet50-FPN         | Mask           | c3-c5   |    1      |   1x    |    -     |  42.7  |   38.4   | [model](https://paddledet.bj.bcebos.com/models/mask_rcnn_dcn_r50_fpn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/mask_rcnn_dcn_r50_fpn_1x_coco.yml) |
+| ResNet50-vd-FPN      | Mask           | c3-c5   |    1      |   2x    |    -     |  44.6  |  39.8   | [model](https://paddledet.bj.bcebos.com/models/mask_rcnn_dcn_r50_vd_fpn_2x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/mask_rcnn_dcn_r50_vd_fpn_2x_coco.yml) |
+| ResNet101-vd-FPN     | Mask           | c3-c5   |    1      |   1x    |    -     |  45.6 |  40.6  | [model](https://paddledet.bj.bcebos.com/models/mask_rcnn_dcn_r101_vd_fpn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/mask_rcnn_dcn_r101_vd_fpn_1x_coco.yml) |
+| ResNeXt101-vd-FPN    | Mask           | c3-c5   |    1      |   1x    |     -    |  47.3 |  42.0  | [model](https://paddledet.bj.bcebos.com/models/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml) |
+| ResNet50-FPN         | Cascade Faster         | c3-c5   |    1      |   1x    |    -     |  42.1  |    -    | [model](https://paddledet.bj.bcebos.com/models/cascade_rcnn_dcn_r50_fpn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/cascade_rcnn_dcn_r50_fpn_1x_coco.yml) |
+| ResNeXt101-vd-FPN    | Cascade Faster           | c3-c5   |    1      |   1x    |     -    |  48.8 |  -  | [model](https://paddledet.bj.bcebos.com/models/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/dcn/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml) |
+
+
+**Notes:**
+
+- Deformable convolution v2 (dcn_v2) follows the paper [Deformable ConvNets v2](https://arxiv.org/abs/1811.11168).
+- `c3-c5` means adding `dcn` to stages 3 through 5 of the ResNet backbone.
+
+## Citations
+```
+@inproceedings{dai2017deformable,
+  title={Deformable Convolutional Networks},
+  author={Dai, Jifeng and Qi, Haozhi and Xiong, Yuwen and Li, Yi and Zhang, Guodong and Hu, Han and Wei, Yichen},
+  booktitle={Proceedings of the IEEE international conference on computer vision},
+  year={2017}
+}
+@article{zhu2018deformable,
+  title={Deformable ConvNets v2: More Deformable, Better Results},
+  author={Zhu, Xizhou and Hu, Han and Lin, Stephen and Dai, Jifeng},
+  journal={arXiv preprint arXiv:1811.11168},
+  year={2018}
+}
+```

+ 16 - 0
configs/dcn/cascade_rcnn_dcn_r50_fpn_1x_coco.yml

@@ -0,0 +1,16 @@
+_BASE_: [
+  '../datasets/coco_detection.yml',
+  '../runtime.yml',
+  '../cascade_rcnn/_base_/optimizer_1x.yml',
+  '../cascade_rcnn/_base_/cascade_rcnn_r50_fpn.yml',
+  '../cascade_rcnn/_base_/cascade_fpn_reader.yml',
+]
+weights: output/cascade_rcnn_dcn_r50_fpn_1x_coco/model_final
+
+ResNet:
+  depth: 50
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+  dcn_v2_stages: [1,2,3]
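
Since index 0 stands for res2 (as the comments in the sibling configs note), `dcn_v2_stages: [1,2,3]` corresponds to the `c3-c5` setting described in the README above. A one-line sanity check of that mapping (illustrative):

```python
# index 0 stands for res2 (c2), so dcn_v2_stages [1, 2, 3] covers c3-c5.
stage_names = {i: f'res{i + 2} (c{i + 2})' for i in range(4)}
print([stage_names[i] for i in [1, 2, 3]])  # ['res3 (c3)', 'res4 (c4)', 'res5 (c5)']
```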

+ 16 - 0
configs/dcn/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml

@@ -0,0 +1,16 @@
+_BASE_: [
+  'cascade_rcnn_dcn_r50_fpn_1x_coco.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams
+weights: output/cascade_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco/model_final
+
+ResNet:
+  depth: 101
+  groups: 64
+  base_width: 4
+  variant: d
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+  dcn_v2_stages: [1,2,3]

+ 15 - 0
configs/dcn/faster_rcnn_dcn_r101_vd_fpn_1x_coco.yml

@@ -0,0 +1,15 @@
+_BASE_: [
+  'faster_rcnn_dcn_r50_fpn_1x_coco.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_pretrained.pdparams
+weights: output/faster_rcnn_dcn_r101_vd_fpn_1x_coco/model_final
+
+ResNet:
+  # index 0 stands for res2
+  depth: 101
+  variant: d
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+  dcn_v2_stages: [1,2,3]

+ 16 - 0
configs/dcn/faster_rcnn_dcn_r50_fpn_1x_coco.yml

@@ -0,0 +1,16 @@
+_BASE_: [
+  '../datasets/coco_detection.yml',
+  '../runtime.yml',
+  '../faster_rcnn/_base_/optimizer_1x.yml',
+  '../faster_rcnn/_base_/faster_rcnn_r50_fpn.yml',
+  '../faster_rcnn/_base_/faster_fpn_reader.yml',
+]
+weights: output/faster_rcnn_dcn_r50_fpn_1x_coco/model_final
+
+ResNet:
+  depth: 50
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+  dcn_v2_stages: [1,2,3]

+ 15 - 0
configs/dcn/faster_rcnn_dcn_r50_vd_fpn_1x_coco.yml

@@ -0,0 +1,15 @@
+_BASE_: [
+  'faster_rcnn_dcn_r50_fpn_1x_coco.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
+weights: output/faster_rcnn_dcn_r50_vd_fpn_1x_coco/model_final
+
+ResNet:
+  # index 0 stands for res2
+  depth: 50
+  variant: d
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+  dcn_v2_stages: [1,2,3]

+ 26 - 0
configs/dcn/faster_rcnn_dcn_r50_vd_fpn_2x_coco.yml

@@ -0,0 +1,26 @@
+_BASE_: [
+  'faster_rcnn_dcn_r50_fpn_1x_coco.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
+weights: output/faster_rcnn_dcn_r50_vd_fpn_2x_coco/model_final
+
+ResNet:
+  # index 0 stands for res2
+  depth: 50
+  variant: d
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+  dcn_v2_stages: [1,2,3]
+
+epoch: 24
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [16, 22]
+  - !LinearWarmup
+    start_factor: 0.1
+    steps: 1000

+ 17 - 0
configs/dcn/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml

@@ -0,0 +1,17 @@
+_BASE_: [
+  'faster_rcnn_dcn_r50_fpn_1x_coco.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams
+weights: output/faster_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco/model_final
+
+ResNet:
+  # for ResNeXt: groups, base_width, base_channels
+  depth: 101
+  groups: 64
+  base_width: 4
+  variant: d
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+  dcn_v2_stages: [1,2,3]

+ 15 - 0
configs/dcn/mask_rcnn_dcn_r101_vd_fpn_1x_coco.yml

@@ -0,0 +1,15 @@
+_BASE_: [
+  'mask_rcnn_dcn_r50_fpn_1x_coco.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet101_vd_pretrained.pdparams
+weights: output/mask_rcnn_dcn_r101_vd_fpn_1x_coco/model_final
+
+ResNet:
+  # index 0 stands for res2
+  depth: 101
+  variant: d
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+  dcn_v2_stages: [1,2,3]

+ 16 - 0
configs/dcn/mask_rcnn_dcn_r50_fpn_1x_coco.yml

@@ -0,0 +1,16 @@
+_BASE_: [
+  '../datasets/coco_instance.yml',
+  '../runtime.yml',
+  '../mask_rcnn/_base_/optimizer_1x.yml',
+  '../mask_rcnn/_base_/mask_rcnn_r50_fpn.yml',
+  '../mask_rcnn/_base_/mask_fpn_reader.yml',
+]
+weights: output/mask_rcnn_dcn_r50_fpn_1x_coco/model_final
+
+ResNet:
+  depth: 50
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+  dcn_v2_stages: [1,2,3]

+ 26 - 0
configs/dcn/mask_rcnn_dcn_r50_vd_fpn_2x_coco.yml

@@ -0,0 +1,26 @@
+_BASE_: [
+  'mask_rcnn_dcn_r50_fpn_1x_coco.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vd_pretrained.pdparams
+weights: output/mask_rcnn_dcn_r50_vd_fpn_2x_coco/model_final
+
+ResNet:
+  # index 0 stands for res2
+  depth: 50
+  variant: d
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+  dcn_v2_stages: [1,2,3]
+
+epoch: 24
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [16, 22]
+  - !LinearWarmup
+    start_factor: 0.1
+    steps: 1000

+ 17 - 0
configs/dcn/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco.yml

@@ -0,0 +1,17 @@
+_BASE_: [
+  'mask_rcnn_dcn_r50_fpn_1x_coco.yml',
+]
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNeXt101_vd_64x4d_pretrained.pdparams
+weights: output/mask_rcnn_dcn_x101_vd_64x4d_fpn_1x_coco/model_final
+
+ResNet:
+  # for ResNeXt: groups, base_width, base_channels
+  depth: 101
+  variant: d
+  groups: 64
+  base_width: 4
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+  dcn_v2_stages: [1,2,3]

+ 36 - 0
configs/deformable_detr/README.md

@@ -0,0 +1,36 @@
+# Deformable DETR
+
+## Introduction
+
+
+Deformable DETR is an object detection model based on DETR. We reproduce the model from the paper.
+
+
+## Model Zoo
+
+| Backbone | Model | Images/GPU  | Inf time (fps) | Box AP | Config | Download |
+|:------:|:--------:|:--------:|:--------------:|:------:|:------:|:--------:|
+| R-50 | Deformable DETR  | 2 | --- |  44.5  | [config](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/configs/deformable_detr/deformable_detr_r50_1x_coco.yml) | [model](https://paddledet.bj.bcebos.com/models/deformable_detr_r50_1x_coco.pdparams) |
+
+**Notes:**
+
+- Deformable DETR is trained on the COCO train2017 dataset and evaluated on val2017; results are reported as `mAP(IoU=0.5:0.95)`.
+- Deformable DETR is trained for 50 epochs on 8 GPUs.
+
+Multi-GPU training:
+```bash
+export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
+python -m paddle.distributed.launch --gpus 0,1,2,3,4,5,6,7 tools/train.py -c configs/deformable_detr/deformable_detr_r50_1x_coco.yml --fleet
+```
+
+## Citations
+```
+@inproceedings{
+zhu2021deformable,
+title={Deformable DETR: Deformable Transformers for End-to-End Object Detection},
+author={Xizhou Zhu and Weijie Su and Lewei Lu and Bin Li and Xiaogang Wang and Jifeng Dai},
+booktitle={International Conference on Learning Representations},
+year={2021},
+url={https://openreview.net/forum?id=gZ9hCDWe6ke}
+}
+```

+ 48 - 0
configs/deformable_detr/_base_/deformable_detr_r50.yml

@@ -0,0 +1,48 @@
+architecture: DETR
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vb_normal_pretrained.pdparams
+hidden_dim: 256
+use_focal_loss: True
+
+
+DETR:
+  backbone: ResNet
+  transformer: DeformableTransformer
+  detr_head: DeformableDETRHead
+  post_process: DETRBBoxPostProcess
+
+
+ResNet:
+  # index 0 stands for res2
+  depth: 50
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [1, 2, 3]
+  lr_mult_list: [0.0, 0.1, 0.1, 0.1]
+  num_stages: 4
+
+
+DeformableTransformer:
+  num_queries: 300
+  position_embed_type: sine
+  nhead: 8
+  num_encoder_layers: 6
+  num_decoder_layers: 6
+  dim_feedforward: 1024
+  dropout: 0.1
+  activation: relu
+  num_feature_levels: 4
+  num_encoder_points: 4
+  num_decoder_points: 4
+
+
+DeformableDETRHead:
+  num_mlp_layers: 3
+
+
+DETRLoss:
+  loss_coeff: {class: 2, bbox: 5, giou: 2, mask: 1, dice: 1}
+  aux_loss: True
+
+
+HungarianMatcher:
+  matcher_coeff: {class: 2, bbox: 5, giou: 2}

+ 48 - 0
configs/deformable_detr/_base_/deformable_detr_reader.yml

@@ -0,0 +1,48 @@
+worker_num: 2
+TrainReader:
+  sample_transforms:
+  - Decode: {}
+  - RandomFlip: {prob: 0.5}
+  - RandomSelect: { transforms1: [ RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ],
+                    transforms2: [
+                        RandomShortSideResize: { short_side_sizes: [ 400, 500, 600 ] },
+                        RandomSizeCrop: { min_size: 384, max_size: 600 },
+                        RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ]
+  }
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - NormalizeBox: {}
+  - BboxXYXY2XYWH: {}
+  - Permute: {}
+  batch_transforms:
+  - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
+  batch_size: 2
+  shuffle: true
+  drop_last: true
+  collate_batch: false
+  use_shared_memory: false
+
+
+EvalReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+
+TestReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
+  batch_size: 1
+  shuffle: false
+  drop_last: false

+ 16 - 0
configs/deformable_detr/_base_/deformable_optimizer_1x.yml

@@ -0,0 +1,16 @@
+epoch: 50
+
+LearningRate:
+  base_lr: 0.0002
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [40]
+    use_warmup: false
+
+OptimizerBuilder:
+  clip_grad_by_norm: 0.1
+  regularizer: false
+  optimizer:
+    type: AdamW
+    weight_decay: 0.0001

+ 9 - 0
configs/deformable_detr/deformable_detr_r50_1x_coco.yml

@@ -0,0 +1,9 @@
+_BASE_: [
+  '../datasets/coco_detection.yml',
+  '../runtime.yml',
+  '_base_/deformable_optimizer_1x.yml',
+  '_base_/deformable_detr_r50.yml',
+  '_base_/deformable_detr_reader.yml',
+]
+weights: output/deformable_detr_r50_1x_coco/model_final
+find_unused_parameters: True

+ 39 - 0
configs/detr/README.md

@@ -0,0 +1,39 @@
+# DETR
+
+## Introduction
+
+
+DETR is an object detection model based on the transformer. We reproduce the model from the paper.
+
+
+## Model Zoo
+
+| Backbone | Model | Images/GPU  | Inf time (fps) | Box AP | Config | Download |
+|:------:|:--------:|:--------:|:--------------:|:------:|:------:|:--------:|
+| R-50 | DETR  | 4 | --- | 42.3 | [config](https://github.com/PaddlePaddle/PaddleDetection/blob/develop/configs/detr/detr_r50_1x_coco.yml) | [model](https://paddledet.bj.bcebos.com/models/detr_r50_1x_coco.pdparams) |
+
+**Notes:**
+
+- DETR is trained on the COCO train2017 dataset and evaluated on val2017; results are reported as `mAP(IoU=0.5:0.95)`.
+- DETR is trained for 500 epochs on 8 GPUs.
+
+Multi-GPU training:
+```bash
+export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
+python -m paddle.distributed.launch --gpus 0,1,2,3,4,5,6,7 tools/train.py -c configs/detr/detr_r50_1x_coco.yml --fleet
+```
+
+## Citations
+```
+@inproceedings{detr,
+  author    = {Nicolas Carion and
+               Francisco Massa and
+               Gabriel Synnaeve and
+               Nicolas Usunier and
+               Alexander Kirillov and
+               Sergey Zagoruyko},
+  title     = {End-to-End Object Detection with Transformers},
+  booktitle = {ECCV},
+  year      = {2020}
+}
+```

+ 44 - 0
configs/detr/_base_/detr_r50.yml

@@ -0,0 +1,44 @@
+architecture: DETR
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_vb_normal_pretrained.pdparams
+hidden_dim: 256
+
+
+DETR:
+  backbone: ResNet
+  transformer: DETRTransformer
+  detr_head: DETRHead
+  post_process: DETRBBoxPostProcess
+
+
+ResNet:
+  # index 0 stands for res2
+  depth: 50
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [3]
+  lr_mult_list: [0.0, 0.1, 0.1, 0.1]
+  num_stages: 4
+
+
+DETRTransformer:
+  num_queries: 100
+  position_embed_type: sine
+  nhead: 8
+  num_encoder_layers: 6
+  num_decoder_layers: 6
+  dim_feedforward: 2048
+  dropout: 0.1
+  activation: relu
+
+
+DETRHead:
+  num_mlp_layers: 3
+
+
+DETRLoss:
+  loss_coeff: {class: 1, bbox: 5, giou: 2, no_object: 0.1, mask: 1, dice: 1}
+  aux_loss: True
+
+
+HungarianMatcher:
+  matcher_coeff: {class: 1, bbox: 5, giou: 2}
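
`position_embed_type: sine` above refers to the fixed sinusoidal positional encoding of DETR-style transformers. A minimal one-dimensional version of the formula (a sketch; the model applies it per spatial axis over the feature map):

```python
import numpy as np

def sine_position_embedding(num_pos, dim, temperature=10000):
    """pe[p, 2i] = sin(p / T^(2i/dim)), pe[p, 2i+1] = cos(p / T^(2i/dim))."""
    pos = np.arange(num_pos)[:, None]   # (num_pos, 1)
    i = np.arange(dim // 2)[None, :]    # (1, dim/2)
    angle = pos / temperature ** (2 * i / dim)
    pe = np.zeros((num_pos, dim))
    pe[:, 0::2] = np.sin(angle)         # even channels: sine
    pe[:, 1::2] = np.cos(angle)         # odd channels: cosine
    return pe

print(sine_position_embedding(num_pos=4, dim=8).shape)  # (4, 8)
```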

+ 48 - 0
configs/detr/_base_/detr_reader.yml

@@ -0,0 +1,48 @@
+worker_num: 0
+TrainReader:
+  sample_transforms:
+  - Decode: {}
+  - RandomFlip: {prob: 0.5}
+  - RandomSelect: { transforms1: [ RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ],
+                    transforms2: [
+                        RandomShortSideResize: { short_side_sizes: [ 400, 500, 600 ] },
+                        RandomSizeCrop: { min_size: 384, max_size: 600 },
+                        RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ]
+  }
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - NormalizeBox: {}
+  - BboxXYXY2XYWH: {}
+  - Permute: {}
+  batch_transforms:
+  - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
+  batch_size: 2
+  shuffle: true
+  drop_last: true
+  collate_batch: false
+  use_shared_memory: false
+
+
+EvalReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+
+TestReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
+  batch_size: 1
+  shuffle: false
+  drop_last: false

+ 16 - 0
configs/detr/_base_/optimizer_1x.yml

@@ -0,0 +1,16 @@
+epoch: 500
+
+LearningRate:
+  base_lr: 0.0001
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [400]
+    use_warmup: false
+
+OptimizerBuilder:
+  clip_grad_by_norm: 0.1
+  regularizer: false
+  optimizer:
+    type: AdamW
+    weight_decay: 0.0001

+ 9 - 0
configs/detr/detr_r50_1x_coco.yml

@@ -0,0 +1,9 @@
+_BASE_: [
+  '../datasets/coco_detection.yml',
+  '../runtime.yml',
+  '_base_/optimizer_1x.yml',
+  '_base_/detr_r50.yml',
+  '_base_/detr_reader.yml',
+]
+weights: output/detr_r50_1x_coco/model_final
+find_unused_parameters: True

+ 39 - 0
configs/dino/README.md

@@ -0,0 +1,39 @@
+# DINO: DETR with Improved DeNoising Anchor Boxes for End-to-End Object Detection
+
+## Introduction
+
+
+[DINO](https://arxiv.org/abs/2203.03605) is an object detection model based on DETR. We reproduce the model from the paper.
+
+
+## Model Zoo
+
+| Backbone |      Model      | Epochs | Box AP |                 Config                  |                                     Download                                     |
+|:------:|:---------------:|:------:|:------:|:---------------------------------------:|:--------------------------------------------------------------------------------:|
+| R-50 | dino_r50_4scale |   12   |  49.1  | [config](./dino_r50_4scale_1x_coco.yml) | [model](https://paddledet.bj.bcebos.com/models/dino_r50_4scale_1x_coco.pdparams) |
+| R-50 | dino_r50_4scale |   24   |  50.5  | [config](./dino_r50_4scale_2x_coco.yml) | [model](https://paddledet.bj.bcebos.com/models/dino_r50_4scale_2x_coco.pdparams) |
+
+**Notes:**
+
+- DINO is trained on the COCO train2017 dataset and evaluated on val2017; results are reported as `mAP(IoU=0.5:0.95)`.
+- DINO is trained on 4 GPUs.
+
+Multi-GPU training:
+```bash
+python -m paddle.distributed.launch --gpus 0,1,2,3 tools/train.py -c configs/dino/dino_r50_4scale_1x_coco.yml --fleet --eval
+```
+
+## Custom Operator
+- For the multi-scale deformable attention custom operator, see [here](../../ppdet/modeling/transformers/ext_op).
+
+## Citations
+```
+@misc{zhang2022dino,
+      title={DINO: DETR with Improved DeNoising Anchor Boxes for End-to-End Object Detection},
+      author={Hao Zhang and Feng Li and Shilong Liu and Lei Zhang and Hang Su and Jun Zhu and Lionel M. Ni and Heung-Yeung Shum},
+      year={2022},
+      eprint={2203.03605},
+      archivePrefix={arXiv},
+      primaryClass={cs.CV}
+}
+```

+ 49 - 0
configs/dino/_base_/dino_r50.yml

@@ -0,0 +1,49 @@
+architecture: DETR
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
+hidden_dim: 256
+use_focal_loss: True
+
+
+DETR:
+  backbone: ResNet
+  transformer: DINOTransformer
+  detr_head: DINOHead
+  post_process: DETRBBoxPostProcess
+
+ResNet:
+  # index 0 stands for res2
+  depth: 50
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [1, 2, 3]
+  lr_mult_list: [0.0, 0.1, 0.1, 0.1]
+  num_stages: 4
+
+DINOTransformer:
+  num_queries: 900
+  position_embed_type: sine
+  num_levels: 4
+  nhead: 8
+  num_encoder_layers: 6
+  num_decoder_layers: 6
+  dim_feedforward: 2048
+  dropout: 0.0
+  activation: relu
+  pe_temperature: 20
+  pe_offset: 0.0
+  num_denoising: 100
+  label_noise_ratio: 0.5
+  box_noise_scale: 1.0
+  learnt_init_query: True
+
+DINOHead:
+  loss:
+    name: DINOLoss
+    loss_coeff: {class: 1, bbox: 5, giou: 2}
+    aux_loss: True
+    matcher:
+      name: HungarianMatcher
+      matcher_coeff: {class: 2, bbox: 5, giou: 2}
+
+DETRBBoxPostProcess:
+  num_top_queries: 300
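
`num_denoising`, `label_noise_ratio`, and `box_noise_scale` above configure DINO's denoising training branch: ground-truth labels are randomly flipped and boxes jittered, and extra queries learn to reconstruct the originals. A toy sketch of the noising step (heavily simplified; not ppdet's denoising-group construction):

```python
import numpy as np

rng = np.random.default_rng(0)

def noise_gt(labels, boxes_cxcywh, num_classes=80,
             label_noise_ratio=0.5, box_noise_scale=1.0):
    """Flip a fraction of labels; jitter boxes by up to half their w/h times scale."""
    labels = labels.copy()
    flip = rng.random(len(labels)) < label_noise_ratio * 0.5  # fraction to flip
    labels[flip] = rng.integers(0, num_classes, flip.sum())   # random wrong class
    wh = boxes_cxcywh[:, 2:]
    jitter = rng.random(boxes_cxcywh.shape) * 2 - 1           # in [-1, 1]
    noisy = boxes_cxcywh + jitter * np.tile(wh, 2) * 0.5 * box_noise_scale
    return labels, noisy

labels = np.array([3, 17])
boxes = np.array([[0.5, 0.5, 0.2, 0.2], [0.3, 0.7, 0.1, 0.4]])
print(noise_gt(labels, boxes))
```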

+ 48 - 0
configs/dino/_base_/dino_reader.yml

@@ -0,0 +1,48 @@
+worker_num: 4
+TrainReader:
+  sample_transforms:
+  - Decode: {}
+  - RandomFlip: {prob: 0.5}
+  - RandomSelect: { transforms1: [ RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ],
+                    transforms2: [
+                        RandomShortSideResize: { short_side_sizes: [ 400, 500, 600 ] },
+                        RandomSizeCrop: { min_size: 384, max_size: 600 },
+                        RandomShortSideResize: { short_side_sizes: [ 480, 512, 544, 576, 608, 640, 672, 704, 736, 768, 800 ], max_size: 1333 } ]
+  }
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - NormalizeBox: {}
+  - BboxXYXY2XYWH: {}
+  - Permute: {}
+  batch_transforms:
+  - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
+  batch_size: 4
+  shuffle: true
+  drop_last: true
+  collate_batch: false
+  use_shared_memory: false
+
+
+EvalReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+
+TestReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadMaskBatch: {pad_to_stride: -1, return_pad_mask: true}
+  batch_size: 1
+  shuffle: false
+  drop_last: false

+ 16 - 0
configs/dino/_base_/optimizer_1x.yml

@@ -0,0 +1,16 @@
+epoch: 12
+
+LearningRate:
+  base_lr: 0.0001
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [11]
+    use_warmup: false
+
+OptimizerBuilder:
+  clip_grad_by_norm: 0.1
+  regularizer: false
+  optimizer:
+    type: AdamW
+    weight_decay: 0.0001
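
With `use_warmup: false`, the schedule above is a plain step decay: the learning rate stays at `base_lr` until a milestone epoch, then is multiplied by `gamma`. A sketch of the resulting lookup (illustrative):

```python
def piecewise_lr(epoch, base_lr=0.0001, gamma=0.1, milestones=(11,)):
    """LR under step decay: base_lr * gamma^(number of milestones passed)."""
    passed = sum(epoch >= m for m in milestones)
    return base_lr * gamma ** passed

print(piecewise_lr(0))   # 0.0001
print(piecewise_lr(11))  # 1e-05 after the milestone at epoch 11
```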

+ 16 - 0
configs/dino/_base_/optimizer_2x.yml

@@ -0,0 +1,16 @@
+epoch: 24
+
+LearningRate:
+  base_lr: 0.0001
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [20]
+    use_warmup: false
+
+OptimizerBuilder:
+  clip_grad_by_norm: 0.1
+  regularizer: false
+  optimizer:
+    type: AdamW
+    weight_decay: 0.0001

+ 11 - 0
configs/dino/dino_r50_4scale_1x_coco.yml

@@ -0,0 +1,11 @@
+_BASE_: [
+  '../datasets/coco_detection.yml',
+  '../runtime.yml',
+  '_base_/optimizer_1x.yml',
+  '_base_/dino_r50.yml',
+  '_base_/dino_reader.yml',
+]
+
+weights: output/dino_r50_4scale_1x_coco/model_final
+find_unused_parameters: True
+log_iter: 100

+ 11 - 0
configs/dino/dino_r50_4scale_2x_coco.yml

@@ -0,0 +1,11 @@
+_BASE_: [
+  '../datasets/coco_detection.yml',
+  '../runtime.yml',
+  '_base_/optimizer_2x.yml',
+  '_base_/dino_r50.yml',
+  '_base_/dino_reader.yml',
+]
+
+weights: output/dino_r50_4scale_2x_coco/model_final
+find_unused_parameters: True
+log_iter: 100

+ 176 - 0
configs/face_detection/README.md

@@ -0,0 +1,176 @@
+# Face Detection Models
+
+## Introduction
+`face_detection` provides efficient, high-speed face detection solutions, including state-of-the-art models as well as classic ones.
+
+![](../../docs/images/12_Group_Group_12_Group_Group_12_935.jpg)
+
+## Model Zoo
+
+#### mAP on the WIDER-FACE dataset
+
+| Network | Input size | Images/GPU | Lr schd | Easy/Medium/Hard Set  | Prediction latency (SD855) | Model size (MB) | Download | Config |
+|:------------:|:--------:|:----:|:-------:|:-------:|:---------:|:----------:|:---------:|:--------:|
+| BlazeFace  | 640  |    8    | 1000e     | 0.885 / 0.855 / 0.731 | - | 0.472 |[model](https://paddledet.bj.bcebos.com/models/blazeface_1000e.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/face_detection/blazeface_1000e.yml) |
+| BlazeFace-FPN-SSH  | 640  |    8    | 1000e     | 0.907 / 0.883 / 0.793 | - | 0.479 |[model](https://paddledet.bj.bcebos.com/models/blazeface_fpn_ssh_1000e.pdparams) | [config](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/face_detection/blazeface_fpn_ssh_1000e.yml) |
+
+**Notes:**
+- We use a multi-scale evaluation strategy to obtain the mAP on the `Easy/Medium/Hard Set`. For details, please refer to [Evaluation on the WIDER-FACE dataset](#evaluation-on-the-wider-face-dataset).
+
+## Quick Start
+
+### Data preparation
+We use the [WIDER-FACE dataset](http://shuoyang1213.me/WIDERFACE/) for training and model testing; the official website provides a detailed introduction to the data.
+- WIDER-Face data source:
+Load a dataset of type `wider_face` using the following directory structure:
+
+  ```
+  dataset/wider_face/
+  ├── wider_face_split
+  │   ├── wider_face_train_bbx_gt.txt
+  │   ├── wider_face_val_bbx_gt.txt
+  ├── WIDER_train
+  │   ├── images
+  │   │   ├── 0--Parade
+  │   │   │   ├── 0_Parade_marchingband_1_100.jpg
+  │   │   │   ├── 0_Parade_marchingband_1_381.jpg
+  │   │   │   │   ...
+  │   │   ├── 10--People_Marching
+  │   │   │   ...
+  ├── WIDER_val
+  │   ├── images
+  │   │   ├── 0--Parade
+  │   │   │   ├── 0_Parade_marchingband_1_1004.jpg
+  │   │   │   ├── 0_Parade_marchingband_1_1045.jpg
+  │   │   │   │   ...
+  │   │   ├── 10--People_Marching
+  │   │   │   ...
+  ```
+
+- Manually download the dataset:
+To download the WIDER-FACE dataset, run the following command:
+```
+cd dataset/wider_face && ./download_wider_face.sh
+```
+
+### Parameter configuration
+The configuration of the base model can be found in `configs/face_detection/_base_/blazeface.yml`;
+the improved model adds FPN and SSH neck structures, and its configuration can be found in `configs/face_detection/_base_/blazeface_fpn.yml`. FPN and SSH can be configured as needed, as follows:
+```yaml
+BlazeNet:
+   blaze_filters: [[24, 24], [24, 24], [24, 48, 2], [48, 48], [48, 48]]
+   double_blaze_filters: [[48, 24, 96, 2], [96, 24, 96], [96, 24, 96],
+                           [96, 24, 96, 2], [96, 24, 96], [96, 24, 96]]
+   act: hard_swish # activation of the BlazeBlocks in the backbone; the base model uses relu, while hard_swish is required when adding FPN and SSH
+
+BlazeNeck:
+   neck_type : fpn_ssh # options: only_fpn, only_ssh, fpn_ssh
+   in_channel: [96,96]
+```
+
+
+
+### Training and Evaluation
+The training and evaluation workflow is the same as for other algorithms; please refer to [GETTING_STARTED_cn.md](../../docs/tutorials/GETTING_STARTED_cn.md).
+**Note:** Face detection models currently do not support evaluation during training.
+
+#### Evaluation on the WIDER-FACE dataset
+- Step 1: Evaluate and generate result files:
+```shell
+python -u tools/eval.py -c configs/face_detection/blazeface_1000e.yml \
+       -o weights=output/blazeface_1000e/model_final \
+       multi_scale=True
+```
+Set `multi_scale=True` for multi-scale evaluation. After the evaluation finishes, test results in txt format are generated in `output/pred`.
+
+- Step 2: Download the official evaluation script and ground-truth files:
+```
+wget http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/support/eval_script/eval_tools.zip
+unzip eval_tools.zip && rm -f eval_tools.zip
+```
+
+- Step 3: Start the evaluation
+
+Method 1: Python evaluation:
+```
+git clone https://github.com/wondervictor/WiderFace-Evaluation.git
+cd WiderFace-Evaluation
+# compile
+python3 setup.py build_ext --inplace
+# run the evaluation
+python3 evaluation.py -p /path/to/PaddleDetection/output/pred -g /path/to/eval_tools/ground_truth
+```
+
+Method 2: MATLAB evaluation:
+```
+# Modify the result save path and the curve legend name in `eval_tools/wider_eval.m`:
+pred_dir = './pred';  
+legend_name = 'Paddle-BlazeFace';
+
+`wider_eval.m` is the main entry point of the evaluation module. Run it as follows:
+matlab -nodesktop -nosplash -nojvm -r "run wider_eval.m;quit;"
+```
+
+### Prediction with a Python script
+To support secondary development, here is an example of running prediction with the PaddleDetection whl package from a Python script.
+```python
+import cv2
+import paddle
+import numpy as np
+from ppdet.core.workspace import load_config
+from ppdet.engine import Trainer
+from ppdet.metrics import get_infer_results
+from ppdet.data.transform.operators import NormalizeImage, Permute
+
+
+if __name__ == '__main__':
+    # prepare the basic parameters
+    config_path = 'PaddleDetection/configs/face_detection/blazeface_1000e.yml'
+    cfg = load_config(config_path)
+    weight_path = 'PaddleDetection/output/blazeface_1000e.pdparams'
+    infer_img_path = 'PaddleDetection/demo/hrnet_demo.jpg'
+    cfg.weights = weight_path
+    bbox_thre = 0.8
+    paddle.set_device('gpu')
+    # create the required objects
+    trainer = Trainer(cfg, mode='test')
+    trainer.load_weights(cfg.weights)
+    trainer.model.eval()
+    normaler = NormalizeImage(mean=[123, 117, 104], std=[127.502231, 127.502231, 127.502231], is_scale=False)
+    permuter = Permute()
+    # read the image
+    im = cv2.imread(infer_img_path)
+    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
+    # prepare the data dict
+    data_dict = {'image': im}
+    data_dict = normaler(data_dict)
+    data_dict = permuter(data_dict)
+    h, w, c = im.shape
+    data_dict['im_id'] = paddle.Tensor(np.array([[0]]))
+    data_dict['im_shape'] = paddle.Tensor(np.array([[h, w]], dtype=np.float32))
+    data_dict['scale_factor'] = paddle.Tensor(np.array([[1., 1.]], dtype=np.float32))
+    data_dict['image'] = paddle.Tensor(data_dict['image'].reshape((1, c, h, w)))
+    data_dict['curr_iter'] = paddle.Tensor(np.array([0]))
+    # run prediction
+    outs = trainer.model(data_dict)
+    # post-process the predictions to get the final bbox info
+    for key in ['im_shape', 'scale_factor', 'im_id']:
+        outs[key] = data_dict[key]
+    for key, value in outs.items():
+        outs[key] = value.numpy()
+    clsid2catid, catid2name = {0: 'face'}, {0: 0}
+    batch_res = get_infer_results(outs, clsid2catid)
+    bbox = [sub_dict for sub_dict in batch_res['bbox'] if sub_dict['score'] > bbox_thre]
+    print(bbox)
+```
+
+## Citations
+
+```
+@article{bazarevsky2019blazeface,
+      title={BlazeFace: Sub-millisecond Neural Face Detection on Mobile GPUs},
+      author={Valentin Bazarevsky and Yury Kartynnik and Andrey Vakunov and Karthik Raveendran and Matthias Grundmann},
+      year={2019},
+      eprint={1907.05047},
+      archivePrefix={arXiv},
+}
+```

+ 176 - 0
configs/face_detection/README_en.md

@@ -0,0 +1,176 @@
+# Face Detection Model
+
+## Introduction
+`face_detection` provides efficient, high-speed face detection solutions, including state-of-the-art models as well as classic ones.
+
+![](../../docs/images/12_Group_Group_12_Group_Group_12_935.jpg)
+
+## Model Zoo
+
+#### mAP on the WIDER-FACE dataset
+
+| Network | Input size | Images/GPU | Lr schd | Easy/Medium/Hard Set  | Prediction latency (SD855) | Model size (MB) | Download | Configuration File |
+|:------------:|:--------:|:----:|:-------:|:-------:|:---------:|:----------:|:---------:|:--------:|
+| BlazeFace  | 640  |    8    | 1000e     | 0.885 / 0.855 / 0.731 | - | 0.472 |[link](https://paddledet.bj.bcebos.com/models/blazeface_1000e.pdparams) | [Configuration File](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/face_detection/blazeface_1000e.yml) |
+| BlazeFace-FPN-SSH  | 640  |    8    | 1000e     | 0.907 / 0.883 / 0.793 | - | 0.479 |[link](https://paddledet.bj.bcebos.com/models/blazeface_fpn_ssh_1000e.pdparams) | [Configuration File](https://github.com/PaddlePaddle/PaddleDetection/tree/develop/configs/face_detection/blazeface_fpn_ssh_1000e.yml) |
+
+**Notes:**
+- We use a multi-scale evaluation strategy to get the mAP in `Easy/Medium/Hard Set`. Please refer to the [evaluation on the WIDER FACE dataset](#Evaluated-on-the-WIDER-FACE-Dataset) for details.
+
+## Quick Start
+
+### Data preparation
+We use the [WIDER-FACE dataset](http://shuoyang1213.me/WIDERFACE/) for training and model testing; the official website provides a detailed introduction to the data.
+- WIDER-Face data source:
+Load a dataset of type `wider_face` using the following directory structure:
+  ```
+  dataset/wider_face/
+  ├── wider_face_split
+  │   ├── wider_face_train_bbx_gt.txt
+  │   ├── wider_face_val_bbx_gt.txt
+  ├── WIDER_train
+  │   ├── images
+  │   │   ├── 0--Parade
+  │   │   │   ├── 0_Parade_marchingband_1_100.jpg
+  │   │   │   ├── 0_Parade_marchingband_1_381.jpg
+  │   │   │   │   ...
+  │   │   ├── 10--People_Marching
+  │   │   │   ...
+  ├── WIDER_val
+  │   ├── images
+  │   │   ├── 0--Parade
+  │   │   │   ├── 0_Parade_marchingband_1_1004.jpg
+  │   │   │   ├── 0_Parade_marchingband_1_1045.jpg
+  │   │   │   │   ...
+  │   │   ├── 10--People_Marching
+  │   │   │   ...
+  ```
+
+- Manually download the dataset:
+To download the WIDER-FACE dataset, run the following command:
+```
+cd dataset/wider_face && ./download_wider_face.sh
+```
+
+### Parameter configuration
+The configuration of the base model can be found in `configs/face_detection/_base_/blazeface.yml`;
+the improved model adds FPN and SSH neck structures, and its configuration can be found in `configs/face_detection/_base_/blazeface_fpn.yml`. FPN and SSH can be configured as needed, as follows:
+```yaml
+BlazeNet:
+   blaze_filters: [[24, 24], [24, 24], [24, 48, 2], [48, 48], [48, 48]]
+   double_blaze_filters: [[48, 24, 96, 2], [96, 24, 96], [96, 24, 96],
+                           [96, 24, 96, 2], [96, 24, 96], [96, 24, 96]]
+   act: hard_swish # activation of the BlazeBlocks in the backbone; the base model uses relu, while hard_swish is required when adding FPN and SSH
+
+BlazeNeck:
+   neck_type : fpn_ssh # options: only_fpn, only_ssh, fpn_ssh
+   in_channel: [96,96]
+```
+
+
+
+### Training and Evaluation
+The training and evaluation workflow is the same as for other algorithms; please refer to [GETTING_STARTED_cn.md](../../docs/tutorials/GETTING_STARTED_cn.md).
+**Note:** Face detection models currently do not support evaluation during training.
+
+#### Evaluated on the WIDER-FACE Dataset
+- Step 1: Evaluate and generate a result file:
+```shell
+python -u tools/eval.py -c configs/face_detection/blazeface_1000e.yml \
+       -o weights=output/blazeface_1000e/model_final \
+       multi_scale=True
+```
+Set `multi_scale=True` for multi-scale evaluation. After evaluation, test results in TXT format will be generated in `output/pred`.
+
+- Step 2: Download the official evaluation script and Ground Truth file:
+```
+wget http://mmlab.ie.cuhk.edu.hk/projects/WIDERFace/support/eval_script/eval_tools.zip
+unzip eval_tools.zip && rm -f eval_tools.zip
+```
+
+- Step 3: Start the evaluation
+
+Method 1: Python evaluation:
+```
+git clone https://github.com/wondervictor/WiderFace-Evaluation.git
+cd WiderFace-Evaluation
+# compile
+python3 setup.py build_ext --inplace
+# run the evaluation
+python3 evaluation.py -p /path/to/PaddleDetection/output/pred -g /path/to/eval_tools/ground_truth
+```
+
+Method 2: MatLab evaluation:
+```
+# Modify the result save path and the curve legend name in `eval_tools/wider_eval.m`:
+pred_dir = './pred';  
+legend_name = 'Paddle-BlazeFace';
+
+`wider_eval.m` is the main implementation of the evaluation module. Run the following command:
+matlab -nodesktop -nosplash -nojvm -r "run wider_eval.m;quit;"
+```
+
+### Prediction with a Python script
+To support secondary development, here is an example of running prediction with the PaddleDetection whl package from a Python script.
+```python
+import cv2
+import paddle
+import numpy as np
+from ppdet.core.workspace import load_config
+from ppdet.engine import Trainer
+from ppdet.metrics import get_infer_results
+from ppdet.data.transform.operators import NormalizeImage, Permute
+
+
+if __name__ == '__main__':
+    # prepare for the parameters
+    config_path = 'PaddleDetection/configs/face_detection/blazeface_1000e.yml'
+    cfg = load_config(config_path)
+    weight_path = 'PaddleDetection/output/blazeface_1000e.pdparams'
+    infer_img_path = 'PaddleDetection/demo/hrnet_demo.jpg'
+    cfg.weights = weight_path
+    bbox_thre = 0.8
+    paddle.set_device('gpu')
+    # create the class object
+    trainer = Trainer(cfg, mode='test')
+    trainer.load_weights(cfg.weights)
+    trainer.model.eval()
+    normaler = NormalizeImage(mean=[123, 117, 104], std=[127.502231, 127.502231, 127.502231], is_scale=False)
+    permuter = Permute()
+    # read the image file
+    im = cv2.imread(infer_img_path)
+    im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
+    # prepare for the data dict
+    data_dict = {'image': im}
+    data_dict = normaler(data_dict)
+    data_dict = permuter(data_dict)
+    h, w, c = im.shape
+    data_dict['im_id'] = paddle.Tensor(np.array([[0]]))
+    data_dict['im_shape'] = paddle.Tensor(np.array([[h, w]], dtype=np.float32))
+    data_dict['scale_factor'] = paddle.Tensor(np.array([[1., 1.]], dtype=np.float32))
+    data_dict['image'] = paddle.Tensor(data_dict['image'].reshape((1, c, h, w)))
+    data_dict['curr_iter'] = paddle.Tensor(np.array([0]))
+    # do the prediction
+    outs = trainer.model(data_dict)
+    # post-process the predictions to get the final bbox info
+    for key in ['im_shape', 'scale_factor', 'im_id']:
+        outs[key] = data_dict[key]
+    for key, value in outs.items():
+        outs[key] = value.numpy()
+    clsid2catid, catid2name = {0: 'face'}, {0: 0}
+    batch_res = get_infer_results(outs, clsid2catid)
+    bbox = [sub_dict for sub_dict in batch_res['bbox'] if sub_dict['score'] > bbox_thre]
+    print(bbox)
+```
+
+
+## Citations
+
+```
+@article{bazarevsky2019blazeface,
+      title={BlazeFace: Sub-millisecond Neural Face Detection on Mobile GPUs},
+      author={Valentin Bazarevsky and Yury Kartynnik and Andrey Vakunov and Karthik Raveendran and Matthias Grundmann},
+      year={2019},
+      eprint={1907.05047},
+      archivePrefix={arXiv},
+}
+```

+ 45 - 0
configs/face_detection/_base_/blazeface.yml

@@ -0,0 +1,45 @@
+architecture: BlazeFace
+
+BlazeFace:
+  backbone: BlazeNet
+  neck: BlazeNeck
+  blaze_head: FaceHead
+  post_process: BBoxPostProcess
+
+BlazeNet:
+  blaze_filters: [[24, 24], [24, 24], [24, 48, 2], [48, 48], [48, 48]]
+  double_blaze_filters: [[48, 24, 96, 2], [96, 24, 96], [96, 24, 96],
+                          [96, 24, 96, 2], [96, 24, 96], [96, 24, 96]]
+  act: relu
+
+BlazeNeck:
+  neck_type : None
+  in_channel: [96,96]
+
+FaceHead:
+  in_channels: [96,96]
+  anchor_generator: AnchorGeneratorSSD
+  loss: SSDLoss
+
+SSDLoss:
+  overlap_threshold: 0.35
+
+AnchorGeneratorSSD:
+  steps: [8., 16.]
+  aspect_ratios: [[1.], [1.]]
+  min_sizes: [[16.,24.], [32., 48., 64., 80., 96., 128.]]
+  max_sizes: [[], []]
+  offset: 0.5
+  flip: False
+  min_max_aspect_ratios_order: false
+
+BBoxPostProcess:
+  decode:
+    name: SSDBox
+  nms:
+    name: MultiClassNMS
+    keep_top_k: 750
+    score_threshold: 0.01
+    nms_threshold: 0.3
+    nms_top_k: 5000
+    nms_eta: 1.0
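
`AnchorGeneratorSSD` above places square anchors (aspect ratio 1) of the listed `min_sizes` on two feature maps with strides 8 and 16, centered in each cell via `offset: 0.5`. A sketch of the anchors produced at one location (illustrative, simplified relative to ppdet's generator):

```python
def anchors_at(cx, cy, sizes):
    """Square [x1, y1, x2, y2] anchors of the given sizes centered at (cx, cy)."""
    return [[cx - s / 2, cy - s / 2, cx + s / 2, cy + s / 2] for s in sizes]

# First feature map: stride 8, offset 0.5 -> cell (0, 0) is centered at (4, 4).
step, offset = 8.0, 0.5
cx = cy = step * offset
for box in anchors_at(cx, cy, sizes=[16.0, 24.0]):
    print(box)
# [-4.0, -4.0, 12.0, 12.0]
# [-8.0, -8.0, 16.0, 16.0]
```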

+ 45 - 0
configs/face_detection/_base_/blazeface_fpn.yml

@@ -0,0 +1,45 @@
+architecture: BlazeFace
+
+BlazeFace:
+  backbone: BlazeNet
+  neck: BlazeNeck
+  blaze_head: FaceHead
+  post_process: BBoxPostProcess
+
+BlazeNet:
+  blaze_filters: [[24, 24], [24, 24], [24, 48, 2], [48, 48], [48, 48]]
+  double_blaze_filters: [[48, 24, 96, 2], [96, 24, 96], [96, 24, 96],
+                          [96, 24, 96, 2], [96, 24, 96], [96, 24, 96]]
+  act: hard_swish
+
+BlazeNeck:
+  neck_type : fpn_ssh
+  in_channel: [96,96]
+
+FaceHead:
+  in_channels: [48, 48]
+  anchor_generator: AnchorGeneratorSSD
+  loss: SSDLoss
+
+SSDLoss:
+  overlap_threshold: 0.35
+
+AnchorGeneratorSSD:
+  steps: [8., 16.]
+  aspect_ratios: [[1.], [1.]]
+  min_sizes: [[16.,24.], [32., 48., 64., 80., 96., 128.]]
+  max_sizes: [[], []]
+  offset: 0.5
+  flip: False
+  min_max_aspect_ratios_order: false
+
+BBoxPostProcess:
+  decode:
+    name: SSDBox
+  nms:
+    name: MultiClassNMS
+    keep_top_k: 750
+    score_threshold: 0.01
+    nms_threshold: 0.3
+    nms_top_k: 5000
+    nms_eta: 1.0

+ 44 - 0
configs/face_detection/_base_/face_reader.yml

@@ -0,0 +1,44 @@
+worker_num: 2
+TrainReader:
+  inputs_def:
+    num_max_boxes: 90
+  sample_transforms:
+    - Decode: {}
+    - RandomDistort: {brightness: [0.5, 1.125, 0.875], random_apply: False}
+    - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
+    - RandomFlip: {}
+    - CropWithDataAchorSampling: {
+      anchor_sampler: [[1, 10, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.2, 0.0]],
+      batch_sampler: [
+        [1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
+        [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
+        [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
+        [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
+        [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
+      ],
+      target_size: 640}
+    - Resize: {target_size: [640, 640], keep_ratio: False, interp: 1}
+    - NormalizeBox: {}
+    - PadBox: {num_max_boxes: 90}
+  batch_transforms:
+    - NormalizeImage: {mean:  [123, 117, 104], std: [127.502231, 127.502231, 127.502231], is_scale: false}
+    - Permute: {}
+  batch_size: 8
+  shuffle: true
+  drop_last: true
+
+
+EvalReader:
+  sample_transforms:
+    - Decode: {}
+    - NormalizeImage: {mean:  [123, 117, 104], std: [127.502231, 127.502231, 127.502231], is_scale: false}
+    - Permute: {}
+  batch_size: 1
+
+
+TestReader:
+  sample_transforms:
+    - Decode: {}
+    - NormalizeImage: {mean:  [123, 117, 104], std: [127.502231, 127.502231, 127.502231], is_scale: false}
+    - Permute: {}
+  batch_size: 1

+ 21 - 0
configs/face_detection/_base_/optimizer_1000e.yml

@@ -0,0 +1,21 @@
+epoch: 1000
+
+LearningRate:
+  base_lr: 0.001
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones:
+    - 333
+    - 800
+  - !LinearWarmup
+    start_factor: 0.3333333333333333
+    steps: 500
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.0
+    type: RMSProp
+  regularizer:
+    factor: 0.0005
+    type: L2

+ 9 - 0
configs/face_detection/blazeface_1000e.yml

@@ -0,0 +1,9 @@
+_BASE_: [
+  '../datasets/wider_face.yml',
+  '../runtime.yml',
+  '_base_/optimizer_1000e.yml',
+  '_base_/blazeface.yml',
+  '_base_/face_reader.yml',
+]
+weights: output/blazeface_1000e/model_final
+multi_scale_eval: True

+ 9 - 0
configs/face_detection/blazeface_fpn_ssh_1000e.yml

@@ -0,0 +1,9 @@
+_BASE_: [
+  '../datasets/wider_face.yml',
+  '../runtime.yml',
+  '_base_/optimizer_1000e.yml',
+  '_base_/blazeface_fpn.yml',
+  '_base_/face_reader.yml',
+]
+weights: output/blazeface_fpn_ssh_1000e/model_final
+multi_scale_eval: True

+ 38 - 0
configs/faster_rcnn/README.md

@@ -0,0 +1,38 @@
+# Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks
+
+## Model Zoo
+
+| Backbone             | Model          | Images/GPU | Lr schd | Inf time (fps) | Box AP |                           Download                          | Config |
+| :------------------- | :------------- | :-----: | :-----: | :------------: | :-----: | :-----------------------------------------------------: | :-----: |
+| ResNet50             | Faster         |    1    |   1x    |     ----     |  36.7  | [model](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_1x_coco.pdparams) | [config](./faster_rcnn_r50_1x_coco.yml) |
+| ResNet50-vd          | Faster         |    1    |   1x    |     ----     |  37.6  | [model](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_1x_coco.pdparams) | [config](./faster_rcnn_r50_vd_1x_coco.yml) |
+| ResNet101            | Faster         |    1    |   1x    |     ----     |  39.0  | [model](https://paddledet.bj.bcebos.com/models/faster_rcnn_r101_1x_coco.pdparams) | [config](./faster_rcnn_r101_1x_coco.yml) |
+| ResNet34-FPN         | Faster         |    1    |   1x    |     ----     |  37.8  | [model](https://paddledet.bj.bcebos.com/models/faster_rcnn_r34_fpn_1x_coco.pdparams) | [config](./faster_rcnn_r34_fpn_1x_coco.yml) |
+| ResNet34-FPN-MultiScaleTest | Faster  |    1    |   1x    |     ----     |  38.2  | [model](https://paddledet.bj.bcebos.com/models/faster_rcnn_r34_fpn_multiscaletest_1x_coco.pdparams) | [config](./faster_rcnn_r34_fpn_multiscaletest_1x_coco.yml) |
+| ResNet34-vd-FPN      | Faster         |    1    |   1x    |     ----     |  38.5  | [model](https://paddledet.bj.bcebos.com/models/faster_rcnn_r34_vd_fpn_1x_coco.pdparams) | [config](./faster_rcnn_r34_vd_fpn_1x_coco.yml) |
+| ResNet50-FPN         | Faster         |    1    |   1x    |     ----     |  38.4  | [model](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_fpn_1x_coco.pdparams) | [config](./faster_rcnn_r50_fpn_1x_coco.yml) |
+| ResNet50-FPN         | Faster         |    1    |   2x    |     ----     |  40.0  | [model](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_fpn_2x_coco.pdparams) | [config](./faster_rcnn_r50_fpn_2x_coco.yml) |
+| ResNet50-vd-FPN      | Faster         |    1    |   1x    |     ----     |  39.5  | [model](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_fpn_1x_coco.pdparams) | [config](./faster_rcnn_r50_vd_fpn_1x_coco.yml) |
+| ResNet50-vd-FPN      | Faster         |    1    |   2x    |     ----     |  40.8  | [model](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_fpn_2x_coco.pdparams) | [config](./faster_rcnn_r50_vd_fpn_2x_coco.yml) |
+| ResNet101-FPN        | Faster         |    1    |   2x    |     ----     |  41.4  | [model](https://paddledet.bj.bcebos.com/models/faster_rcnn_r101_fpn_2x_coco.pdparams) | [config](./faster_rcnn_r101_fpn_2x_coco.yml) |
+| ResNet101-vd-FPN     | Faster         |    1    |   1x    |     ----     |  42.0  | [model](https://paddledet.bj.bcebos.com/models/faster_rcnn_r101_vd_fpn_1x_coco.pdparams) | [config](./faster_rcnn_r101_vd_fpn_1x_coco.yml) |
+| ResNet101-vd-FPN     | Faster         |    1    |   2x    |     ----     |  43.0  | [model](https://paddledet.bj.bcebos.com/models/faster_rcnn_r101_vd_fpn_2x_coco.pdparams) | [config](./faster_rcnn_r101_vd_fpn_2x_coco.yml) |
+| ResNeXt101-vd-FPN    | Faster         |    1    |   1x    |     ----     |  43.4  | [model](https://paddledet.bj.bcebos.com/models/faster_rcnn_x101_vd_64x4d_fpn_1x_coco.pdparams) | [config](./faster_rcnn_x101_vd_64x4d_fpn_1x_coco.yml) |
+| ResNeXt101-vd-FPN    | Faster         |    1    |   2x    |     ----     |  44.0  | [model](https://paddledet.bj.bcebos.com/models/faster_rcnn_x101_vd_64x4d_fpn_2x_coco.pdparams) | [config](./faster_rcnn_x101_vd_64x4d_fpn_2x_coco.yml) |
+| ResNet50-vd-SSLDv2-FPN | Faster       |    1    |   1x    |     ----     |  41.4  | [model](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_fpn_ssld_1x_coco.pdparams) | [config](./faster_rcnn_r50_vd_fpn_ssld_1x_coco.yml) |
+| ResNet50-vd-SSLDv2-FPN | Faster       |    1    |   2x    |     ----     |  42.3  | [model](https://paddledet.bj.bcebos.com/models/faster_rcnn_r50_vd_fpn_ssld_2x_coco.pdparams) | [config](./faster_rcnn_r50_vd_fpn_ssld_2x_coco.yml) |
+| Swin-Tiny-FPN | Faster       |    2    |   1x    |     ----     |  42.6  | [model](https://paddledet.bj.bcebos.com/models/faster_rcnn_swin_tiny_fpn_1x_coco.pdparams) | [config](./faster_rcnn_swin_tiny_fpn_1x_coco.yml) |
+| Swin-Tiny-FPN | Faster       |    2    |   2x    |     ----     |  44.8  | [model](https://paddledet.bj.bcebos.com/models/faster_rcnn_swin_tiny_fpn_2x_coco.pdparams) | [config](./faster_rcnn_swin_tiny_fpn_2x_coco.yml) |
+| Swin-Tiny-FPN | Faster       |    2    |   3x    |     ----     |  45.3  | [model](https://paddledet.bj.bcebos.com/models/faster_rcnn_swin_tiny_fpn_3x_coco.pdparams) | [config](./faster_rcnn_swin_tiny_fpn_3x_coco.yml) |
+
+## Citations
+```
+@article{Ren_2017,
+   title={Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks},
+   journal={IEEE Transactions on Pattern Analysis and Machine Intelligence},
+   publisher={Institute of Electrical and Electronics Engineers (IEEE)},
+   author={Ren, Shaoqing and He, Kaiming and Girshick, Ross and Sun, Jian},
+   year={2017},
+   month={Jun},
+}
+```
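
Editor's note: each config linked in the table above is a small leaf file that assembles the shared pieces under `_base_/` via `_BASE_` inheritance. As a minimal sketch, assuming the usual PaddleDetection layout (the authoritative include list lives in each leaf yml, which this excerpt does not show), `faster_rcnn_r50_fpn_1x_coco.yml` would look like:

```yaml
# Sketch of a leaf config: the substance lives in the included
# _base_ files; the leaf only names the composition and the
# default weights path.
_BASE_: [
  '../datasets/coco_detection.yml',
  '../runtime.yml',
  '_base_/optimizer_1x.yml',
  '_base_/faster_rcnn_r50_fpn.yml',
  '_base_/faster_fpn_reader.yml',
]
weights: output/faster_rcnn_r50_fpn_1x_coco/model_final
```

Training then needs only the leaf path, e.g. `python tools/train.py -c configs/faster_rcnn/faster_rcnn_r50_fpn_1x_coco.yml`.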

+ 40 - 0
configs/faster_rcnn/_base_/faster_fpn_reader.yml

@@ -0,0 +1,40 @@
+worker_num: 2
+TrainReader:
+  sample_transforms:
+  - Decode: {}
+  - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
+  - RandomFlip: {prob: 0.5}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: true
+  drop_last: true
+  collate_batch: false
+
+
+EvalReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+
+TestReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
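
Editor's note: because `_BASE_` merging is key-wise, a leaf config that includes this reader can override individual fields rather than restating a whole block. A hypothetical override (the batch size and worker count are illustrative, not from this commit):

```yaml
# Hypothetical leaf-config fragment: inherits faster_fpn_reader.yml
# and changes only the keys listed here.
worker_num: 4
TrainReader:
  batch_size: 2   # a larger batch usually calls for scaling LearningRate.base_lr
```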

+ 66 - 0
configs/faster_rcnn/_base_/faster_rcnn_r50.yml

@@ -0,0 +1,66 @@
+architecture: FasterRCNN
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
+
+FasterRCNN:
+  backbone: ResNet
+  rpn_head: RPNHead
+  bbox_head: BBoxHead
+  # post process
+  bbox_post_process: BBoxPostProcess
+
+
+ResNet:
+  # index 0 stands for res2
+  depth: 50
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [2]
+  num_stages: 3
+
+RPNHead:
+  anchor_generator:
+    aspect_ratios: [0.5, 1.0, 2.0]
+    anchor_sizes: [32, 64, 128, 256, 512]
+    strides: [16]
+  rpn_target_assign:
+    batch_size_per_im: 256
+    fg_fraction: 0.5
+    negative_overlap: 0.3
+    positive_overlap: 0.7
+    use_random: True
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 12000
+    post_nms_top_n: 2000
+    topk_after_collect: False
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 6000
+    post_nms_top_n: 1000
+
+
+BBoxHead:
+  head: Res5Head
+  roi_extractor:
+    resolution: 14
+    sampling_ratio: 0
+    aligned: True
+  bbox_assigner: BBoxAssigner
+  with_pool: true
+
+BBoxAssigner:
+  batch_size_per_im: 512
+  bg_thresh: 0.5
+  fg_thresh: 0.5
+  fg_fraction: 0.25
+  use_random: True
+
+BBoxPostProcess:
+  decode: RCNNBox
+  nms:
+    name: MultiClassNMS
+    keep_top_k: 100
+    score_threshold: 0.05
+    nms_threshold: 0.5

+ 73 - 0
configs/faster_rcnn/_base_/faster_rcnn_r50_fpn.yml

@@ -0,0 +1,73 @@
+architecture: FasterRCNN
+pretrain_weights: https://paddledet.bj.bcebos.com/models/pretrained/ResNet50_cos_pretrained.pdparams
+
+FasterRCNN:
+  backbone: ResNet
+  neck: FPN
+  rpn_head: RPNHead
+  bbox_head: BBoxHead
+  # post process
+  bbox_post_process: BBoxPostProcess
+
+
+ResNet:
+  # index 0 stands for res2
+  depth: 50
+  norm_type: bn
+  freeze_at: 0
+  return_idx: [0,1,2,3]
+  num_stages: 4
+
+FPN:
+  out_channel: 256
+
+RPNHead:
+  anchor_generator:
+    aspect_ratios: [0.5, 1.0, 2.0]
+    anchor_sizes: [[32], [64], [128], [256], [512]]
+    strides: [4, 8, 16, 32, 64]
+  rpn_target_assign:
+    batch_size_per_im: 256
+    fg_fraction: 0.5
+    negative_overlap: 0.3
+    positive_overlap: 0.7
+    use_random: True
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 2000
+    post_nms_top_n: 1000
+    topk_after_collect: True
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 1000
+    post_nms_top_n: 1000
+
+
+BBoxHead:
+  head: TwoFCHead
+  roi_extractor:
+    resolution: 7
+    sampling_ratio: 0
+    aligned: True
+  bbox_assigner: BBoxAssigner
+
+BBoxAssigner:
+  batch_size_per_im: 512
+  bg_thresh: 0.5
+  fg_thresh: 0.5
+  fg_fraction: 0.25
+  use_random: True
+
+TwoFCHead:
+  out_channel: 1024
+
+
+BBoxPostProcess:
+  decode: RCNNBox
+  nms:
+    name: MultiClassNMS
+    keep_top_k: 100
+    score_threshold: 0.05
+    nms_threshold: 0.5
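
Editor's note: in the `RPNHead` block above, `anchor_sizes` and `strides` pair up one-to-one across the FPN levels; the correspondence derived from those two lists is:

```yaml
# Level-by-level pairing implied by anchor_sizes and strides in RPNHead:
#   P2: stride 4   -> anchor size 32
#   P3: stride 8   -> anchor size 64
#   P4: stride 16  -> anchor size 128
#   P5: stride 32  -> anchor size 256
#   P6: stride 64  -> anchor size 512
```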

+ 41 - 0
configs/faster_rcnn/_base_/faster_rcnn_swin_reader.yml

@@ -0,0 +1,41 @@
+worker_num: 2
+TrainReader:
+  sample_transforms:
+  - Decode: {}
+  - RandomResizeCrop: {resizes: [400, 500, 600], cropsizes: [[384, 600], ], prob: 0.5}
+  - RandomResize: {target_size: [[480, 1333], [512, 1333], [544, 1333], [576, 1333], [608, 1333], [640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], keep_ratio: True, interp: 2}
+  - RandomFlip: {prob: 0.5}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 2
+  shuffle: true
+  drop_last: true
+  collate_batch: false
+
+EvalReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: 32}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+
+TestReader:
+  inputs_def:
+    image_shape: [-1, 3, 640, 640]
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: 640, keep_ratio: True}
+  - Pad: {size: 640}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_size: 1
+  shuffle: false
+  drop_last: false

+ 72 - 0
configs/faster_rcnn/_base_/faster_rcnn_swin_tiny_fpn.yml

@@ -0,0 +1,72 @@
+architecture: FasterRCNN
+
+FasterRCNN:
+  backbone: SwinTransformer
+  neck: FPN
+  rpn_head: RPNHead
+  bbox_head: BBoxHead
+  bbox_post_process: BBoxPostProcess
+
+SwinTransformer:
+  embed_dim: 96
+  depths: [2, 2, 6, 2]
+  num_heads: [3, 6, 12, 24]
+  window_size: 7
+  ape: false
+  drop_path_rate: 0.1
+  patch_norm: true
+  out_indices: [0,1,2,3]
+  pretrained: https://paddledet.bj.bcebos.com/models/pretrained/swin_tiny_patch4_window7_224.pdparams
+
+FPN:
+  out_channel: 256
+
+RPNHead:
+  anchor_generator:
+    aspect_ratios: [0.5, 1.0, 2.0]
+    anchor_sizes: [[32], [64], [128], [256], [512]]
+    strides: [4, 8, 16, 32, 64]
+  rpn_target_assign:
+    batch_size_per_im: 256
+    fg_fraction: 0.5
+    negative_overlap: 0.3
+    positive_overlap: 0.7
+    use_random: True
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 2000
+    post_nms_top_n: 1000
+    topk_after_collect: True
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 1000
+    post_nms_top_n: 1000
+
+
+BBoxHead:
+  head: TwoFCHead
+  roi_extractor:
+    resolution: 7
+    sampling_ratio: 0
+    aligned: True
+  bbox_assigner: BBoxAssigner
+
+BBoxAssigner:
+  batch_size_per_im: 512
+  bg_thresh: 0.5
+  fg_thresh: 0.5
+  fg_fraction: 0.25
+  use_random: True
+
+TwoFCHead:
+  out_channel: 1024
+
+BBoxPostProcess:
+  decode: RCNNBox
+  nms:
+    name: MultiClassNMS
+    keep_top_k: 100
+    score_threshold: 0.05
+    nms_threshold: 0.5

+ 40 - 0
configs/faster_rcnn/_base_/faster_reader.yml

@@ -0,0 +1,40 @@
+worker_num: 2
+TrainReader:
+  sample_transforms:
+  - Decode: {}
+  - RandomResize: {target_size: [[640, 1333], [672, 1333], [704, 1333], [736, 1333], [768, 1333], [800, 1333]], interp: 2, keep_ratio: True}
+  - RandomFlip: {prob: 0.5}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: -1}
+  batch_size: 1
+  shuffle: true
+  drop_last: true
+  collate_batch: false
+
+
+EvalReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: -1}
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+
+
+TestReader:
+  sample_transforms:
+  - Decode: {}
+  - Resize: {interp: 2, target_size: [800, 1333], keep_ratio: True}
+  - NormalizeImage: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
+  - Permute: {}
+  batch_transforms:
+  - PadBatch: {pad_to_stride: -1}
+  batch_size: 1
+  shuffle: false
+  drop_last: false

+ 19 - 0
configs/faster_rcnn/_base_/optimizer_1x.yml

@@ -0,0 +1,19 @@
+epoch: 12
+
+LearningRate:
+  base_lr: 0.01
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [8, 11]
+  - !LinearWarmup
+    start_factor: 0.1
+    steps: 1000
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
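
Editor's note: the 2x and 3x rows in the Model Zoo stretch this schedule proportionally. A hypothetical 2x variant, assuming the usual doubling convention (the authoritative values live in the corresponding 2x configs, which this excerpt does not show):

```yaml
# Hypothetical 2x schedule: epochs and decay milestones doubled,
# warmup and base LR unchanged from the 1x file above.
epoch: 24

LearningRate:
  base_lr: 0.01
  schedulers:
  - !PiecewiseDecay
    gamma: 0.1
    milestones: [16, 22]
  - !LinearWarmup
    start_factor: 0.1
    steps: 1000
```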

+ 22 - 0
configs/faster_rcnn/_base_/optimizer_swin_1x.yml

@@ -0,0 +1,22 @@
+epoch: 12
+
+LearningRate:
+  base_lr: 0.0001
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [8, 11]
+  - !LinearWarmup
+    start_factor: 0.1
+    steps: 1000
+
+OptimizerBuilder:
+  clip_grad_by_norm: 1.0
+  optimizer:
+    type: AdamW
+    weight_decay: 0.05
+
+    param_groups:
+        -
+          params: ['absolute_pos_embed', 'relative_position_bias_table', 'norm']
+          weight_decay: 0.

+ 0 - 0
configs/faster_rcnn/faster_rcnn_r101_1x_coco.yml


Some files were not shown because too many files changed in this diff