picodet_mnn.cpp 7.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229
  1. // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. // reference from https://github.com/RangiLyu/nanodet/tree/main/demo_mnn
  #include "picodet_mnn.h"

  #include <stdexcept>
  16. using namespace std;
  17. PicoDet::PicoDet(const std::string &mnn_path,
  18. int input_width,
  19. int input_length,
  20. int num_thread_,
  21. float score_threshold_,
  22. float nms_threshold_) {
  23. num_thread = num_thread_;
  24. in_w = input_width;
  25. in_h = input_length;
  26. score_threshold = score_threshold_;
  27. nms_threshold = nms_threshold_;
  28. PicoDet_interpreter = std::shared_ptr<MNN::Interpreter>(
  29. MNN::Interpreter::createFromFile(mnn_path.c_str()));
  30. MNN::ScheduleConfig config;
  31. config.numThread = num_thread;
  32. MNN::BackendConfig backendConfig;
  33. backendConfig.precision = (MNN::BackendConfig::PrecisionMode)2;
  34. config.backendConfig = &backendConfig;
  35. PicoDet_session = PicoDet_interpreter->createSession(config);
  36. input_tensor = PicoDet_interpreter->getSessionInput(PicoDet_session, nullptr);
  37. }
  38. PicoDet::~PicoDet() {
  39. PicoDet_interpreter->releaseModel();
  40. PicoDet_interpreter->releaseSession(PicoDet_session);
  41. }
  42. int PicoDet::detect(cv::Mat &raw_image, std::vector<BoxInfo> &result_list) {
  43. if (raw_image.empty()) {
  44. std::cout << "image is empty ,please check!" << std::endl;
  45. return -1;
  46. }
  47. image_h = raw_image.rows;
  48. image_w = raw_image.cols;
  49. cv::Mat image;
  50. cv::resize(raw_image, image, cv::Size(in_w, in_h));
  51. PicoDet_interpreter->resizeTensor(input_tensor, {1, 3, in_h, in_w});
  52. PicoDet_interpreter->resizeSession(PicoDet_session);
  53. std::shared_ptr<MNN::CV::ImageProcess> pretreat(MNN::CV::ImageProcess::create(
  54. MNN::CV::BGR, MNN::CV::BGR, mean_vals, 3, norm_vals, 3));
  55. pretreat->convert(image.data, in_w, in_h, image.step[0], input_tensor);
  56. auto start = chrono::steady_clock::now();
  57. // run network
  58. PicoDet_interpreter->runSession(PicoDet_session);
  59. // get output data
  60. std::vector<std::vector<BoxInfo>> results;
  61. results.resize(num_class);
  62. for (const auto &head_info : heads_info) {
  63. MNN::Tensor *tensor_scores = PicoDet_interpreter->getSessionOutput(
  64. PicoDet_session, head_info.cls_layer.c_str());
  65. MNN::Tensor *tensor_boxes = PicoDet_interpreter->getSessionOutput(
  66. PicoDet_session, head_info.dis_layer.c_str());
  67. MNN::Tensor tensor_scores_host(tensor_scores,
  68. tensor_scores->getDimensionType());
  69. tensor_scores->copyToHostTensor(&tensor_scores_host);
  70. MNN::Tensor tensor_boxes_host(tensor_boxes,
  71. tensor_boxes->getDimensionType());
  72. tensor_boxes->copyToHostTensor(&tensor_boxes_host);
  73. decode_infer(&tensor_scores_host,
  74. &tensor_boxes_host,
  75. head_info.stride,
  76. score_threshold,
  77. results);
  78. }
  79. auto end = chrono::steady_clock::now();
  80. chrono::duration<double> elapsed = end - start;
  81. cout << "inference time:" << elapsed.count() << " s, ";
  82. for (int i = 0; i < (int)results.size(); i++) {
  83. nms(results[i], nms_threshold);
  84. for (auto box : results[i]) {
  85. box.x1 = box.x1 / in_w * image_w;
  86. box.x2 = box.x2 / in_w * image_w;
  87. box.y1 = box.y1 / in_h * image_h;
  88. box.y2 = box.y2 / in_h * image_h;
  89. result_list.push_back(box);
  90. }
  91. }
  92. cout << "detect " << result_list.size() << " objects." << std::endl;
  93. ;
  94. return 0;
  95. }
  96. void PicoDet::decode_infer(MNN::Tensor *cls_pred,
  97. MNN::Tensor *dis_pred,
  98. int stride,
  99. float threshold,
  100. std::vector<std::vector<BoxInfo>> &results) {
  101. int feature_h = in_h / stride;
  102. int feature_w = in_w / stride;
  103. for (int idx = 0; idx < feature_h * feature_w; idx++) {
  104. const float *scores = cls_pred->host<float>() + (idx * num_class);
  105. int row = idx / feature_w;
  106. int col = idx % feature_w;
  107. float score = 0;
  108. int cur_label = 0;
  109. for (int label = 0; label < num_class; label++) {
  110. if (scores[label] > score) {
  111. score = scores[label];
  112. cur_label = label;
  113. }
  114. }
  115. if (score > threshold) {
  116. const float *bbox_pred =
  117. dis_pred->host<float>() + (idx * 4 * (reg_max + 1));
  118. results[cur_label].push_back(
  119. disPred2Bbox(bbox_pred, cur_label, score, col, row, stride));
  120. }
  121. }
  122. }
  123. BoxInfo PicoDet::disPred2Bbox(
  124. const float *&dfl_det, int label, float score, int x, int y, int stride) {
  125. float ct_x = (x + 0.5) * stride;
  126. float ct_y = (y + 0.5) * stride;
  127. std::vector<float> dis_pred;
  128. dis_pred.resize(4);
  129. for (int i = 0; i < 4; i++) {
  130. float dis = 0;
  131. float *dis_after_sm = new float[reg_max + 1];
  132. activation_function_softmax(
  133. dfl_det + i * (reg_max + 1), dis_after_sm, reg_max + 1);
  134. for (int j = 0; j < reg_max + 1; j++) {
  135. dis += j * dis_after_sm[j];
  136. }
  137. dis *= stride;
  138. dis_pred[i] = dis;
  139. delete[] dis_after_sm;
  140. }
  141. float xmin = (std::max)(ct_x - dis_pred[0], .0f);
  142. float ymin = (std::max)(ct_y - dis_pred[1], .0f);
  143. float xmax = (std::min)(ct_x + dis_pred[2], (float)in_w);
  144. float ymax = (std::min)(ct_y + dis_pred[3], (float)in_h);
  145. return BoxInfo{xmin, ymin, xmax, ymax, score, label};
  146. }
  147. void PicoDet::nms(std::vector<BoxInfo> &input_boxes, float NMS_THRESH) {
  148. std::sort(input_boxes.begin(), input_boxes.end(), [](BoxInfo a, BoxInfo b) {
  149. return a.score > b.score;
  150. });
  151. std::vector<float> vArea(input_boxes.size());
  152. for (int i = 0; i < int(input_boxes.size()); ++i) {
  153. vArea[i] = (input_boxes.at(i).x2 - input_boxes.at(i).x1 + 1) *
  154. (input_boxes.at(i).y2 - input_boxes.at(i).y1 + 1);
  155. }
  156. for (int i = 0; i < int(input_boxes.size()); ++i) {
  157. for (int j = i + 1; j < int(input_boxes.size());) {
  158. float xx1 = (std::max)(input_boxes[i].x1, input_boxes[j].x1);
  159. float yy1 = (std::max)(input_boxes[i].y1, input_boxes[j].y1);
  160. float xx2 = (std::min)(input_boxes[i].x2, input_boxes[j].x2);
  161. float yy2 = (std::min)(input_boxes[i].y2, input_boxes[j].y2);
  162. float w = (std::max)(float(0), xx2 - xx1 + 1);
  163. float h = (std::max)(float(0), yy2 - yy1 + 1);
  164. float inter = w * h;
  165. float ovr = inter / (vArea[i] + vArea[j] - inter);
  166. if (ovr >= NMS_THRESH) {
  167. input_boxes.erase(input_boxes.begin() + j);
  168. vArea.erase(vArea.begin() + j);
  169. } else {
  170. j++;
  171. }
  172. }
  173. }
  174. }
  175. string PicoDet::get_label_str(int label) { return labels[label]; }
  // Fast approximation of exp(x) that writes a scaled, biased value straight
  // into the bit pattern of an IEEE-754 single-precision float.
  //
  // Within the representable range the result is identical to the unclamped
  // formula. Outside it the result is clamped:
  //   - very negative x (scaled value <= 0) returns 0.0f,
  //   - very large x (scaled value reaches the +inf pattern) returns +inf,
  //   - NaN is returned unchanged.
  // Without the clamp the double -> uint32_t conversion is undefined for
  // out-of-range values, and large positive x could set the sign bit.
  inline float fast_exp(float x) {
    union {
      uint32_t i;
      float f;
    } v{};

    if (x != x) {
      return x;  // NaN in, NaN out
    }

    // Bit pattern of the result, still as a real number.
    const double bits = (1 << 23) * (1.4426950409 * x + 126.93490512f);
    constexpr double kInfBits = 2139095040.0;  // 0x7F800000, +infinity

    if (bits <= 0.0) {
      return 0.0f;
    }
    if (bits >= kInfBits) {
      v.i = 0x7F800000u;
      return v.f;
    }

    v.i = static_cast<uint32_t>(bits);
    return v.f;
  }
  184. inline float sigmoid(float x) { return 1.0f / (1.0f + fast_exp(-x)); }
  // Softmax over `length` values, using fast_exp for the exponentials.
  //
  // src     input values (length elements).
  // dst     output; receives the normalised values (length elements).
  // length  number of elements; nothing is read or written when it is <= 0.
  //
  // The maximum input is subtracted before exponentiating, so every exponent
  // argument is <= 0. Always returns 0.
  template <typename T>
  int activation_function_softmax(const T *src, T *dst, int length) {
    // std::max_element on an empty range returns `src`, which must not be
    // dereferenced.
    if (length <= 0) {
      return 0;
    }

    const T alpha = *std::max_element(src, src + length);
    T denominator{0};
    for (int i = 0; i < length; ++i) {
      dst[i] = fast_exp(src[i] - alpha);
      denominator += dst[i];
    }
    for (int i = 0; i < length; ++i) {
      dst[i] /= denominator;
    }
    return 0;
  }