object_detector.cc

// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <sstream>
#include <chrono>
#include <iomanip>  // for std::setprecision

#include "include/object_detector.h"

namespace PaddleDetection {

// Load the model and create the Paddle-Lite predictor
void ObjectDetector::LoadModel(std::string model_file, int num_threads) {
  MobileConfig config;
  config.set_threads(num_threads);
  config.set_model_from_file(model_file + "/model.nb");
  config.set_power_mode(LITE_POWER_HIGH);
  predictor_ = CreatePaddlePredictor<MobileConfig>(config);
}
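
// Usage note for LoadModel() above: `model_file` is expected to be a directory
// that already contains a Paddle-Lite optimized model saved as `model.nb`
// (typically produced by Paddle-Lite's opt tool). A purely illustrative call
// sketch (the constructor setup is hypothetical, not defined in this file):
//
//   PaddleDetection::ObjectDetector detector(/*...*/);      // hypothetical setup
//   detector.LoadModel("./model_dir", /*num_threads=*/4);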

// Visualize detection results
cv::Mat VisualizeResult(const cv::Mat& img,
                        const std::vector<PaddleDetection::ObjectResult>& results,
                        const std::vector<std::string>& labels,
                        const std::vector<int>& colormap,
                        const bool is_rbox = false) {
  cv::Mat vis_img = img.clone();
  for (int i = 0; i < results.size(); ++i) {
    // Compose the label text: "<class name> <confidence>"
    std::ostringstream oss;
    oss << std::setiosflags(std::ios::fixed) << std::setprecision(4);
    oss << labels[results[i].class_id] << " ";
    oss << results[i].confidence;
    std::string text = oss.str();

    // Configure per-class color and text size
    int c1 = colormap[3 * results[i].class_id + 0];
    int c2 = colormap[3 * results[i].class_id + 1];
    int c3 = colormap[3 * results[i].class_id + 2];
    cv::Scalar roi_color = cv::Scalar(c1, c2, c3);
    int font_face = cv::FONT_HERSHEY_COMPLEX_SMALL;
    double font_scale = 0.5f;
    float thickness = 0.5;
    cv::Size text_size =
        cv::getTextSize(text, font_face, font_scale, thickness, nullptr);
    cv::Point origin;

    if (is_rbox) {
      // Rotated box: draw the four edges of the quadrilateral
      for (int k = 0; k < 4; k++) {
        cv::Point pt1 = cv::Point(results[i].rect[(k * 2) % 8],
                                  results[i].rect[(k * 2 + 1) % 8]);
        cv::Point pt2 = cv::Point(results[i].rect[(k * 2 + 2) % 8],
                                  results[i].rect[(k * 2 + 3) % 8]);
        cv::line(vis_img, pt1, pt2, roi_color, 2);
      }
    } else {
      // Axis-aligned box: draw a rectangle from (xmin, ymin, xmax, ymax)
      int w = results[i].rect[2] - results[i].rect[0];
      int h = results[i].rect[3] - results[i].rect[1];
      cv::Rect roi = cv::Rect(results[i].rect[0], results[i].rect[1], w, h);
      cv::rectangle(vis_img, roi, roi_color, 2);
    }

    // Draw the text background and the label text above the box
    origin.x = results[i].rect[0];
    origin.y = results[i].rect[1];
    cv::Rect text_back = cv::Rect(results[i].rect[0],
                                  results[i].rect[1] - text_size.height,
                                  text_size.width,
                                  text_size.height);
    cv::rectangle(vis_img, text_back, roi_color, -1);
    cv::putText(vis_img,
                text,
                origin,
                font_face,
                font_scale,
                cv::Scalar(255, 255, 255),
                thickness);
  }
  return vis_img;
}
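
// Note on VisualizeResult() above: ObjectResult::rect is read as eight values
// (the four corner points x1,y1,...,x4,y4) when is_rbox is true, and as four
// values (xmin, ymin, xmax, ymax) otherwise. A typical, illustrative pairing
// with GenerateColorMap() defined later in this file:
//
//   auto colormap = GenerateColorMap(labels.size());
//   cv::Mat vis = VisualizeResult(img, results, labels, colormap, false);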

void ObjectDetector::Preprocess(const cv::Mat& ori_im) {
  // Clone the image: keep the original mat for postprocessing
  cv::Mat im = ori_im.clone();
  cv::cvtColor(im, im, cv::COLOR_BGR2RGB);
  preprocessor_.Run(&im, &inputs_);
}
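
// Note on Preprocess() above: besides the BGR->RGB conversion done here, the
// preprocessor_ pipeline fills inputs_ (im_data_, im_shape_, scale_factor_,
// in_net_shape_), which Predict() reads when it builds the input tensors below.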

void ObjectDetector::Postprocess(const std::vector<cv::Mat> mats,
                                 std::vector<PaddleDetection::ObjectResult>* result,
                                 std::vector<int> bbox_num,
                                 bool is_rbox = false) {
  result->clear();
  int start_idx = 0;
  for (int im_id = 0; im_id < mats.size(); im_id++) {
    cv::Mat raw_mat = mats[im_id];
    int rh = 1;
    int rw = 1;
    if (config_.arch_ == "Face") {
      // Face models emit normalized coordinates; rescale by the image size
      rh = raw_mat.rows;
      rw = raw_mat.cols;
    }
    for (int j = start_idx; j < start_idx + bbox_num[im_id]; j++) {
      if (is_rbox) {
        // Rotated box: 10 floats per detection
        // [class_id, score, x1, y1, x2, y2, x3, y3, x4, y4]
        int class_id = static_cast<int>(round(output_data_[0 + j * 10]));
        float score = output_data_[1 + j * 10];
        int x1 = (output_data_[2 + j * 10] * rw);
        int y1 = (output_data_[3 + j * 10] * rh);
        int x2 = (output_data_[4 + j * 10] * rw);
        int y2 = (output_data_[5 + j * 10] * rh);
        int x3 = (output_data_[6 + j * 10] * rw);
        int y3 = (output_data_[7 + j * 10] * rh);
        int x4 = (output_data_[8 + j * 10] * rw);
        int y4 = (output_data_[9 + j * 10] * rh);
        PaddleDetection::ObjectResult result_item;
        result_item.rect = {x1, y1, x2, y2, x3, y3, x4, y4};
        result_item.class_id = class_id;
        result_item.confidence = score;
        result->push_back(result_item);
      } else {
        // Axis-aligned box: 6 floats per detection
        // [class_id, score, xmin, ymin, xmax, ymax]
        int class_id = static_cast<int>(round(output_data_[0 + j * 6]));
        float score = output_data_[1 + j * 6];
        int xmin = (output_data_[2 + j * 6] * rw);
        int ymin = (output_data_[3 + j * 6] * rh);
        int xmax = (output_data_[4 + j * 6] * rw);
        int ymax = (output_data_[5 + j * 6] * rh);
        int wd = xmax - xmin;
        int hd = ymax - ymin;
        PaddleDetection::ObjectResult result_item;
        result_item.rect = {xmin, ymin, xmax, ymax};
        result_item.class_id = class_id;
        result_item.confidence = score;
        result->push_back(result_item);
      }
    }
    start_idx += bbox_num[im_id];
  }
}
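
// Note on Postprocess() above: output_data_ is assumed to be a flat array with
// a fixed stride per detection (10 floats for rotated boxes, 6 otherwise), and
// bbox_num gives the number of detections belonging to each input image.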

void ObjectDetector::Predict(const std::vector<cv::Mat>& imgs,
                             const double threshold,
                             const int warmup,
                             const int repeats,
                             std::vector<PaddleDetection::ObjectResult>* result,
                             std::vector<int>* bbox_num,
                             std::vector<double>* times) {
  auto preprocess_start = std::chrono::steady_clock::now();
  int batch_size = imgs.size();

  // Batched input data
  std::vector<float> in_data_all;
  std::vector<float> im_shape_all(batch_size * 2);
  std::vector<float> scale_factor_all(batch_size * 2);

  // Preprocess every image in the batch
  for (int bs_idx = 0; bs_idx < batch_size; bs_idx++) {
    cv::Mat im = imgs.at(bs_idx);
    Preprocess(im);
    im_shape_all[bs_idx * 2] = inputs_.im_shape_[0];
    im_shape_all[bs_idx * 2 + 1] = inputs_.im_shape_[1];
    scale_factor_all[bs_idx * 2] = inputs_.scale_factor_[0];
    scale_factor_all[bs_idx * 2 + 1] = inputs_.scale_factor_[1];
    // TODO: reduce cost time
    in_data_all.insert(
        in_data_all.end(), inputs_.im_data_.begin(), inputs_.im_data_.end());
  }
  auto preprocess_end = std::chrono::steady_clock::now();

  std::vector<const float*> output_data_list_;

  // Prepare input tensors
  auto input_names = predictor_->GetInputNames();
  for (const auto& tensor_name : input_names) {
    auto in_tensor = predictor_->GetInputByName(tensor_name);
    if (tensor_name == "image") {
      int rh = inputs_.in_net_shape_[0];
      int rw = inputs_.in_net_shape_[1];
      in_tensor->Resize({batch_size, 3, rh, rw});
      auto* inptr = in_tensor->mutable_data<float>();
      std::copy_n(in_data_all.data(), in_data_all.size(), inptr);
    } else if (tensor_name == "im_shape") {
      in_tensor->Resize({batch_size, 2});
      auto* inptr = in_tensor->mutable_data<float>();
      std::copy_n(im_shape_all.data(), im_shape_all.size(), inptr);
    } else if (tensor_name == "scale_factor") {
      in_tensor->Resize({batch_size, 2});
      auto* inptr = in_tensor->mutable_data<float>();
      std::copy_n(scale_factor_all.data(), scale_factor_all.size(), inptr);
    }
  }

  // Warmup runs; the outputs fetched here are discarded
  for (int i = 0; i < warmup; i++) {
    predictor_->Run();
    auto output_names = predictor_->GetOutputNames();
    if (config_.arch_ == "PicoDet") {
      for (int j = 0; j < output_names.size(); j++) {
        auto output_tensor = predictor_->GetTensor(output_names[j]);
        const float* outptr = output_tensor->data<float>();
        std::vector<int64_t> output_shape = output_tensor->shape();
        output_data_list_.push_back(outptr);
      }
    } else {
      auto out_tensor = predictor_->GetTensor(output_names[0]);
      auto out_bbox_num = predictor_->GetTensor(output_names[1]);
    }
  }

  bool is_rbox = false;

  // Timed inference runs
  auto inference_start = std::chrono::steady_clock::now();
  for (int i = 0; i < repeats; i++) {
    predictor_->Run();
  }
  auto inference_end = std::chrono::steady_clock::now();

  auto postprocess_start = std::chrono::steady_clock::now();
  // Fetch output tensors
  output_data_list_.clear();
  int num_class = 80;
  int reg_max = 7;
  auto output_names = predictor_->GetOutputNames();
  // TODO: Unify model outputs.
  if (config_.arch_ == "PicoDet") {
    for (int i = 0; i < output_names.size(); i++) {
      auto output_tensor = predictor_->GetTensor(output_names[i]);
      const float* outptr = output_tensor->data<float>();
      std::vector<int64_t> output_shape = output_tensor->shape();
      if (i == 0) {
        num_class = output_shape[2];
      }
      if (i == config_.fpn_stride_.size()) {
        reg_max = output_shape[2] / 4 - 1;
      }
      output_data_list_.push_back(outptr);
    }
  } else {
    auto output_tensor = predictor_->GetTensor(output_names[0]);
    auto output_shape = output_tensor->shape();
    auto out_bbox_num = predictor_->GetTensor(output_names[1]);
    auto out_bbox_num_shape = out_bbox_num->shape();
    // Calculate output length
    int output_size = 1;
    for (int j = 0; j < output_shape.size(); ++j) {
      output_size *= output_shape[j];
    }
    // Rotated-box models emit 10 values per detection, others 6
    is_rbox = output_shape[output_shape.size() - 1] % 10 == 0;
    if (output_size < 6) {
      std::cerr << "[WARNING] No object detected." << std::endl;
    }
    output_data_.resize(output_size);
    std::copy_n(
        output_tensor->mutable_data<float>(), output_size, output_data_.data());
    int out_bbox_num_size = 1;
    for (int j = 0; j < out_bbox_num_shape.size(); ++j) {
      out_bbox_num_size *= out_bbox_num_shape[j];
    }
    out_bbox_num_data_.resize(out_bbox_num_size);
    std::copy_n(out_bbox_num->mutable_data<int>(),
                out_bbox_num_size,
                out_bbox_num_data_.data());
  }

  // Postprocess the results
  result->clear();
  if (config_.arch_ == "PicoDet") {
    PaddleDetection::PicoDetPostProcess(
        result, output_data_list_, config_.fpn_stride_,
        inputs_.im_shape_, inputs_.scale_factor_,
        config_.nms_info_["score_threshold"].as<float>(),
        config_.nms_info_["nms_threshold"].as<float>(), num_class, reg_max);
    bbox_num->push_back(result->size());
  } else {
    Postprocess(imgs, result, out_bbox_num_data_, is_rbox);
    bbox_num->clear();
    for (int k = 0; k < out_bbox_num_data_.size(); k++) {
      int tmp = out_bbox_num_data_[k];
      bbox_num->push_back(tmp);
    }
  }
  auto postprocess_end = std::chrono::steady_clock::now();

  // Record timings in milliseconds: preprocess, mean inference, postprocess
  std::chrono::duration<float> preprocess_diff =
      preprocess_end - preprocess_start;
  times->push_back(double(preprocess_diff.count() * 1000));
  std::chrono::duration<float> inference_diff = inference_end - inference_start;
  times->push_back(double(inference_diff.count() / repeats * 1000));
  std::chrono::duration<float> postprocess_diff =
      postprocess_end - postprocess_start;
  times->push_back(double(postprocess_diff.count() * 1000));
}
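
// Note on Predict() above: `times` receives three entries, all in milliseconds:
// total preprocessing time, inference time averaged over `repeats` runs, and
// total postprocessing time. The `threshold` parameter is not referenced in the
// body; the PicoDet branch takes its score threshold from config_.nms_info_.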

// Generate one distinct RGB color per class id
std::vector<int> GenerateColorMap(int num_class) {
  auto colormap = std::vector<int>(3 * num_class, 0);
  for (int i = 0; i < num_class; ++i) {
    int j = 0;
    int lab = i;
    while (lab) {
      colormap[i * 3] |= (((lab >> 0) & 1) << (7 - j));
      colormap[i * 3 + 1] |= (((lab >> 1) & 1) << (7 - j));
      colormap[i * 3 + 2] |= (((lab >> 2) & 1) << (7 - j));
      ++j;
      lab >>= 3;
    }
  }
  return colormap;
}
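
// Note on GenerateColorMap() above: this is the classic bit-spreading palette
// (the same scheme commonly used for PASCAL VOC label colors). Each iteration
// consumes three bits of the class id and places one bit per R/G/B channel,
// starting from the most significant bit, so nearby ids get distinct colors.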

}  // namespace PaddleDetection