keypoint_postprocess.cpp 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273
  1. // Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. #include "keypoint_postprocess.h"
  15. #define PI 3.1415926535
  16. #define HALF_CIRCLE_DEGREE 180
  17. cv::Point2f get_3rd_point(cv::Point2f& a, cv::Point2f& b) {
  18. cv::Point2f direct{a.x - b.x, a.y - b.y};
  19. return cv::Point2f(a.x - direct.y, a.y + direct.x);
  20. }
  // Rotates the 2-D vector (src_point_x, src_point_y) by `rot_rad` radians
  // using the standard rotation matrix [[cos, -sin], [sin, cos]].
  // Returns a two-element vector {rotated_x, rotated_y}.
  std::vector<float> get_dir(float src_point_x,
                             float src_point_y,
                             float rot_rad) {
    const float sine = sin(rot_rad);
    const float cosine = cos(rot_rad);
    const float rotated_x = src_point_x * cosine - src_point_y * sine;
    const float rotated_y = src_point_x * sine + src_point_y * cosine;
    return std::vector<float>{rotated_x, rotated_y};
  }
  31. void affine_tranform(
  32. float pt_x, float pt_y, cv::Mat& trans, std::vector<float>& preds, int p) {
  33. double new1[3] = {pt_x, pt_y, 1.0};
  34. cv::Mat new_pt(3, 1, trans.type(), new1);
  35. cv::Mat w = trans * new_pt;
  36. preds[p * 3 + 1] = static_cast<float>(w.at<double>(0, 0));
  37. preds[p * 3 + 2] = static_cast<float>(w.at<double>(1, 0));
  38. }
  39. void get_affine_transform(std::vector<float>& center,
  40. std::vector<float>& scale,
  41. float rot,
  42. std::vector<int>& output_size,
  43. cv::Mat& trans,
  44. int inv) {
  45. float src_w = scale[0];
  46. float dst_w = static_cast<float>(output_size[0]);
  47. float dst_h = static_cast<float>(output_size[1]);
  48. float rot_rad = rot * PI / HALF_CIRCLE_DEGREE;
  49. std::vector<float> src_dir = get_dir(-0.5 * src_w, 0, rot_rad);
  50. std::vector<float> dst_dir{static_cast<float>(-0.5) * dst_w, 0.0};
  51. cv::Point2f srcPoint2f[3], dstPoint2f[3];
  52. srcPoint2f[0] = cv::Point2f(center[0], center[1]);
  53. srcPoint2f[1] = cv::Point2f(center[0] + src_dir[0], center[1] + src_dir[1]);
  54. srcPoint2f[2] = get_3rd_point(srcPoint2f[0], srcPoint2f[1]);
  55. dstPoint2f[0] = cv::Point2f(dst_w * 0.5, dst_h * 0.5);
  56. dstPoint2f[1] =
  57. cv::Point2f(dst_w * 0.5 + dst_dir[0], dst_h * 0.5 + dst_dir[1]);
  58. dstPoint2f[2] = get_3rd_point(dstPoint2f[0], dstPoint2f[1]);
  59. if (inv == 0) {
  60. trans = cv::getAffineTransform(srcPoint2f, dstPoint2f);
  61. } else {
  62. trans = cv::getAffineTransform(dstPoint2f, srcPoint2f);
  63. }
  64. }
  65. void transform_preds(std::vector<float>& coords,
  66. std::vector<float>& center,
  67. std::vector<float>& scale,
  68. std::vector<int>& output_size,
  69. std::vector<uint64_t>& dim,
  70. std::vector<float>& target_coords,
  71. bool affine=false) {
  72. if (affine) {
  73. cv::Mat trans(2, 3, CV_64FC1);
  74. get_affine_transform(center, scale, 0, output_size, trans, 1);
  75. for (int p = 0; p < dim[1]; ++p) {
  76. affine_tranform(
  77. coords[p * 2], coords[p * 2 + 1], trans, target_coords, p);
  78. }
  79. } else {
  80. float heat_w = static_cast<float>(output_size[0]);
  81. float heat_h = static_cast<float>(output_size[1]);
  82. float x_scale = scale[0] / heat_w;
  83. float y_scale = scale[1] / heat_h;
  84. float offset_x = center[0] - scale[0] / 2.;
  85. float offset_y = center[1] - scale[1] / 2.;
  86. for (int i = 0; i < dim[1]; i++) {
  87. target_coords[i * 3 + 1] = x_scale * coords[i * 2] + offset_x;
  88. target_coords[i * 3 + 2] = y_scale * coords[i * 2 + 1] + offset_y;
  89. }
  90. }
  91. }
  // Finds the peak of every joint heatmap belonging to one batch element.
  //
  //   heatmap    flat buffer; the plane of joint j in batch `batchid` starts at
  //              (batchid * dim[1] + j) * dim[2] * dim[3].
  //   dim        {batch, joints, height, width}; dim[0] is not read.
  //   preds      output, {x0, y0, x1, y1, ...}. A joint's slots are written
  //              only when its peak value is > 0; otherwise they keep whatever
  //              the caller put there.
  //   maxvals    output, peak value per joint.
  //   batchid    batch element to process.
  //   joint_idx  unused; kept for interface compatibility.
  //
  // The original source carried the remark "only for batchsize == 1".
  //
  // If the dimensions are non-positive, `batchid` is negative, or `heatmap` is
  // too short for the requested batch element, the function returns without
  // touching the outputs rather than reading out of bounds.
  void get_max_preds(std::vector<float>& heatmap,
                     std::vector<int>& dim,
                     std::vector<float>& preds,
                     std::vector<float>& maxvals,
                     int batchid,
                     int joint_idx) {
    using Offset = std::vector<float>::size_type;

    const int num_joints = dim[1];
    const int height = dim[2];
    const int width = dim[3];
    if (batchid < 0 || num_joints <= 0 || height <= 0 || width <= 0) {
      // An empty plane has no maximum; max_element would return `end`.
      return;
    }

    // Offsets are computed in the vector's size type so that large heatmaps
    // cannot overflow `int`.
    const Offset plane_size =
        static_cast<Offset>(height) * static_cast<Offset>(width);
    const Offset batch_size = static_cast<Offset>(num_joints) * plane_size;
    const Offset batch_offset = static_cast<Offset>(batchid) * batch_size;
    if (heatmap.size() < batch_offset + batch_size) {
      return;
    }

    for (int j = 0; j < num_joints; ++j) {
      const float* plane_begin =
          heatmap.data() + batch_offset + static_cast<Offset>(j) * plane_size;
      const float* plane_end = plane_begin + plane_size;
      const float* peak = std::max_element(plane_begin, plane_end);
      const auto peak_offset = std::distance(plane_begin, peak);

      maxvals[j] = *peak;
      if (*peak > 0) {
        // Unravel the flat offset into column (x) and row (y).
        preds[j * 2] = static_cast<float>(peak_offset % width);
        preds[j * 2 + 1] = static_cast<float>(peak_offset / width);
      }
    }
  }
  116. void dark_parse(std::vector<float>& heatmap,
  117. std::vector<uint64_t>& dim,
  118. std::vector<float>& coords,
  119. int px,
  120. int py,
  121. int index,
  122. int ch) {
  123. /*DARK postpocessing, Zhang et al. Distribution-Aware Coordinate
  124. Representation for Human Pose Estimation (CVPR 2020).
  125. 1) offset = - hassian.inv() * derivative
  126. 2) dx = (heatmap[x+1] - heatmap[x-1])/2.
  127. 3) dxx = (dx[x+1] - dx[x-1])/2.
  128. 4) derivative = Mat([dx, dy])
  129. 5) hassian = Mat([[dxx, dxy], [dxy, dyy]])
  130. */
  131. std::vector<float>::const_iterator first1 = heatmap.begin() + index;
  132. std::vector<float>::const_iterator last1 =
  133. heatmap.begin() + index + dim[2] * dim[3];
  134. std::vector<float> heatmap_ch(first1, last1);
  135. cv::Mat heatmap_mat = cv::Mat(heatmap_ch).reshape(0, dim[2]);
  136. heatmap_mat.convertTo(heatmap_mat, CV_32FC1);
  137. cv::GaussianBlur(heatmap_mat, heatmap_mat, cv::Size(3, 3), 0, 0);
  138. heatmap_mat = heatmap_mat.reshape(1, 1);
  139. heatmap_ch = std::vector<float>(heatmap_mat.reshape(1, 1));
  140. float epsilon = 1e-10;
  141. // sample heatmap to get values in around target location
  142. float xy = log(fmax(heatmap_ch[py * dim[3] + px], epsilon));
  143. float xr = log(fmax(heatmap_ch[py * dim[3] + px + 1], epsilon));
  144. float xl = log(fmax(heatmap_ch[py * dim[3] + px - 1], epsilon));
  145. float xr2 = log(fmax(heatmap_ch[py * dim[3] + px + 2], epsilon));
  146. float xl2 = log(fmax(heatmap_ch[py * dim[3] + px - 2], epsilon));
  147. float yu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px], epsilon));
  148. float yd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px], epsilon));
  149. float yu2 = log(fmax(heatmap_ch[(py + 2) * dim[3] + px], epsilon));
  150. float yd2 = log(fmax(heatmap_ch[(py - 2) * dim[3] + px], epsilon));
  151. float xryu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px + 1], epsilon));
  152. float xryd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px + 1], epsilon));
  153. float xlyu = log(fmax(heatmap_ch[(py + 1) * dim[3] + px - 1], epsilon));
  154. float xlyd = log(fmax(heatmap_ch[(py - 1) * dim[3] + px - 1], epsilon));
  155. // compute dx/dy and dxx/dyy with sampled values
  156. float dx = 0.5 * (xr - xl);
  157. float dy = 0.5 * (yu - yd);
  158. float dxx = 0.25 * (xr2 - 2 * xy + xl2);
  159. float dxy = 0.25 * (xryu - xryd - xlyu + xlyd);
  160. float dyy = 0.25 * (yu2 - 2 * xy + yd2);
  161. // finally get offset by derivative and hassian, which combined by dx/dy and
  162. // dxx/dyy
  163. if (dxx * dyy - dxy * dxy != 0) {
  164. float M[2][2] = {dxx, dxy, dxy, dyy};
  165. float D[2] = {dx, dy};
  166. cv::Mat hassian(2, 2, CV_32F, M);
  167. cv::Mat derivative(2, 1, CV_32F, D);
  168. cv::Mat offset = -hassian.inv() * derivative;
  169. coords[ch * 2] += offset.at<float>(0, 0);
  170. coords[ch * 2 + 1] += offset.at<float>(1, 0);
  171. }
  172. }
  173. void get_final_preds(std::vector<float>& heatmap,
  174. std::vector<uint64_t>& dim,
  175. std::vector<float>& idxout,
  176. std::vector<uint64_t>& idxdim,
  177. std::vector<float>& center,
  178. std::vector<float> scale,
  179. std::vector<float>& preds,
  180. int batchid,
  181. bool DARK) {
  182. std::vector<float> coords;
  183. coords.resize(dim[1] * 2);
  184. int heatmap_height = dim[2];
  185. int heatmap_width = dim[3];
  186. for (int j = 0; j < dim[1]; ++j) {
  187. int index = (batchid * dim[1] + j) * dim[2] * dim[3];
  188. int idx = int(idxout[batchid * dim[1] + j]);
  189. preds[j * 3] = heatmap[index + idx];
  190. coords[j * 2] = idx % heatmap_width;
  191. coords[j * 2 + 1] = idx / heatmap_width;
  192. int px = int(coords[j * 2] + 0.5);
  193. int py = int(coords[j * 2 + 1] + 0.5);
  194. if (DARK && px > 1 && px < heatmap_width - 2 && py > 1 &&
  195. py < heatmap_height - 2) {
  196. dark_parse(heatmap, dim, coords, px, py, index, j);
  197. } else {
  198. if (px > 0 && px < heatmap_width - 1) {
  199. float diff_x = heatmap[index + py * dim[3] + px + 1] -
  200. heatmap[index + py * dim[3] + px - 1];
  201. coords[j * 2] += diff_x > 0 ? 1 : -1 * 0.25;
  202. }
  203. if (py > 0 && py < heatmap_height - 1) {
  204. float diff_y = heatmap[index + (py + 1) * dim[3] + px] -
  205. heatmap[index + (py - 1) * dim[3] + px];
  206. coords[j * 2 + 1] += diff_y > 0 ? 1 : -1 * 0.25;
  207. }
  208. }
  209. }
  210. std::vector<int> img_size{heatmap_width, heatmap_height};
  211. transform_preds(coords, center, scale, img_size, dim, preds);
  212. }
  213. void CropImg(cv::Mat& img,
  214. cv::Mat& crop_img,
  215. std::vector<int>& area,
  216. std::vector<float>& center,
  217. std::vector<float>& scale,
  218. float expandratio) {
  219. int crop_x1 = std::max(0, area[0]);
  220. int crop_y1 = std::max(0, area[1]);
  221. int crop_x2 = std::min(img.cols - 1, area[2]);
  222. int crop_y2 = std::min(img.rows - 1, area[3]);
  223. int center_x = (crop_x1 + crop_x2) / 2.;
  224. int center_y = (crop_y1 + crop_y2) / 2.;
  225. int half_h = (crop_y2 - crop_y1) / 2.;
  226. int half_w = (crop_x2 - crop_x1) / 2.;
  227. if (half_h * 3 > half_w * 4) {
  228. half_w = static_cast<int>(half_h * 0.75);
  229. } else {
  230. half_h = static_cast<int>(half_w * 4 / 3);
  231. }
  232. crop_x1 =
  233. std::max(0, center_x - static_cast<int>(half_w * (1 + expandratio)));
  234. crop_y1 =
  235. std::max(0, center_y - static_cast<int>(half_h * (1 + expandratio)));
  236. crop_x2 = std::min(img.cols - 1,
  237. static_cast<int>(center_x + half_w * (1 + expandratio)));
  238. crop_y2 = std::min(img.rows - 1,
  239. static_cast<int>(center_y + half_h * (1 + expandratio)));
  240. crop_img =
  241. img(cv::Range(crop_y1, crop_y2 + 1), cv::Range(crop_x1, crop_x2 + 1));
  242. center.clear();
  243. center.emplace_back((crop_x1 + crop_x2) / 2);
  244. center.emplace_back((crop_y1 + crop_y2) / 2);
  245. scale.clear();
  246. scale.emplace_back((crop_x2 - crop_x1));
  247. scale.emplace_back((crop_y2 - crop_y1));
  248. }