# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np
import math
import paddle
import paddle.nn as nn
from paddle import ParamAttr
from paddle.nn.initializer import Uniform
import paddle.nn.functional as F
from ppdet.core.workspace import register, serializable
from ppdet.modeling.layers import ConvNormLayer
from ppdet.modeling.backbones.hardnet import ConvLayer, HarDBlock
from ..shape_spec import ShapeSpec

__all__ = ['CenterNetDLAFPN', 'CenterNetHarDNetFPN']


# SGE attention
class BasicConv(nn.Layer):
    def __init__(self,
                 in_planes,
                 out_planes,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 groups=1,
                 relu=True,
                 bn=True,
                 bias_attr=False):
        super(BasicConv, self).__init__()
        self.out_channels = out_planes
        self.conv = nn.Conv2D(
            in_planes,
            out_planes,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias_attr=bias_attr)
        self.bn = nn.BatchNorm2D(
            out_planes,
            epsilon=1e-5,
            momentum=0.01,
            weight_attr=False,
            bias_attr=False) if bn else None
        self.relu = nn.ReLU() if relu else None

    def forward(self, x):
        x = self.conv(x)
        if self.bn is not None:
            x = self.bn(x)
        if self.relu is not None:
            x = self.relu(x)
        return x


class ChannelPool(nn.Layer):
    def forward(self, x):
        # Stack the per-position channel-wise max and mean into a 2-channel map.
        return paddle.concat(
            (paddle.max(x, 1).unsqueeze(1), paddle.mean(x, 1).unsqueeze(1)),
            axis=1)


class SpatialGate(nn.Layer):
    def __init__(self):
        super(SpatialGate, self).__init__()
        kernel_size = 7
        self.compress = ChannelPool()
        self.spatial = BasicConv(
            2,
            1,
            kernel_size,
            stride=1,
            padding=(kernel_size - 1) // 2,
            relu=False)

    def forward(self, x):
        x_compress = self.compress(x)
        x_out = self.spatial(x_compress)
        scale = F.sigmoid(x_out)  # broadcasting
        return x * scale
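

# Usage sketch for the spatial gate (illustrative, not part of the original
# module): the gate compresses channels to a 2-channel (max, mean) map,
# convolves it to a single-channel logit, and rescales every spatial position
# of the input by the sigmoid of that logit, preserving the input shape.
#
#     gate = SpatialGate()
#     x = paddle.rand([2, 64, 32, 32])
#     y = gate(x)  # y.shape == [2, 64, 32, 32]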


def fill_up_weights(up):
    # Initialize a depthwise transposed conv as a bilinear upsampling kernel.
    weight = up.weight.numpy()
    f = math.ceil(weight.shape[2] / 2)
    c = (2 * f - 1 - f % 2) / (2. * f)
    for i in range(weight.shape[2]):
        for j in range(weight.shape[3]):
            weight[0, 0, i, j] = \
                (1 - math.fabs(i / f - c)) * (1 - math.fabs(j / f - c))
    for c in range(1, weight.shape[0]):
        weight[c, 0, :, :] = weight[0, 0, :, :]
    up.weight.set_value(weight)
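

# Worked example (illustrative): for a 4x4 kernel (up_s = 2), f = 2 and
# c = 0.75, so the 1-D profile is [0.25, 0.75, 0.75, 0.25] and the 2-D kernel
# is its outer product. The same kernel is copied to every channel, so the
# grouped transposed conv starts out as exact bilinear 2x upsampling.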


class IDAUp(nn.Layer):
    def __init__(self, ch_ins, ch_out, up_strides, dcn_v2=True):
        super(IDAUp, self).__init__()
        for i in range(1, len(ch_ins)):
            ch_in = ch_ins[i]
            up_s = int(up_strides[i])
            fan_in = ch_in * 3 * 3
            stdv = 1. / math.sqrt(fan_in)
            proj = nn.Sequential(
                ConvNormLayer(
                    ch_in,
                    ch_out,
                    filter_size=3,
                    stride=1,
                    use_dcn=dcn_v2,
                    bias_on=dcn_v2,
                    norm_decay=None,
                    dcn_lr_scale=1.,
                    dcn_regularizer=None,
                    initializer=Uniform(-stdv, stdv)),
                nn.ReLU())
            node = nn.Sequential(
                ConvNormLayer(
                    ch_out,
                    ch_out,
                    filter_size=3,
                    stride=1,
                    use_dcn=dcn_v2,
                    bias_on=dcn_v2,
                    norm_decay=None,
                    dcn_lr_scale=1.,
                    dcn_regularizer=None,
                    initializer=Uniform(-stdv, stdv)),
                nn.ReLU())

            kernel_size = up_s * 2
            fan_in = ch_out * kernel_size * kernel_size
            stdv = 1. / math.sqrt(fan_in)
            up = nn.Conv2DTranspose(
                ch_out,
                ch_out,
                kernel_size=up_s * 2,
                stride=up_s,
                padding=up_s // 2,
                groups=ch_out,
                weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)),
                bias_attr=False)
            fill_up_weights(up)
            setattr(self, 'proj_' + str(i), proj)
            setattr(self, 'up_' + str(i), up)
            setattr(self, 'node_' + str(i), node)

    def forward(self, inputs, start_level, end_level):
        for i in range(start_level + 1, end_level):
            upsample = getattr(self, 'up_' + str(i - start_level))
            project = getattr(self, 'proj_' + str(i - start_level))
            inputs[i] = project(inputs[i])
            inputs[i] = upsample(inputs[i])
            node = getattr(self, 'node_' + str(i - start_level))
            inputs[i] = node(paddle.add(inputs[i], inputs[i - 1]))
        return inputs
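

# Aggregation sketch (illustrative): each level i in [start_level + 1, end_level)
# is projected to ch_out channels, upsampled by its stride with the
# bilinear-initialized transposed conv, and fused with level i - 1 through the
# 'node' conv. With plain convs (dcn_v2=False) and hypothetical shapes:
#
#     ida = IDAUp([64, 128, 256], 64, [1, 2, 4], dcn_v2=False)
#     feats = [paddle.rand([1, 64, 56, 56]),
#              paddle.rand([1, 128, 28, 28]),
#              paddle.rand([1, 256, 14, 14])]
#     feats = ida(feats, 0, 3)  # feats[-1].shape == [1, 64, 56, 56]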


class DLAUp(nn.Layer):
    def __init__(self, start_level, channels, scales, ch_in=None, dcn_v2=True):
        super(DLAUp, self).__init__()
        self.start_level = start_level
        if ch_in is None:
            ch_in = channels
        self.channels = channels
        channels = list(channels)
        scales = np.array(scales, dtype=int)
        for i in range(len(channels) - 1):
            j = -i - 2
            setattr(
                self,
                'ida_{}'.format(i),
                IDAUp(
                    ch_in[j:],
                    channels[j],
                    scales[j:] // scales[j],
                    dcn_v2=dcn_v2))
            scales[j + 1:] = scales[j]
            ch_in[j + 1:] = [channels[j] for _ in channels[j + 1:]]

    def forward(self, inputs):
        out = [inputs[-1]]  # start with 32
        for i in range(len(inputs) - self.start_level - 1):
            ida = getattr(self, 'ida_{}'.format(i))
            outputs = ida(inputs, len(inputs) - i - 2, len(inputs))
            out.insert(0, outputs[-1])
        return out
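

# DLAUp sketch (illustrative): with channels [64, 128, 256, 512] and scales
# [1, 2, 4, 8], each IDAUp stage merges one more of the shallower levels into
# the aggregation, so forward returns one fused map per level, finest first;
# out[0] ends up at the resolution and channel count of channels[0].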


@register
@serializable
class CenterNetDLAFPN(nn.Layer):
    """
    Args:
        in_channels (list): number of input feature channels from backbone.
            [16, 32, 64, 128, 256, 512] by default, means the channels of DLA-34
        down_ratio (int): the down ratio from images to heatmap, 4 by default
        last_level (int): the last level of input feature fed into the upsampling block
        out_channel (int): the channel of the output feature, 0 by default means
            the channel of the input feature whose down ratio is `down_ratio`
        first_level (int|None): the first level of input feature fed into the upsampling block.
            if None, the first level stands for log2(down_ratio)
        dcn_v2 (bool): whether to use DCNv2, True by default
        with_sge (bool): whether to use SGE attention, False by default
    """

    def __init__(self,
                 in_channels,
                 down_ratio=4,
                 last_level=5,
                 out_channel=0,
                 first_level=None,
                 dcn_v2=True,
                 with_sge=False):
        super(CenterNetDLAFPN, self).__init__()
        self.first_level = int(np.log2(
            down_ratio)) if first_level is None else first_level
        assert self.first_level >= 0, "first level in CenterNetDLAFPN should be greater or equal to 0, but received {}".format(
            self.first_level)
        self.down_ratio = down_ratio
        self.last_level = last_level
        scales = [2**i for i in range(len(in_channels[self.first_level:]))]
        self.dla_up = DLAUp(
            self.first_level,
            in_channels[self.first_level:],
            scales,
            dcn_v2=dcn_v2)
        self.out_channel = out_channel
        if out_channel == 0:
            self.out_channel = in_channels[self.first_level]
        self.ida_up = IDAUp(
            in_channels[self.first_level:self.last_level],
            self.out_channel,
            [2**i for i in range(self.last_level - self.first_level)],
            dcn_v2=dcn_v2)
        self.with_sge = with_sge
        if self.with_sge:
            self.sge_attention = SpatialGate()

    @classmethod
    def from_config(cls, cfg, input_shape):
        return {'in_channels': [i.channels for i in input_shape]}

    def forward(self, body_feats):
        inputs = [body_feats[i] for i in range(len(body_feats))]
        dla_up_feats = self.dla_up(inputs)

        ida_up_feats = []
        for i in range(self.last_level - self.first_level):
            ida_up_feats.append(dla_up_feats[i].clone())
        self.ida_up(ida_up_feats, 0, len(ida_up_feats))

        feat = ida_up_feats[-1]
        if self.with_sge:
            feat = self.sge_attention(feat)
        if self.down_ratio != 4:
            feat = F.interpolate(
                feat,
                scale_factor=self.down_ratio // 4,
                mode="bilinear",
                align_corners=True)
        return feat

    @property
    def out_shape(self):
        return [ShapeSpec(channels=self.out_channel, stride=self.down_ratio)]
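

# Usage sketch (illustrative, assuming a DLA-34 backbone feeding six levels):
#
#     fpn = CenterNetDLAFPN(in_channels=[16, 32, 64, 128, 256, 512],
#                           dcn_v2=False)
#     # body_feats: the six backbone maps, strides 1 to 32; with the default
#     # down_ratio=4, first_level is 2 and the neck returns one 64-channel
#     # map at stride 4 (out_channel = in_channels[first_level]).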


class TransitionUp(nn.Layer):
    def __init__(self, in_channels, out_channels):
        super().__init__()

    def forward(self, x, skip):
        # Upsample x to the skip feature's spatial size, then concat channels.
        h, w = skip.shape[2], skip.shape[3]
        out = F.interpolate(x, size=(h, w), mode="bilinear", align_corners=True)
        out = paddle.concat([out, skip], 1)
        return out
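

# Shape sketch (illustrative): given x of shape [1, 72, 7, 7] and a skip of
# shape [1, 784, 14, 14], the output is [1, 856, 14, 14]: x is resized to the
# skip's 14x14 grid and the two are concatenated along channels (72 + 784).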


@register
@serializable
class CenterNetHarDNetFPN(nn.Layer):
    """
    Args:
        in_channels (list): number of input feature channels from backbone.
            [96, 214, 458, 784] by default, means the channels of HarDNet-85
        num_layers (int): HarDNet layers, 85 by default
        down_ratio (int): the down ratio from images to heatmap, 4 by default
        first_level (int|None): the first level of input feature fed into the upsampling block.
            if None, the first level stands for log2(down_ratio) - 1
        last_level (int): the last level of input feature fed into the upsampling block
        out_channel (int): the channel of the output feature, 0 by default means
            the channel of the input feature whose down ratio is `down_ratio`
    """

    def __init__(self,
                 in_channels,
                 num_layers=85,
                 down_ratio=4,
                 first_level=None,
                 last_level=4,
                 out_channel=0):
        super(CenterNetHarDNetFPN, self).__init__()
        self.first_level = int(np.log2(
            down_ratio)) - 1 if first_level is None else first_level
        assert self.first_level >= 0, "first level in CenterNetHarDNetFPN should be greater or equal to 0, but received {}".format(
            self.first_level)
        self.down_ratio = down_ratio
        self.last_level = last_level
        self.last_pool = nn.AvgPool2D(kernel_size=2, stride=2)

        assert num_layers in [68, 85], "HarDNet-{} is not supported.".format(
            num_layers)
        if num_layers == 85:
            self.last_proj = ConvLayer(784, 256, kernel_size=1)
            self.last_blk = HarDBlock(768, 80, 1.7, 8)
            self.skip_nodes = [1, 3, 8, 13]
            self.SC = [32, 32, 0]
            gr = [64, 48, 28]
            layers = [8, 8, 4]
            ch_list2 = [224 + self.SC[0], 160 + self.SC[1], 96 + self.SC[2]]
            channels = [96, 214, 458, 784]
            self.skip_lv = 3
        elif num_layers == 68:
            self.last_proj = ConvLayer(654, 192, kernel_size=1)
            self.last_blk = HarDBlock(576, 72, 1.7, 8)
            self.skip_nodes = [1, 3, 8, 11]
            self.SC = [32, 32, 0]
            gr = [48, 32, 20]
            layers = [8, 8, 4]
            ch_list2 = [224 + self.SC[0], 96 + self.SC[1], 64 + self.SC[2]]
            channels = [64, 124, 328, 654]
            self.skip_lv = 2

        self.transUpBlocks = nn.LayerList([])
        self.denseBlocksUp = nn.LayerList([])
        self.conv1x1_up = nn.LayerList([])
        self.avg9x9 = nn.AvgPool2D(kernel_size=(9, 9), stride=1, padding=(4, 4))
        prev_ch = self.last_blk.get_out_ch()
        for i in range(3):
            skip_ch = channels[3 - i]
            self.transUpBlocks.append(TransitionUp(prev_ch, prev_ch))
            if i < self.skip_lv:
                cur_ch = prev_ch + skip_ch
            else:
                cur_ch = prev_ch
            self.conv1x1_up.append(
                ConvLayer(
                    cur_ch, ch_list2[i], kernel_size=1))
            cur_ch = ch_list2[i]
            cur_ch -= self.SC[i]
            cur_ch *= 3

            blk = HarDBlock(cur_ch, gr[i], 1.7, layers[i])
            self.denseBlocksUp.append(blk)
            prev_ch = blk.get_out_ch()

        prev_ch += self.SC[0] + self.SC[1] + self.SC[2]
        self.out_channel = prev_ch

    @classmethod
    def from_config(cls, cfg, input_shape):
        return {'in_channels': [i.channels for i in input_shape]}

    def forward(self, body_feats):
        x = body_feats[-1]
        x_sc = []
        x = self.last_proj(x)
        x = self.last_pool(x)
        x2 = self.avg9x9(x)
        x3 = x / (x.sum((2, 3), keepdim=True) + 0.1)
        x = paddle.concat([x, x2, x3], 1)
        x = self.last_blk(x)

        for i in range(3):
            skip_x = body_feats[3 - i]
            x_up = self.transUpBlocks[i](x, skip_x)
            x_ch = self.conv1x1_up[i](x_up)
            if self.SC[i] > 0:
                end = x_ch.shape[1]
                new_st = end - self.SC[i]
                x_sc.append(x_ch[:, new_st:, :, :])
                x_ch = x_ch[:, :new_st, :, :]
            x2 = self.avg9x9(x_ch)
            x3 = x_ch / (x_ch.sum((2, 3), keepdim=True) + 0.1)
            x_new = paddle.concat([x_ch, x2, x3], 1)
            x = self.denseBlocksUp[i](x_new)

        scs = [x]
        for i in range(3):
            if self.SC[i] > 0:
                scs.insert(
                    0,
                    F.interpolate(
                        x_sc[i],
                        size=(x.shape[2], x.shape[3]),
                        mode="bilinear",
                        align_corners=True))
        neck_feat = paddle.concat(scs, 1)
        return neck_feat

    @property
    def out_shape(self):
        return [ShapeSpec(channels=self.out_channel, stride=self.down_ratio)]
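

# Usage sketch (illustrative, assuming a HarDNet-85 backbone with four levels):
#
#     fpn = CenterNetHarDNetFPN(in_channels=[96, 214, 458, 784])
#     # body_feats: the four backbone maps, deepest last. The neck pools and
#     # re-expands the deepest map, walks back up through the three skips,
#     # and returns one map at stride down_ratio whose channel count is
#     # fpn.out_channel (the last HarDBlock output plus the shortcut
#     # channels split off in self.SC).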