# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.nn import AdaptiveAvgPool2D, Linear
from paddle.regularizer import L2Decay
from paddle import ParamAttr
from paddle.nn.initializer import Normal, Uniform
from numbers import Integral
import math

from ppdet.core.workspace import register
from ..shape_spec import ShapeSpec

__all__ = ['HRNet']
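
# ConvNormLayer: a bias-free Conv2D (Normal(0, 0.01) weight init) followed by
# BatchNorm/SyncBN/GroupNorm and an optional ReLU. With freeze_norm=True the
# norm parameters get zero learning rate and stop_gradient, and BatchNorm
# switches to use_global_stats (running statistics even in train mode).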
class ConvNormLayer(nn.Layer):
    def __init__(self,
                 ch_in,
                 ch_out,
                 filter_size,
                 stride=1,
                 norm_type='bn',
                 norm_groups=32,
                 use_dcn=False,
                 norm_momentum=0.9,
                 norm_decay=0.,
                 freeze_norm=False,
                 act=None,
                 name=None):
        super(ConvNormLayer, self).__init__()
        assert norm_type in ['bn', 'sync_bn', 'gn']

        self.act = act
        self.conv = nn.Conv2D(
            in_channels=ch_in,
            out_channels=ch_out,
            kernel_size=filter_size,
            stride=stride,
            padding=(filter_size - 1) // 2,
            groups=1,
            weight_attr=ParamAttr(initializer=Normal(
                mean=0., std=0.01)),
            bias_attr=False)

        norm_lr = 0. if freeze_norm else 1.
        param_attr = ParamAttr(
            learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
        bias_attr = ParamAttr(
            learning_rate=norm_lr, regularizer=L2Decay(norm_decay))
        global_stats = True if freeze_norm else None
        if norm_type in ['bn', 'sync_bn']:
            self.norm = nn.BatchNorm2D(
                ch_out,
                momentum=norm_momentum,
                weight_attr=param_attr,
                bias_attr=bias_attr,
                use_global_stats=global_stats)
        elif norm_type == 'gn':
            self.norm = nn.GroupNorm(
                num_groups=norm_groups,
                num_channels=ch_out,
                weight_attr=param_attr,
                bias_attr=bias_attr)

        norm_params = self.norm.parameters()
        if freeze_norm:
            for param in norm_params:
                param.stop_gradient = True

    def forward(self, inputs):
        out = self.conv(inputs)
        out = self.norm(out)
        if self.act == 'relu':
            out = F.relu(out)
        return out
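
# Layer1: the HRNet stem stage, four BottleneckBlocks on the full-resolution
# path that expand the 64-channel stem output to 256 channels (the first
# block carries the projection shortcut).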
class Layer1(nn.Layer):
    def __init__(self,
                 num_channels,
                 has_se=False,
                 norm_momentum=0.9,
                 norm_decay=0.,
                 freeze_norm=True,
                 name=None):
        super(Layer1, self).__init__()

        self.bottleneck_block_list = []
        for i in range(4):
            bottleneck_block = self.add_sublayer(
                "block_{}_{}".format(name, i + 1),
                BottleneckBlock(
                    num_channels=num_channels if i == 0 else 256,
                    num_filters=64,
                    has_se=has_se,
                    stride=1,
                    downsample=True if i == 0 else False,
                    norm_momentum=norm_momentum,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    name=name + '_' + str(i + 1)))
            self.bottleneck_block_list.append(bottleneck_block)

    def forward(self, input):
        conv = input
        for block_func in self.bottleneck_block_list:
            conv = block_func(conv)
        return conv
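
# TransitionLayer: adapts the set of branches between stages. An existing
# branch gets a 3x3 conv only if its channel count changes; each newly added
# branch is created from the lowest-resolution input with a stride-2 3x3 conv.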
class TransitionLayer(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 norm_momentum=0.9,
                 norm_decay=0.,
                 freeze_norm=True,
                 name=None):
        super(TransitionLayer, self).__init__()

        num_in = len(in_channels)
        num_out = len(out_channels)
        self.conv_bn_func_list = []
        for i in range(num_out):
            residual = None
            if i < num_in:
                if in_channels[i] != out_channels[i]:
                    residual = self.add_sublayer(
                        "transition_{}_layer_{}".format(name, i + 1),
                        ConvNormLayer(
                            ch_in=in_channels[i],
                            ch_out=out_channels[i],
                            filter_size=3,
                            norm_momentum=norm_momentum,
                            norm_decay=norm_decay,
                            freeze_norm=freeze_norm,
                            act='relu',
                            name=name + '_layer_' + str(i + 1)))
            else:
                residual = self.add_sublayer(
                    "transition_{}_layer_{}".format(name, i + 1),
                    ConvNormLayer(
                        ch_in=in_channels[-1],
                        ch_out=out_channels[i],
                        filter_size=3,
                        stride=2,
                        norm_momentum=norm_momentum,
                        norm_decay=norm_decay,
                        freeze_norm=freeze_norm,
                        act='relu',
                        name=name + '_layer_' + str(i + 1)))
            self.conv_bn_func_list.append(residual)

    def forward(self, input):
        outs = []
        for idx, conv_bn_func in enumerate(self.conv_bn_func_list):
            if conv_bn_func is None:
                outs.append(input[idx])
            else:
                if idx < len(input):
                    outs.append(conv_bn_func(input[idx]))
                else:
                    outs.append(conv_bn_func(input[-1]))
        return outs
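
# Branches: runs block_num BasicBlocks independently on every resolution
# branch; branch i maps in_channels[i] to out_channels[i].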
class Branches(nn.Layer):
    def __init__(self,
                 block_num,
                 in_channels,
                 out_channels,
                 has_se=False,
                 norm_momentum=0.9,
                 norm_decay=0.,
                 freeze_norm=True,
                 name=None):
        super(Branches, self).__init__()

        self.basic_block_list = []
        for i in range(len(out_channels)):
            self.basic_block_list.append([])
            for j in range(block_num):
                in_ch = in_channels[i] if j == 0 else out_channels[i]
                basic_block_func = self.add_sublayer(
                    "bb_{}_branch_layer_{}_{}".format(name, i + 1, j + 1),
                    BasicBlock(
                        num_channels=in_ch,
                        num_filters=out_channels[i],
                        has_se=has_se,
                        norm_momentum=norm_momentum,
                        norm_decay=norm_decay,
                        freeze_norm=freeze_norm,
                        name=name + '_branch_layer_' + str(i + 1) + '_' +
                        str(j + 1)))
                self.basic_block_list[i].append(basic_block_func)

    def forward(self, inputs):
        outs = []
        for idx, input in enumerate(inputs):
            conv = input
            basic_block_list = self.basic_block_list[idx]
            for basic_block_func in basic_block_list:
                conv = basic_block_func(conv)
            outs.append(conv)
        return outs
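
# BottleneckBlock: residual 1x1 -> 3x3 -> 1x1 block with 4x channel
# expansion, an optional SE block, and an optional 1x1 projection shortcut.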
class BottleneckBlock(nn.Layer):
    def __init__(self,
                 num_channels,
                 num_filters,
                 has_se,
                 stride=1,
                 downsample=False,
                 norm_momentum=0.9,
                 norm_decay=0.,
                 freeze_norm=True,
                 name=None):
        super(BottleneckBlock, self).__init__()

        self.has_se = has_se
        self.downsample = downsample

        self.conv1 = ConvNormLayer(
            ch_in=num_channels,
            ch_out=num_filters,
            filter_size=1,
            norm_momentum=norm_momentum,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            act="relu",
            name=name + "_conv1")
        self.conv2 = ConvNormLayer(
            ch_in=num_filters,
            ch_out=num_filters,
            filter_size=3,
            stride=stride,
            norm_momentum=norm_momentum,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            act="relu",
            name=name + "_conv2")
        self.conv3 = ConvNormLayer(
            ch_in=num_filters,
            ch_out=num_filters * 4,
            filter_size=1,
            norm_momentum=norm_momentum,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            act=None,
            name=name + "_conv3")

        if self.downsample:
            self.conv_down = ConvNormLayer(
                ch_in=num_channels,
                ch_out=num_filters * 4,
                filter_size=1,
                norm_momentum=norm_momentum,
                norm_decay=norm_decay,
                freeze_norm=freeze_norm,
                act=None,
                name=name + "_downsample")

        if self.has_se:
            self.se = SELayer(
                num_channels=num_filters * 4,
                num_filters=num_filters * 4,
                reduction_ratio=16,
                name='fc' + name)

    def forward(self, input):
        residual = input
        conv1 = self.conv1(input)
        conv2 = self.conv2(conv1)
        conv3 = self.conv3(conv2)

        if self.downsample:
            residual = self.conv_down(input)

        if self.has_se:
            conv3 = self.se(conv3)

        y = paddle.add(x=residual, y=conv3)
        y = F.relu(y)
        return y
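
# BasicBlock: residual block of two 3x3 convs with an optional SE block; the
# branches of the HRNet exchange modules are built from these.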
class BasicBlock(nn.Layer):
    def __init__(self,
                 num_channels,
                 num_filters,
                 stride=1,
                 has_se=False,
                 downsample=False,
                 norm_momentum=0.9,
                 norm_decay=0.,
                 freeze_norm=True,
                 name=None):
        super(BasicBlock, self).__init__()

        self.has_se = has_se
        self.downsample = downsample

        self.conv1 = ConvNormLayer(
            ch_in=num_channels,
            ch_out=num_filters,
            filter_size=3,
            norm_momentum=norm_momentum,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            stride=stride,
            act="relu",
            name=name + "_conv1")
        self.conv2 = ConvNormLayer(
            ch_in=num_filters,
            ch_out=num_filters,
            filter_size=3,
            norm_momentum=norm_momentum,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            stride=1,
            act=None,
            name=name + "_conv2")

        if self.downsample:
            # the projection shortcut must match conv2's output width
            # (num_filters) so the residual add is well-formed
            self.conv_down = ConvNormLayer(
                ch_in=num_channels,
                ch_out=num_filters,
                filter_size=1,
                norm_momentum=norm_momentum,
                norm_decay=norm_decay,
                freeze_norm=freeze_norm,
                act=None,
                name=name + "_downsample")

        if self.has_se:
            self.se = SELayer(
                num_channels=num_filters,
                num_filters=num_filters,
                reduction_ratio=16,
                name='fc' + name)

    def forward(self, input):
        residual = input
        conv1 = self.conv1(input)
        conv2 = self.conv2(conv1)

        if self.downsample:
            residual = self.conv_down(input)

        if self.has_se:
            conv2 = self.se(conv2)

        y = paddle.add(x=residual, y=conv2)
        y = F.relu(y)
        return y
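
# SELayer: squeeze-and-excitation. Global average pool, a Linear squeeze to
# num_channels / reduction_ratio, ReLU, a Linear excitation back to
# num_filters, sigmoid, then channel-wise rescaling of the input.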
class SELayer(nn.Layer):
    def __init__(self, num_channels, num_filters, reduction_ratio, name=None):
        super(SELayer, self).__init__()
        self.pool2d_gap = AdaptiveAvgPool2D(1)
        self._num_channels = num_channels
        med_ch = int(num_channels / reduction_ratio)
        stdv = 1.0 / math.sqrt(num_channels * 1.0)
        self.squeeze = Linear(
            num_channels,
            med_ch,
            weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)))
        stdv = 1.0 / math.sqrt(med_ch * 1.0)
        self.excitation = Linear(
            med_ch,
            num_filters,
            weight_attr=ParamAttr(initializer=Uniform(-stdv, stdv)))

    def forward(self, input):
        pool = self.pool2d_gap(input)
        pool = paddle.squeeze(pool, axis=[2, 3])
        squeeze = self.squeeze(pool)
        squeeze = F.relu(squeeze)
        excitation = self.excitation(squeeze)
        excitation = F.sigmoid(excitation)
        excitation = paddle.unsqueeze(excitation, axis=[2, 3])
        out = input * excitation
        return out
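
# Stage: chains num_modules HighResolutionModules; only the final module
# honors multi_scale_output=False.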
class Stage(nn.Layer):
    def __init__(self,
                 num_channels,
                 num_modules,
                 num_filters,
                 has_se=False,
                 norm_momentum=0.9,
                 norm_decay=0.,
                 freeze_norm=True,
                 multi_scale_output=True,
                 name=None):
        super(Stage, self).__init__()

        self._num_modules = num_modules
        self.stage_func_list = []
        for i in range(num_modules):
            # only the last module may drop the multi-scale output
            is_last = i == num_modules - 1 and not multi_scale_output
            stage_func = self.add_sublayer(
                "stage_{}_{}".format(name, i + 1),
                HighResolutionModule(
                    num_channels=num_channels,
                    num_filters=num_filters,
                    has_se=has_se,
                    norm_momentum=norm_momentum,
                    norm_decay=norm_decay,
                    freeze_norm=freeze_norm,
                    multi_scale_output=not is_last,
                    name=name + '_' + str(i + 1)))
            self.stage_func_list.append(stage_func)

    def forward(self, input):
        out = input
        for idx in range(self._num_modules):
            out = self.stage_func_list[idx](out)
        return out
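
# HighResolutionModule: one HRNet exchange unit, i.e. four BasicBlocks per
# branch (Branches) followed by cross-resolution fusion (FuseLayers).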
class HighResolutionModule(nn.Layer):
    def __init__(self,
                 num_channels,
                 num_filters,
                 has_se=False,
                 multi_scale_output=True,
                 norm_momentum=0.9,
                 norm_decay=0.,
                 freeze_norm=True,
                 name=None):
        super(HighResolutionModule, self).__init__()
        self.branches_func = Branches(
            block_num=4,
            in_channels=num_channels,
            out_channels=num_filters,
            has_se=has_se,
            norm_momentum=norm_momentum,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name=name)

        self.fuse_func = FuseLayers(
            in_channels=num_filters,
            out_channels=num_filters,
            multi_scale_output=multi_scale_output,
            norm_momentum=norm_momentum,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name=name)

    def forward(self, input):
        out = self.branches_func(input)
        out = self.fuse_func(out)
        return out
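
# FuseLayers: sums every input branch into each output branch. A
# lower-resolution input (j > i) goes through a 1x1 conv and nearest-neighbor
# upsampling by 2**(j - i); a higher-resolution input (j < i) is downsampled
# with a chain of stride-2 3x3 convs.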
class FuseLayers(nn.Layer):
    def __init__(self,
                 in_channels,
                 out_channels,
                 multi_scale_output=True,
                 norm_momentum=0.9,
                 norm_decay=0.,
                 freeze_norm=True,
                 name=None):
        super(FuseLayers, self).__init__()

        self._actual_ch = len(in_channels) if multi_scale_output else 1
        self._in_channels = in_channels

        self.residual_func_list = []
        for i in range(self._actual_ch):
            for j in range(len(in_channels)):
                residual_func = None
                if j > i:
                    residual_func = self.add_sublayer(
                        "residual_{}_layer_{}_{}".format(name, i + 1, j + 1),
                        ConvNormLayer(
                            ch_in=in_channels[j],
                            ch_out=out_channels[i],
                            filter_size=1,
                            stride=1,
                            act=None,
                            norm_momentum=norm_momentum,
                            norm_decay=norm_decay,
                            freeze_norm=freeze_norm,
                            name=name + '_layer_' + str(i + 1) + '_' +
                            str(j + 1)))
                    self.residual_func_list.append(residual_func)
                elif j < i:
                    pre_num_filters = in_channels[j]
                    for k in range(i - j):
                        if k == i - j - 1:
                            residual_func = self.add_sublayer(
                                "residual_{}_layer_{}_{}_{}".format(
                                    name, i + 1, j + 1, k + 1),
                                ConvNormLayer(
                                    ch_in=pre_num_filters,
                                    ch_out=out_channels[i],
                                    filter_size=3,
                                    stride=2,
                                    norm_momentum=norm_momentum,
                                    norm_decay=norm_decay,
                                    freeze_norm=freeze_norm,
                                    act=None,
                                    name=name + '_layer_' + str(i + 1) + '_' +
                                    str(j + 1) + '_' + str(k + 1)))
                            pre_num_filters = out_channels[i]
                        else:
                            residual_func = self.add_sublayer(
                                "residual_{}_layer_{}_{}_{}".format(
                                    name, i + 1, j + 1, k + 1),
                                ConvNormLayer(
                                    ch_in=pre_num_filters,
                                    ch_out=out_channels[j],
                                    filter_size=3,
                                    stride=2,
                                    norm_momentum=norm_momentum,
                                    norm_decay=norm_decay,
                                    freeze_norm=freeze_norm,
                                    act="relu",
                                    name=name + '_layer_' + str(i + 1) + '_' +
                                    str(j + 1) + '_' + str(k + 1)))
                            pre_num_filters = out_channels[j]
                        self.residual_func_list.append(residual_func)

    def forward(self, input):
        outs = []
        residual_func_idx = 0
        for i in range(self._actual_ch):
            residual = input[i]
            for j in range(len(self._in_channels)):
                if j > i:
                    y = self.residual_func_list[residual_func_idx](input[j])
                    residual_func_idx += 1
                    y = F.interpolate(y, scale_factor=2**(j - i))
                    residual = paddle.add(x=residual, y=y)
                elif j < i:
                    y = input[j]
                    for k in range(i - j):
                        y = self.residual_func_list[residual_func_idx](y)
                        residual_func_idx += 1
                    residual = paddle.add(x=residual, y=y)

            residual = F.relu(residual)
            outs.append(residual)

        return outs

@register
class HRNet(nn.Layer):
    """
    HRNet, see https://arxiv.org/abs/1908.07919

    Args:
        width (int): base channel width of HRNet; one of 18, 30, 32, 40, 44,
            48, 60 or 64
        has_se (bool): whether to add an SE block after each stage
        freeze_at (int): the stage output to freeze (its gradient is stopped)
        freeze_norm (bool): whether to freeze the norm layers in HRNet
        norm_momentum (float): momentum of BatchNorm
        norm_decay (float): weight decay for normalization layer weights
        return_idx (List): indices of the stage outputs to return
        upsample (bool): whether to upsample and concat the backbone feats
            into a single stride-4 feature map
        downsample (bool): whether to run the downsampling head that merges
            the branches into a single stride-32 feature map
    """

    def __init__(self,
                 width=18,
                 has_se=False,
                 freeze_at=0,
                 freeze_norm=True,
                 norm_momentum=0.9,
                 norm_decay=0.,
                 return_idx=[0, 1, 2, 3],
                 upsample=False,
                 downsample=False):
        super(HRNet, self).__init__()

        self.width = width
        self.has_se = has_se
        if isinstance(return_idx, Integral):
            return_idx = [return_idx]
        assert len(return_idx) > 0, "need one or more return index"
        self.freeze_at = freeze_at
        self.return_idx = return_idx
        self.upsample = upsample
        self.downsample = downsample
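
        # branch widths for stages 2 to 4: each added branch halves the
        # spatial resolution and doubles the channels of the previous one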
        self.channels = {
            18: [[18, 36], [18, 36, 72], [18, 36, 72, 144]],
            30: [[30, 60], [30, 60, 120], [30, 60, 120, 240]],
            32: [[32, 64], [32, 64, 128], [32, 64, 128, 256]],
            40: [[40, 80], [40, 80, 160], [40, 80, 160, 320]],
            44: [[44, 88], [44, 88, 176], [44, 88, 176, 352]],
            48: [[48, 96], [48, 96, 192], [48, 96, 192, 384]],
            60: [[60, 120], [60, 120, 240], [60, 120, 240, 480]],
            64: [[64, 128], [64, 128, 256], [64, 128, 256, 512]]
        }

        channels_2, channels_3, channels_4 = self.channels[width]
        num_modules_2, num_modules_3, num_modules_4 = 1, 4, 3
        self._out_channels = [sum(channels_4)] if self.upsample else channels_4
        self._out_strides = [4] if self.upsample else [4, 8, 16, 32]

        self.conv_layer1_1 = ConvNormLayer(
            ch_in=3,
            ch_out=64,
            filter_size=3,
            stride=2,
            norm_momentum=norm_momentum,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            act='relu',
            name="layer1_1")

        self.conv_layer1_2 = ConvNormLayer(
            ch_in=64,
            ch_out=64,
            filter_size=3,
            stride=2,
            norm_momentum=norm_momentum,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            act='relu',
            name="layer1_2")

        self.la1 = Layer1(
            num_channels=64,
            has_se=has_se,
            norm_momentum=norm_momentum,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name="layer2")

        self.tr1 = TransitionLayer(
            in_channels=[256],
            out_channels=channels_2,
            norm_momentum=norm_momentum,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name="tr1")

        self.st2 = Stage(
            num_channels=channels_2,
            num_modules=num_modules_2,
            num_filters=channels_2,
            has_se=self.has_se,
            norm_momentum=norm_momentum,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name="st2")

        self.tr2 = TransitionLayer(
            in_channels=channels_2,
            out_channels=channels_3,
            norm_momentum=norm_momentum,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name="tr2")

        self.st3 = Stage(
            num_channels=channels_3,
            num_modules=num_modules_3,
            num_filters=channels_3,
            has_se=self.has_se,
            norm_momentum=norm_momentum,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name="st3")

        self.tr3 = TransitionLayer(
            in_channels=channels_3,
            out_channels=channels_4,
            norm_momentum=norm_momentum,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            name="tr3")

        self.st4 = Stage(
            num_channels=channels_4,
            num_modules=num_modules_4,
            num_filters=channels_4,
            has_se=self.has_se,
            norm_momentum=norm_momentum,
            norm_decay=norm_decay,
            freeze_norm=freeze_norm,
            multi_scale_output=len(return_idx) > 1,
            name="st4")

        if self.downsample:
            self.incre_modules, self.downsamp_modules, \
                self.final_layer = self._make_head(
                    channels_4,
                    norm_momentum=norm_momentum,
                    has_se=self.has_se)

    def _make_layer(self,
                    block,
                    inplanes,
                    planes,
                    blocks,
                    stride=1,
                    norm_momentum=0.9,
                    has_se=False,
                    name=None):
        downsample = None
        if stride != 1 or inplanes != planes * 4:
            downsample = True

        layers = []
        layers.append(
            block(
                inplanes,
                planes,
                has_se,
                stride,
                downsample,
                norm_momentum=norm_momentum,
                freeze_norm=False,
                name=name + "_s0"))
        inplanes = planes * 4
        for i in range(1, blocks):
            layers.append(
                block(
                    inplanes,
                    planes,
                    has_se,
                    norm_momentum=norm_momentum,
                    freeze_norm=False,
                    name=name + "_s" + str(i)))

        return nn.Sequential(*layers)

    def _make_head(self, pre_stage_channels, norm_momentum=0.9, has_se=False):
        head_block = BottleneckBlock
        head_channels = [32, 64, 128, 256]

        # Increasing the #channels on each resolution
        # from C, 2C, 4C, 8C to 128, 256, 512, 1024
        incre_modules = []
        for i, channels in enumerate(pre_stage_channels):
            incre_module = self._make_layer(
                head_block,
                channels,
                head_channels[i],
                1,
                stride=1,
                norm_momentum=norm_momentum,
                has_se=has_se,
                name='incre' + str(i))
            incre_modules.append(incre_module)
        incre_modules = nn.LayerList(incre_modules)

        # downsampling modules
        downsamp_modules = []
        for i in range(len(pre_stage_channels) - 1):
            in_channels = head_channels[i] * 4
            out_channels = head_channels[i + 1] * 4

            downsamp_module = nn.Sequential(
                nn.Conv2D(
                    in_channels=in_channels,
                    out_channels=out_channels,
                    kernel_size=3,
                    stride=2,
                    padding=1),
                nn.BatchNorm2D(
                    out_channels, momentum=norm_momentum),
                nn.ReLU())
            downsamp_modules.append(downsamp_module)
        downsamp_modules = nn.LayerList(downsamp_modules)

        final_layer = nn.Sequential(
            nn.Conv2D(
                in_channels=head_channels[3] * 4,
                out_channels=2048,
                kernel_size=1,
                stride=1,
                padding=0),
            nn.BatchNorm2D(
                2048, momentum=norm_momentum),
            nn.ReLU())

        return incre_modules, downsamp_modules, final_layer

    def forward(self, inputs):
        x = inputs['image']
        conv1 = self.conv_layer1_1(x)
        conv2 = self.conv_layer1_2(conv1)

        la1 = self.la1(conv2)
        tr1 = self.tr1([la1])
        st2 = self.st2(tr1)
        tr2 = self.tr2(st2)
        st3 = self.st3(tr2)
        tr3 = self.tr3(st3)
        st4 = self.st4(tr3)

        if self.upsample:
            # Upsampling
            x0_h, x0_w = st4[0].shape[2:4]
            x1 = F.upsample(st4[1], size=(x0_h, x0_w), mode='bilinear')
            x2 = F.upsample(st4[2], size=(x0_h, x0_w), mode='bilinear')
            x3 = F.upsample(st4[3], size=(x0_h, x0_w), mode='bilinear')
            x = paddle.concat([st4[0], x1, x2, x3], 1)
            return x

        if self.downsample:
            y = self.incre_modules[0](st4[0])
            for i in range(len(self.downsamp_modules)):
                y = self.incre_modules[i + 1](st4[i + 1]) + \
                    self.downsamp_modules[i](y)
            y = self.final_layer(y)
            return y

        res = []
        for i, layer in enumerate(st4):
            if i == self.freeze_at:
                layer.stop_gradient = True
            if i in self.return_idx:
                res.append(layer)
        return res

    @property
    def out_shape(self):
        if self.upsample:
            self.return_idx = [0]
        return [
            ShapeSpec(
                channels=self._out_channels[i], stride=self._out_strides[i])
            for i in self.return_idx
        ]
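

# A minimal smoke-test sketch, not part of the upstream file. It assumes this
# module is importable inside ppdet (the relative shape_spec import must
# resolve); the 1x3x512x512 input and width=18 are arbitrary illustration
# choices. For width=18 this should print four maps at strides 4/8/16/32
# with 18/36/72/144 channels.
if __name__ == '__main__':
    model = HRNet(width=18, freeze_norm=False, return_idx=[0, 1, 2, 3])
    feats = model({'image': paddle.randn([1, 3, 512, 512])})
    for spec, feat in zip(model.out_shape, feats):
        print(spec.channels, spec.stride, feat.shape)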