transformer_utils.py 2.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374
  1. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import paddle
  15. import paddle.nn as nn
  16. from paddle.nn.initializer import TruncatedNormal, Constant, Assign
  17. # Common initializations
  18. ones_ = Constant(value=1.)
  19. zeros_ = Constant(value=0.)
  20. trunc_normal_ = TruncatedNormal(std=.02)
  21. # Common Layers
  22. def drop_path(x, drop_prob=0., training=False):
  23. """
  24. Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks).
  25. the original name is misleading as 'Drop Connect' is a different form of dropout in a separate paper...
  26. See discussion: https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956 ...
  27. """
  28. if drop_prob == 0. or not training:
  29. return x
  30. keep_prob = paddle.to_tensor(1 - drop_prob)
  31. shape = (paddle.shape(x)[0], ) + (1, ) * (x.ndim - 1)
  32. random_tensor = keep_prob + paddle.rand(shape, dtype=x.dtype)
  33. random_tensor = paddle.floor(random_tensor) # binarize
  34. output = x.divide(keep_prob) * random_tensor
  35. return output
  36. class DropPath(nn.Layer):
  37. def __init__(self, drop_prob=None):
  38. super(DropPath, self).__init__()
  39. self.drop_prob = drop_prob
  40. def forward(self, x):
  41. return drop_path(x, self.drop_prob, self.training)
  42. class Identity(nn.Layer):
  43. def __init__(self):
  44. super(Identity, self).__init__()
  45. def forward(self, input):
  46. return input
  47. # common funcs
  48. def to_2tuple(x):
  49. if isinstance(x, (list, tuple)):
  50. return x
  51. return tuple([x] * 2)
  52. def add_parameter(layer, datas, name=None):
  53. parameter = layer.create_parameter(
  54. shape=(datas.shape), default_initializer=Assign(datas))
  55. if name:
  56. layer.add_parameter(name, parameter)
  57. return parameter