From 3001c0344cb09ad99407b12fe5f01770dea294b7 Mon Sep 17 00:00:00 2001 From: yide12 Date: Thu, 23 Feb 2023 11:05:42 +0800 Subject: [PATCH] change_dropout_keep_prob_to_p_master --- .../api_python/nn/mindspore.nn.Dropout.rst | 17 ++--- .../export_models/models/NetworkInNetwork.py | 4 +- .../export_models/models/mini_alexnet.py | 2 +- .../models/xception_train_export.py | 2 +- mindspore/python/mindspore/nn/layer/basic.py | 62 ++++++++++--------- mindspore/python/mindspore/nn/layer/rnns.py | 2 +- .../python/mindspore/nn/layer/transformer.py | 14 ++-- .../parallel/_transformer/transformer.py | 20 +++--- .../apps/bert_attention_submodules.py | 2 +- .../st/dynamic_shape/test_dcn_dynamic_gpu.py | 2 +- tests/st/dynamic_shape/test_dynamic_asr.py | 12 ++-- .../test_dynamic_wenet_ascend.py | 10 +-- .../transformer/transformer_model.py | 8 +-- tests/st/export_and_load/test_bgcf.py | 4 +- tests/st/ge/ge_pass/pass_dropout.py | 2 +- tests/st/gnn/aggregator.py | 8 +-- tests/st/gnn/gnn_csr/test_appnp_csr.py | 5 +- tests/st/gnn/gnn_csr/test_gat_csr.py | 4 +- tests/st/gnn/gnn_csr/test_gcn_csr.py | 2 +- tests/st/model_zoo_tests/DeepFM/src/deepfm.py | 2 +- .../bert/bert_performance/src/bert_model.py | 6 +- .../st/networks/models/bert/src/bert_model.py | 6 +- .../models/deeplabv3/src/deeplabv3.py | 2 +- .../networks/models/mlp/test_mlp_cell_attr.py | 2 +- tests/st/ops/ascend/test_drop_out_gen_mask.py | 2 +- tests/st/ops/ascend/test_dynamic_ops.py | 2 +- .../ops/graph_kernel/test_layernorm_stitch.py | 2 +- .../ops/graph_kernel/test_softmax_stitch.py | 2 +- tests/ut/python/ops/test_nn_ops.py | 2 +- tests/ut/python/ops/test_ops.py | 2 +- tests/ut/python/ops/test_ops_reid.py | 2 +- .../test_auto_parallel_matmul_drop.py | 2 +- ..._parallel_pangu_alpha_shard_propagation.py | 10 +-- .../parallel/test_batch_parallel_dropout.py | 2 +- tests/ut/python/parallel/test_conformer.py | 8 +-- .../python/pynative_mode/nn/test_dropout.py | 4 +- 36 files changed, 122 insertions(+), 118 deletions(-) diff --git a/docs/api/api_python/nn/mindspore.nn.Dropout.rst b/docs/api/api_python/nn/mindspore.nn.Dropout.rst index 5a4c57041fe..22560789bd4 100644 --- a/docs/api/api_python/nn/mindspore.nn.Dropout.rst +++ b/docs/api/api_python/nn/mindspore.nn.Dropout.rst @@ -1,21 +1,21 @@ mindspore.nn.Dropout ==================== -.. py:class:: mindspore.nn.Dropout(keep_prob=0.5, dtype=mstype.float32) +.. py:class:: mindspore.nn.Dropout(keep_prob=0.5, p=None) 随机丢弃层。 - Dropout是一种正则化手段,该算子根据丢弃概率 :math:`1 - keep\_prob`,在训练过程中随机将一些神经元输出设置为0,通过阻止神经元节点间的相关性来减少过拟合。在推理过程中,此层返回与 `x` 相同的Tensor。 + Dropout是一种正则化手段,该算子根据丢弃概率 `p` ,在训练过程中随机将一些神经元输出设置为0,通过阻止神经元节点间的相关性来减少过拟合。在推理过程中,此层返回与 `x` 相同的Tensor。 论文 `Dropout: A Simple Way to Prevent Neural Networks from Overfitting `_ 中提出了该技术,并证明其能有效地减少过度拟合,防止神经元共适应。更多详细信息,请参见 `Improving neural networks by preventing co-adaptation of feature detectors `_ 。 .. 
note:: - 训练过程中每步对同一通道(或神经元)独立进行丢弃。 - `dtype` 参数会在未来版本删除。不建议使用这个参数。 + - 训练过程中每步对同一通道(或神经元)独立进行丢弃。 + - `keep_prob` 参数会在未来版本删除,请使用 `p` 参数代替它。`p` 表示输入Tensor中元素设置成0的概率。 参数: - - **keep_prob** (float) - 输入神经元保留率,数值范围在0到1之间。例如,rate=0.9,删除10%的神经元。默认值:0.5。 - - **dtype** (:class:`mindspore.dtype`) - `x` 的数据类型。默认值:mstype.float32。 + - **keep_prob** (float) - 废弃。输入神经元保留率,数值范围介于(0, 1]之间。例如,`keep_prob` =0.9,删除10%的神经元。默认值:0.5。 + - **p** (Union[float, int, None]) - 输入神经元丢弃率,数值范围介于[0, 1)之间。例如,`p` =0.9,删除90%的神经元。默认值:None。 输入: - **x** (Tensor) - Dropout的输入,任意维度的Tensor。数据类型必须为float16或float32。 @@ -25,7 +25,8 @@ mindspore.nn.Dropout 异常: - **TypeError** - `keep_prob` 不是浮点数。 + - **TypeError** - `p` 数据类型不是float或int。 - **TypeError** - `x` 的dtype既不是float16也不是float32。 - - **ValueError** - `keep_prob` 不在范围(0, 1]内。 + - **ValueError** - `keep_prob` 不在范围(0, 1]之间。 + - **ValueError** - `p` 不在范围[0, 1)之间。 - **ValueError** - `x` 的shape长度小于1。 - diff --git a/mindspore/lite/examples/export_models/models/NetworkInNetwork.py b/mindspore/lite/examples/export_models/models/NetworkInNetwork.py index f4dfdcf778e..ee084802cb8 100644 --- a/mindspore/lite/examples/export_models/models/NetworkInNetwork.py +++ b/mindspore/lite/examples/export_models/models/NetworkInNetwork.py @@ -35,7 +35,7 @@ class NiN(nn.Cell): nn.Conv2d(in_channels=160, out_channels=96, kernel_size=1, stride=1, has_bias=True), nn.ReLU(), nn.MaxPool2d(kernel_size=3, stride=2, pad_mode='same'), - nn.Dropout(1.0) + nn.Dropout(p=0.0) ) self.block1 = nn.SequentialCell( # block 1 @@ -46,7 +46,7 @@ class NiN(nn.Cell): nn.Conv2d(in_channels=192, out_channels=192, kernel_size=1, stride=1, has_bias=True), nn.ReLU(), nn.MaxPool2d(kernel_size=3, stride=2, pad_mode='same'), - nn.Dropout(1.0) + nn.Dropout(p=0.0) ) self.block2 = nn.SequentialCell( # block 2 diff --git a/mindspore/lite/examples/export_models/models/mini_alexnet.py b/mindspore/lite/examples/export_models/models/mini_alexnet.py index e6008fa714c..1644f000e0d 100644 --- a/mindspore/lite/examples/export_models/models/mini_alexnet.py +++ b/mindspore/lite/examples/export_models/models/mini_alexnet.py @@ -46,7 +46,7 @@ class AlexNet(nn.Cell): self.fc1 = fc_with_initialize(20*3*3, 1024) self.fc2 = fc_with_initialize(1024, 1024) self.fc3 = fc_with_initialize(1024, num_classes) - self.dropout = nn.Dropout(dropout_ratio) + self.dropout = nn.Dropout(p=1-dropout_ratio) def construct(self, x): """define network""" diff --git a/mindspore/lite/examples/export_models/models/xception_train_export.py b/mindspore/lite/examples/export_models/models/xception_train_export.py index 09b9490c5b6..f8507f60ebe 100644 --- a/mindspore/lite/examples/export_models/models/xception_train_export.py +++ b/mindspore/lite/examples/export_models/models/xception_train_export.py @@ -26,7 +26,7 @@ context.set_context(mode=context.GRAPH_MODE, device_target="GPU", save_graphs=Fa n = Xception(num_classes=1000) -n.dropout = nn.Dropout(keep_prob=1.0) +n.dropout = nn.Dropout(p=0.0) loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=False) optimizer = nn.SGD(n.trainable_params(), learning_rate=0.01, momentum=0.9, dampening=0.0, weight_decay=0.0, diff --git a/mindspore/python/mindspore/nn/layer/basic.py b/mindspore/python/mindspore/nn/layer/basic.py index a2f6c6b2672..ccebe258e4e 100644 --- a/mindspore/python/mindspore/nn/layer/basic.py +++ b/mindspore/python/mindspore/nn/layer/basic.py
@@ -107,7 +107,7 @@ class Dropout(Cell): r""" Dropout layer for the input. - Randomly set some elements of the input tensor to zero with probability :math:`1 - keep\_prob` during training + Randomly set some elements of the input tensor to zero with probability `p` during training using samples from a Bernoulli distribution. - The outputs are scaled by a factor of :math:`\frac{1}{keep\_prob}` during training so + The outputs are scaled by a factor of :math:`\frac{1}{1-p}` during training so @@ -121,13 +121,15 @@ class Dropout(Cell): `_. Note: - Each channel will be zeroed out independently on every construct call. - Parameter `dtype` will be removed in a future version. It is not recommended to define this parameter. + - Each channel will be zeroed out independently on every construct call. + - Parameter `keep_prob` will be removed in a future version; please use parameter `p` instead. + Parameter `p` is the probability that an element of the input tensor is zeroed. Args: - keep_prob (float): The keep rate, greater than 0 and less equal than 1. E.g. rate=0.9, - dropping out 10% of input units. Default: 0.5. - dtype (:class:`mindspore.dtype`): Data type of `x`. Default: mindspore.float32. + keep_prob (float): Deprecated. The keep rate, greater than 0 and less than or equal to 1. + E.g. keep_prob=0.9 drops out 10% of input neurons. Default: 0.5. + p (Union[float, int, None]): The dropout rate, greater than or equal to 0 and less than 1. + E.g. p=0.9 drops out 90% of input neurons. Default: None. Inputs: - **x** (Tensor) - The input of Dropout with data type of float16 or float32. @@ -138,8 +140,10 @@ class Dropout(Cell): Raises: TypeError: If `keep_prob` is not a float. + TypeError: If the dtype of `p` is not float or int. TypeError: If dtype of `x` is not neither float16 nor float32. ValueError: If `keep_prob` is not in range (0, 1]. + ValueError: If `p` is not in range [0, 1). ValueError: If length of shape of `x` is less than 1. Supported Platforms: @@ -147,45 +151,45 @@ Examples: >>> x = Tensor(np.ones([2, 2, 3]), mindspore.float32) - >>> net = nn.Dropout(keep_prob=0.8) + >>> net = nn.Dropout(p=0.2) >>> net.set_train() - Dropout >>> output = net(x) >>> print(output.shape) (2, 2, 3) """ - def __init__(self, keep_prob=0.5, dtype=mstype.float32): + def __init__(self, keep_prob=0.5, p=None): """Initialize Dropout.""" super(Dropout, self).__init__() - Validator.check_value_type('keep_prob', keep_prob, [ - float], self.cls_name) - if keep_prob <= 0 or keep_prob > 1: - raise ValueError(f"For '{self.cls_name}', the 'keep_prob' must be a number in range (0, 1], " - f"but got {keep_prob}.") - Validator.check_subclass( - "dtype", dtype, mstype.number_type, self.cls_name) - if dtype != mstype.float32: - logger.info( - "This parameter `dtype` will be deleted or invisible in the future. 
Please don't use it.") + if p is None: + logger.warning("The parameter `keep_prob` is deprecated and will be removed in a future version; please use `p` instead.") + Validator.check_value_type('keep_prob', keep_prob, [float], self.cls_name) + if keep_prob <= 0 or keep_prob > 1: + raise ValueError(f"For '{self.cls_name}', the 'keep_prob' must be a number in range (0, 1], " + f"but got {keep_prob}.") + seed0, seed1 = _get_graph_seed(0, "dropout") + self.dropout = P.Dropout(keep_prob, seed0, seed1) + else: + Validator.check_value_type('p', p, [float, int], self.cls_name) + if p < 0 or p >= 1: + raise ValueError(f"For '{self.cls_name}', the 'p' must be a number in range [0, 1), " + f"but got {p}.") + seed0, seed1 = _get_graph_seed(0, "dropout") + self.dropout = P.Dropout(1.0 - p, seed0, seed1) + self.p = p self.keep_prob = keep_prob - seed0, seed1 = _get_graph_seed(0, "dropout") - self.seed0 = seed0 - self.seed1 = seed1 - self.dropout = P.Dropout(keep_prob, seed0, seed1) def construct(self, x): - if not self.training: - return x - - if self.keep_prob == 1: + if not self.training or self.keep_prob == 1 or self.p == 0: return x out, _ = self.dropout(x) return out def extend_repr(self): - return 'keep_prob={}'.format(self.keep_prob) + if self.p is None: + return f'keep_prob={self.keep_prob}' + return f'p={self.p}' class Dropout1d(Cell): diff --git a/mindspore/python/mindspore/nn/layer/rnns.py b/mindspore/python/mindspore/nn/layer/rnns.py index 98c8eca04bd..a56f2483217 100644 --- a/mindspore/python/mindspore/nn/layer/rnns.py +++ b/mindspore/python/mindspore/nn/layer/rnns.py @@ -408,7 +408,7 @@ class _RNNBase(Cell): self.batch_first = batch_first self.num_layers = num_layers self.dropout = dropout - self.dropout_op = nn.Dropout(float(1 - dropout)) + self.dropout_op = nn.Dropout(p=float(dropout)) self.bidirectional = bidirectional self.has_bias = has_bias num_directions = 2 if bidirectional else 1 diff --git a/mindspore/python/mindspore/nn/layer/transformer.py b/mindspore/python/mindspore/nn/layer/transformer.py index b226de14c70..a9614388757 100644 --- a/mindspore/python/mindspore/nn/layer/transformer.py +++ b/mindspore/python/mindspore/nn/layer/transformer.py @@ -273,14 +273,14 @@ class TransformerEncoderLayer(Cell): self.self_attn = MultiheadAttention(d_model, nhead, dropout=dropout, batch_first=batch_first) # Implementation of Feedforward model self.linear1 = _Linear(d_model, dim_feedforward) - self.dropout = Dropout(1-dropout) + self.dropout = Dropout(p=dropout) self.linear2 = _Linear(dim_feedforward, d_model) self.norm_first = norm_first self.norm1 = LayerNorm((d_model,), epsilon=layer_norm_eps) self.norm2 = LayerNorm((d_model,), epsilon=layer_norm_eps) - self.dropout1 = Dropout(1-dropout) - self.dropout2 = Dropout(1-dropout) + self.dropout1 = Dropout(p=dropout) + self.dropout2 = Dropout(p=dropout) # Legacy string support for activation function. 
if isinstance(activation, str): @@ -380,16 +380,16 @@ class TransformerDecoderLayer(Cell): self.multihead_attn = MultiheadAttention(d_model, nhead, dropout=dropout, batch_first=batch_first) # Implementation of Feedforward model self.linear1 = _Linear(d_model, dim_feedforward) - self.dropout = Dropout(1-dropout) + self.dropout = Dropout(p=dropout) self.linear2 = _Linear(dim_feedforward, d_model) self.norm_first = norm_first self.norm1 = LayerNorm((d_model,), epsilon=layer_norm_eps) self.norm2 = LayerNorm((d_model,), epsilon=layer_norm_eps) self.norm3 = LayerNorm((d_model,), epsilon=layer_norm_eps) - self.dropout1 = Dropout(1-dropout) - self.dropout2 = Dropout(1-dropout) - self.dropout3 = Dropout(1-dropout) + self.dropout1 = Dropout(p=dropout) + self.dropout2 = Dropout(p=dropout) + self.dropout3 = Dropout(p=dropout) # Legacy string support for activation function. if isinstance(activation, str): diff --git a/mindspore/python/mindspore/parallel/_transformer/transformer.py b/mindspore/python/mindspore/parallel/_transformer/transformer.py index 6f95926f579..33af068fa6d 100644 --- a/mindspore/python/mindspore/parallel/_transformer/transformer.py +++ b/mindspore/python/mindspore/parallel/_transformer/transformer.py @@ -496,9 +496,9 @@ class FeedForward(Cell): else: self.projection.shard(strategy_matmul=((dp, mp), (mp, 1))) self.projection.bias.parallel_optimizer = False - self.dropout = nn.Dropout(1 - dropout_rate) - self.dropout_3d = nn.Dropout(1 - dropout_rate) - self.dropout_4d = nn.Dropout(1 - dropout_rate) + self.dropout = nn.Dropout(p=dropout_rate) + self.dropout_3d = nn.Dropout(p=dropout_rate) + self.dropout_4d = nn.Dropout(p=dropout_rate) self.cast = P.Cast() else: _check_config(parallel_config) @@ -556,11 +556,11 @@ class FeedForward(Cell): self.projection.shard(strategy_matmul=((dp, mp), (mp, 1)), strategy_bias=((dp, 1), (1,))) self.projection.bias.parallel_optimizer = False - self.dropout = nn.Dropout(1 - dropout_rate) + self.dropout = nn.Dropout(p=dropout_rate) self.dropout.dropout.shard(((dp, 1),)) - self.dropout_3d = nn.Dropout(1 - dropout_rate) + self.dropout_3d = nn.Dropout(p=dropout_rate) self.dropout_3d.dropout.shard(((dp, 1, 1),)) - self.dropout_4d = nn.Dropout(1 - dropout_rate) + self.dropout_4d = nn.Dropout(p=dropout_rate) self.dropout_4d.dropout.shard(((dp, ep, 1, 1),)) self.cast = P.Cast() @@ -950,8 +950,8 @@ class MultiHeadAttention(Cell): # Normalize factor for attention, sqrt(dk) as widely used self.scale_factor = Tensor(math.sqrt(math.sqrt(self.size_per_head))) self.use_past = use_past - self.dropout = nn.Dropout(1 - hidden_dropout_rate) - self.prob_dropout = nn.Dropout(1 - attention_dropout_rate) + self.dropout = nn.Dropout(p=hidden_dropout_rate) + self.prob_dropout = nn.Dropout(p=attention_dropout_rate) self.softmax = nn.Softmax().to_float(softmax_compute_type) self.softmax_3d = nn.Softmax().to_float(softmax_compute_type) self.expand_dims = P.ExpandDims() @@ -1051,9 +1051,9 @@ class MultiHeadAttention(Cell): # Normalize factor for attention, sqrt(dk) as widely used self.scale_factor = Tensor(math.sqrt(math.sqrt(self.size_per_head))) self.use_past = use_past - self.dropout = nn.Dropout(1 - hidden_dropout_rate) + self.dropout = nn.Dropout(p=hidden_dropout_rate) self.dropout.dropout.shard(((parallel_config.data_parallel, 1),)) - self.prob_dropout = nn.Dropout(1 - attention_dropout_rate) + self.prob_dropout = nn.Dropout(p=attention_dropout_rate) self.prob_dropout.dropout.shard( ((parallel_config.data_parallel, parallel_config.model_parallel, 1, 1),)) self.softmax = 
nn.Softmax().to_float(softmax_compute_type) diff --git a/tests/mindspore_test_framework/apps/bert_attention_submodules.py b/tests/mindspore_test_framework/apps/bert_attention_submodules.py index c8320c660c7..fa1c2bf0a1c 100644 --- a/tests/mindspore_test_framework/apps/bert_attention_submodules.py +++ b/tests/mindspore_test_framework/apps/bert_attention_submodules.py @@ -251,7 +251,7 @@ class BertAttentionSoftmax(nn.Cell): self.weight = TruncatedNormal(initializer_range) self.softmax = nn.Softmax() - self.dropout = nn.Dropout(1 - attention_probs_dropout_prob) + self.dropout = nn.Dropout(p=attention_probs_dropout_prob) self.transpose = P.Transpose() self.value_layer = nn.Dense(self.to_tensor_width, diff --git a/tests/st/dynamic_shape/test_dcn_dynamic_gpu.py b/tests/st/dynamic_shape/test_dcn_dynamic_gpu.py index fd336139e28..8f0635a4b9e 100644 --- a/tests/st/dynamic_shape/test_dcn_dynamic_gpu.py +++ b/tests/st/dynamic_shape/test_dcn_dynamic_gpu.py @@ -70,7 +70,7 @@ class DNN(nn.Cell): dense_layer = nn.Dense(in_channels=self.hidden_units[i], out_channels=self.hidden_units[i + 1], activation=self.activation, weight_init="heUniform") dense_layers.append(dense_layer) - drop_layer = nn.Dropout(1.0 - self.dropout_rate) + drop_layer = nn.Dropout(p=self.dropout_rate) drop_layers.append(drop_layer) self.dense_layers = nn.CellList(dense_layers) self.drop_layers = nn.CellList(drop_layers) diff --git a/tests/st/dynamic_shape/test_dynamic_asr.py b/tests/st/dynamic_shape/test_dynamic_asr.py index e938b96a4d7..79088d83754 100644 --- a/tests/st/dynamic_shape/test_dynamic_asr.py +++ b/tests/st/dynamic_shape/test_dynamic_asr.py @@ -91,7 +91,7 @@ class MultiheadAttention(nn.Cell): self.matmul = P.BatchMatMul() self.softmax = nn.Softmax() - self.dropout = nn.Dropout(1 - attention_probs_dropout_prob) + self.dropout = nn.Dropout(p=attention_probs_dropout_prob) self.sub = P.Sub() self.add = P.TensorAdd() self.cast = P.Cast() @@ -192,7 +192,7 @@ class ResidualNorm(nn.Cell): def __init__(self, size, dropout_prob=0.1): super(ResidualNorm, self).__init__() - self.dropout = nn.Dropout(1 - dropout_prob) + self.dropout = nn.Dropout(p=dropout_prob) self.add = P.TensorAdd() self.layernorm = nn.LayerNorm([size]) self.out_shape = (-1, size) @@ -213,7 +213,7 @@ class FeedForward(nn.Cell): def __init__(self, attention_size, intermediate_size, hidden_act, hidden_dropout_prob): super(FeedForward, self).__init__() - self.dropout = nn.Dropout(1 - hidden_dropout_prob) + self.dropout = nn.Dropout(p=hidden_dropout_prob) self.linear1 = CustomDense(in_channels=attention_size, out_channels=intermediate_size, activation=hidden_act, @@ -303,7 +303,7 @@ class EncoderCell(nn.Cell): has_attention_mask=has_attention_mask, compute_type=compute_type) - self.dropout = nn.Dropout(1 - hidden_dropout_prob) + self.dropout = nn.Dropout(p=hidden_dropout_prob) self.intermediate = CustomDense(in_channels=size, out_channels=intermediate_size, activation=hidden_act, weight_init="zeros") self.res_norm = ResidualNorm(size, dropout_prob=hidden_dropout_prob) @@ -345,7 +345,7 @@ class PositionalEncoding(nn.Cell): super(PositionalEncoding, self).__init__() xscale = math.sqrt(dim) - self.dropout = nn.Dropout(1 - dropout_rate) + self.dropout = nn.Dropout(p=dropout_rate) self.mul = P.Mul() self.add = P.TensorAdd() self.shape = P.Shape() @@ -593,7 +593,7 @@ class CTC(nn.Cell): self.reshape = P.Reshape() self.adim = adim self.odim = odim - self.dropout = nn.Dropout(1 - dropout_prob) + self.dropout = nn.Dropout(p=dropout_prob) self.cast = P.Cast() self.not_equal = 
P.NotEqual() self.ignore_id = ignore_id diff --git a/tests/st/dynamic_shape/test_dynamic_wenet_ascend.py b/tests/st/dynamic_shape/test_dynamic_wenet_ascend.py index bf8b1292bbb..0298fe4c4f6 100644 --- a/tests/st/dynamic_shape/test_dynamic_wenet_ascend.py +++ b/tests/st/dynamic_shape/test_dynamic_wenet_ascend.py @@ -708,7 +708,7 @@ class TransformerEncoderLayer(nn.Cell): self.feed_forward = feed_forward self.norm1 = CustomLayerNorm(size, epsilon=1e-5) self.norm2 = CustomLayerNorm(size, epsilon=1e-5) - self.dropout = nn.Dropout(keep_prob=1 - dropout_rate) + self.dropout = nn.Dropout(p=dropout_rate) self.normalize_before = normalize_before self.concat_after = concat_after if self.concat_after: @@ -979,7 +979,7 @@ class DecoderLayer(nn.Cell): self.norm1 = CustomLayerNorm(size, epsilon=1e-12) self.norm2 = CustomLayerNorm(size, epsilon=1e-12) self.norm3 = CustomLayerNorm(size, epsilon=1e-12) - self.dropout = nn.Dropout(keep_prob=1.0 - dropout_rate) + self.dropout = nn.Dropout(p=dropout_rate) self.normalize_before = normalize_before self.concat_after = concat_after if self.concat_after: @@ -1216,7 +1216,7 @@ class PositionwiseFeedForward(nn.Cell): super(PositionwiseFeedForward, self).__init__() self.w_1 = Dense(idim, hidden_units).to_float(compute_type) self.activation = activation - self.dropout = nn.Dropout(1 - dropout_rate) + self.dropout = nn.Dropout(p=dropout_rate) self.w_2 = Dense(hidden_units, idim).to_float(compute_type) def construct(self, xs): @@ -1318,7 +1318,7 @@ class PositionalEncoding(nn.Cell): super().__init__() self.d_model = d_model self.xscale = Tensor([math.sqrt(self.d_model)], dtype=mstype.float32) - self.dropout = nn.Dropout(1 - dropout_rate) + self.dropout = nn.Dropout(p=dropout_rate) self.max_len = max_len self.pe = np.zeros((self.max_len, self.d_model)) @@ -1399,7 +1399,7 @@ class MultiHeadedAttention(nn.Cell): self.linear_k = Dense(n_feat, n_feat).to_float(compute_type) self.linear_v = Dense(n_feat, n_feat).to_float(compute_type) self.linear_out = Dense(n_feat, n_feat).to_float(compute_type) - self.dropout = nn.Dropout(keep_prob=1 - dropout_rate) + self.dropout = nn.Dropout(p=dropout_rate) self.softmax = nn.Softmax() self.expand_dims = ops.ExpandDims() diff --git a/tests/st/dynamic_shape/transformer/transformer_model.py b/tests/st/dynamic_shape/transformer/transformer_model.py index c39cd5f2ba6..148d4505653 100644 --- a/tests/st/dynamic_shape/transformer/transformer_model.py +++ b/tests/st/dynamic_shape/transformer/transformer_model.py @@ -373,7 +373,7 @@ class EmbeddingPostprocessor(nn.Cell): self.scores_mul = Tensor([math.sqrt(float(embedding_size))], dtype=ms.float32) self.multiply = ops.Mul() self.add = ops.Add() - self.dropout = nn.Dropout(1 - dropout_prob, dtype=ms.float32) + self.dropout = nn.Dropout(p=dropout_prob) self.use_dropout = dropout_prob > 0 self.expand_dims = ops.ExpandDims() self.position_embedding_table = Tensor(position_encoding(max_position_embeddings, embedding_size), @@ -436,7 +436,7 @@ class LayerPostprocess(nn.Cell): dropout_prob=0.1): super(LayerPostprocess, self).__init__() self.add = ops.Add() - self.dropout = nn.Dropout(1 - dropout_prob) + self.dropout = nn.Dropout(p=dropout_prob) self.use_dropout = dropout_prob > 0 def construct(self, hidden_tensor, input_tensor): @@ -535,7 +535,7 @@ class MultiheadAttention(nn.Cell): self.matmul = ops.BatchMatMul() self.softmax = nn.Softmax() - self.dropout = nn.Dropout(1 - attention_probs_dropout_prob) + self.dropout = nn.Dropout(p=attention_probs_dropout_prob) self.use_dropout = 
attention_probs_dropout_prob > 0 if self.has_attention_mask: @@ -704,7 +704,7 @@ class FeedForward(nn.Cell): self.reshape = ops.Reshape() self.shape = (-1, in_channels) - self.dropout = nn.Dropout(1 - hidden_dropout_prob) + self.dropout = nn.Dropout(p=hidden_dropout_prob) self.use_dropout = hidden_dropout_prob > 0 def construct(self, input_tensor): diff --git a/tests/st/export_and_load/test_bgcf.py b/tests/st/export_and_load/test_bgcf.py index 9ba17bb830a..aef92731a89 100644 --- a/tests/st/export_and_load/test_bgcf.py +++ b/tests/st/export_and_load/test_bgcf.py @@ -46,7 +46,7 @@ class MeanConv(nn.Cell): self.matmul = P.MatMul() self.concat = P.Concat(axis=1) self.reduce_mean = P.ReduceMean(keep_dims=False) - self.dropout = nn.Dropout(keep_prob=1 - dropout) + self.dropout = nn.Dropout(p=dropout) def construct(self, self_feature, neigh_feature): neigh_matrix = self.reduce_mean(neigh_feature, 1) @@ -72,7 +72,7 @@ class AttenConv(nn.Cell): self.matmul = P.MatMul() self.matmul_3 = P.BatchMatMul() self.matmul_t = P.BatchMatMul(transpose_b=True) - self.dropout = nn.Dropout(keep_prob=1 - dropout) + self.dropout = nn.Dropout(p=dropout) def construct(self, self_feature, neigh_feature): query = self.expanddims(self_feature, 1) diff --git a/tests/st/ge/ge_pass/pass_dropout.py b/tests/st/ge/ge_pass/pass_dropout.py index c9d9d854171..d37a1a24f57 100644 --- a/tests/st/ge/ge_pass/pass_dropout.py +++ b/tests/st/ge/ge_pass/pass_dropout.py @@ -23,7 +23,7 @@ from mindspore.ops.composite import GradOperation class DropoutNet(nn.Cell): def __init__(self, keep_prob): super(DropoutNet, self).__init__() - self.drop = nn.Dropout(keep_prob) + self.drop = nn.Dropout(p=1.0 - keep_prob) self.relu = ops.ReLU() def construct(self, x): diff --git a/tests/st/gnn/aggregator.py b/tests/st/gnn/aggregator.py index 9f81c616fa2..cdc53065394 100644 --- a/tests/st/gnn/aggregator.py +++ b/tests/st/gnn/aggregator.py @@ -160,7 +160,7 @@ class _BaseAggregator(nn.Cell): has_bias=self.has_bias) self.dropout_ratio = dropout_ratio if self.dropout_ratio is not None: - self.dropout = nn.Dropout(keep_prob=self.dropout_ratio) + self.dropout = nn.Dropout(p=1.0 - self.dropout_ratio) self.dropout_flag = self.dropout_ratio is not None self.activation = get_activation(activation) self.activation_flag = self.activation is not None @@ -263,8 +263,8 @@ class AttentionHead(nn.Cell): self.in_channel = Validator.check_positive_int(in_channel) self.out_channel = Validator.check_positive_int(out_channel) self.in_drop_ratio = in_drop_ratio - self.in_drop = nn.Dropout(keep_prob=1 - in_drop_ratio) - self.in_drop_2 = nn.Dropout(keep_prob=1 - in_drop_ratio) + self.in_drop = nn.Dropout(p=in_drop_ratio) + self.in_drop_2 = nn.Dropout(p=in_drop_ratio) self.feature_transform = GNNFeatureTransform( in_channels=self.in_channel, out_channels=self.out_channel, @@ -278,7 +278,7 @@ class AttentionHead(nn.Cell): out_channels=1) self.softmax = nn.Softmax() - self.coef_drop = nn.Dropout(keep_prob=1 - coef_drop_ratio) + self.coef_drop = nn.Dropout(p=coef_drop_ratio) self.batch_matmul = P.BatchMatMul() self.bias_add = P.BiasAdd() self.bias = Parameter(initializer('zeros', self.out_channel), name='bias') diff --git a/tests/st/gnn/gnn_csr/test_appnp_csr.py b/tests/st/gnn/gnn_csr/test_appnp_csr.py index 3f056debea4..ec76e263a8d 100644 --- a/tests/st/gnn/gnn_csr/test_appnp_csr.py +++ b/tests/st/gnn/gnn_csr/test_appnp_csr.py @@ -23,7 +23,6 @@ import mindspore.context as context from gnngraph_dataset import GraphDataset, GatherNet, CSRReduceSumNet - DATASET_PATH = 
"/home/workspace/mindspore_dataset/cora/cora_mr/cora_v2_with_mask.npz" FEAT_DROPOUT = 0.5 EDGE_DROPOUT = 0.5 @@ -47,7 +46,7 @@ class APPNPConv(ms.nn.Cell): super().__init__() self.k_ = k self.alpha_ = alpha - self.edge_drop = ms.nn.Dropout(edge_drop) + self.edge_drop = ms.nn.Dropout(p=1.0 - edge_drop) self.min_clip = Tensor(1, ms.int32) self.max_clip = Tensor(10000000, ms.int32) self.gather = GatherNet(indptr_backward, indices_backward) @@ -86,7 +85,7 @@ class APPNPNet(nn.Cell): self.fc0 = nn.Dense(in_feats, hidden_dim, weight_init=XavierUniform()) self.fc1 = nn.Dense(hidden_dim, n_classes, weight_init=XavierUniform()) self.act = activation() - self.feat_drop = nn.Dropout(feat_dropout) + self.feat_drop = nn.Dropout(p=1.0 - feat_dropout) self.propagate = APPNPConv(k, alpha, edge_dropout, indptr_backward, indices_backward) def construct(self, x, in_deg, out_deg, n_nodes, indptr, indices): diff --git a/tests/st/gnn/gnn_csr/test_gat_csr.py b/tests/st/gnn/gnn_csr/test_gat_csr.py index 4538686020b..6a583edf23c 100644 --- a/tests/st/gnn/gnn_csr/test_gat_csr.py +++ b/tests/st/gnn/gnn_csr/test_gat_csr.py @@ -66,8 +66,8 @@ class GATConv(ms.nn.Cell): self.attn_d = ms.Parameter(initializer(XavierUniform(gain), [num_attn_head, out_size], ms.float32), name="attn_d") self.bias = ms.Parameter(initializer('zero', [num_attn_head, out_size], ms.float32), name='bias') - self.feat_drop = ms.nn.Dropout(input_drop_out_rate) - self.attn_drop = ms.nn.Dropout(attn_drop_out_rate) + self.feat_drop = ms.nn.Dropout(p=1.0 - input_drop_out_rate) + self.attn_drop = ms.nn.Dropout(p=1.0 - attn_drop_out_rate) self.leaky_relu = ms.nn.LeakyReLU(leaky_relu_slope) self.exp = ms.ops.Exp() if add_norm: diff --git a/tests/st/gnn/gnn_csr/test_gcn_csr.py b/tests/st/gnn/gnn_csr/test_gcn_csr.py index 6ccecc9db04..c136d9dae73 100644 --- a/tests/st/gnn/gnn_csr/test_gcn_csr.py +++ b/tests/st/gnn/gnn_csr/test_gcn_csr.py @@ -51,7 +51,7 @@ class GCNConv(ms.nn.Cell): self.activation = activation self.min_clip = Tensor(1, ms.int32) self.max_clip = Tensor(100000000, ms.int32) - self.drop_out = ms.nn.Dropout(dropout) + self.drop_out = ms.nn.Dropout(p=1.0 - dropout) self.gather = GatherNet(indptr_backward, indices_backward) self.csr_reduce_sum = CSRReduceSumNet(indices_backward) diff --git a/tests/st/model_zoo_tests/DeepFM/src/deepfm.py b/tests/st/model_zoo_tests/DeepFM/src/deepfm.py index db0e6d68272..7c239f428df 100644 --- a/tests/st/model_zoo_tests/DeepFM/src/deepfm.py +++ b/tests/st/model_zoo_tests/DeepFM/src/deepfm.py @@ -137,7 +137,7 @@ class DenseLayer(nn.Cell): self.matmul = P.MatMul(transpose_b=False) self.bias_add = P.BiasAdd() self.cast = P.Cast() - self.dropout = Dropout(keep_prob=1.0) + self.dropout = Dropout(p=0.0) self.mul = P.Mul() self.realDiv = P.RealDiv() self.scale_coef = scale_coef diff --git a/tests/st/networks/models/bert/bert_performance/src/bert_model.py b/tests/st/networks/models/bert/bert_performance/src/bert_model.py index 3a0674c6e4c..972aa717862 100644 --- a/tests/st/networks/models/bert/bert_performance/src/bert_model.py +++ b/tests/st/networks/models/bert/bert_performance/src/bert_model.py @@ -180,7 +180,7 @@ class EmbeddingPostprocessor(nn.Cell): self.array_mul = P.MatMul() self.reshape = P.Reshape() self.shape = tuple(embedding_shape) - self.dropout = nn.Dropout(1 - dropout_prob) + self.dropout = nn.Dropout(p=dropout_prob) self.gather = P.Gather() self.use_relative_positions = use_relative_positions self.slice = P.StridedSlice() @@ -230,7 +230,7 @@ class BertOutput(nn.Cell): super(BertOutput, self).__init__() 
self.dense = nn.Dense(in_channels, out_channels, weight_init=TruncatedNormal(initializer_range)).to_float(compute_type) - self.dropout = nn.Dropout(1 - dropout_prob) + self.dropout = nn.Dropout(p=dropout_prob) self.dropout_prob = dropout_prob self.add = P.Add() self.layernorm = nn.LayerNorm((out_channels,)).to_float(compute_type) @@ -433,7 +433,7 @@ class BertAttention(nn.Cell): self.matmul = P.BatchMatMul() self.softmax = nn.Softmax() - self.dropout = nn.Dropout(1 - attention_probs_dropout_prob) + self.dropout = nn.Dropout(p=attention_probs_dropout_prob) if self.has_attention_mask: self.expand_dims = P.ExpandDims() diff --git a/tests/st/networks/models/bert/src/bert_model.py b/tests/st/networks/models/bert/src/bert_model.py index 51e64c88712..a4a6f3bc3d3 100644 --- a/tests/st/networks/models/bert/src/bert_model.py +++ b/tests/st/networks/models/bert/src/bert_model.py @@ -193,7 +193,7 @@ class EmbeddingPostprocessor(nn.Cell): self.reshape = P.Reshape() self.shape = tuple(embedding_shape) self.layernorm = nn.LayerNorm((embedding_size,)) - self.dropout = nn.Dropout(1 - dropout_prob) + self.dropout = nn.Dropout(p=dropout_prob) self.gather = P.Gather() self.use_relative_positions = use_relative_positions self.slice = P.StridedSlice() @@ -247,7 +247,7 @@ class BertOutput(nn.Cell): super(BertOutput, self).__init__() self.dense = nn.Dense(in_channels, out_channels, weight_init=TruncatedNormal(initializer_range)).to_float(compute_type) - self.dropout = nn.Dropout(1 - dropout_prob) + self.dropout = nn.Dropout(p=dropout_prob) self.dropout_prob = dropout_prob self.add = P.Add() self.layernorm = nn.LayerNorm((out_channels,)).to_float(compute_type) @@ -469,7 +469,7 @@ class BertAttention(nn.Cell): self.matmul = P.BatchMatMul() self.softmax = nn.Softmax() - self.dropout = nn.Dropout(1 - attention_probs_dropout_prob) + self.dropout = nn.Dropout(p=attention_probs_dropout_prob) if self.has_attention_mask: self.expand_dims = P.ExpandDims() diff --git a/tests/st/networks/models/deeplabv3/src/deeplabv3.py b/tests/st/networks/models/deeplabv3/src/deeplabv3.py index bbfc4dceb31..e2dedbac229 100644 --- a/tests/st/networks/models/deeplabv3/src/deeplabv3.py +++ b/tests/st/networks/models/deeplabv3/src/deeplabv3.py @@ -300,7 +300,7 @@ class SingleDeepLabV3(nn.Cell): float(feature_shape[3])] self.pad = P.Pad(((0, 0), (0, 0), (1, 1), (1, 1))) - self.dropout = nn.Dropout(keep_prob=0.9) + self.dropout = nn.Dropout(p=0.1) self.shape = P.Shape() self.decoder_output_stride = decoder_output_stride if decoder_output_stride is not None: diff --git a/tests/st/networks/models/mlp/test_mlp_cell_attr.py b/tests/st/networks/models/mlp/test_mlp_cell_attr.py index e40ef1ccf89..37e19b8bb69 100644 --- a/tests/st/networks/models/mlp/test_mlp_cell_attr.py +++ b/tests/st/networks/models/mlp/test_mlp_cell_attr.py @@ -380,7 +380,7 @@ class CellDropDense(nn.Cell): def __init__(self): super(CellDropDense, self).__init__() self.fc = nn.Dense(100, 100) - self.drop = nn.Dropout(1.0 - 0.1) + self.drop = nn.Dropout(p=0.1) def construct(self, input_x): out = self.fc(input_x) diff --git a/tests/st/ops/ascend/test_drop_out_gen_mask.py b/tests/st/ops/ascend/test_drop_out_gen_mask.py index 5998997b1f7..df8801d7f54 100644 --- a/tests/st/ops/ascend/test_drop_out_gen_mask.py +++ b/tests/st/ops/ascend/test_drop_out_gen_mask.py @@ -57,7 +57,7 @@ def test_net(): class Drop(nn.Cell): def __init__(self): super(Drop, self).__init__() - self.drop = nn.Dropout(1.0 - 0.5) + self.drop = nn.Dropout(p=0.5) def construct(self, out): out = self.drop(out) diff --git 
a/tests/st/ops/ascend/test_dynamic_ops.py b/tests/st/ops/ascend/test_dynamic_ops.py index bd51e24fac3..0db5cbe82f9 100644 --- a/tests/st/ops/ascend/test_dynamic_ops.py +++ b/tests/st/ops/ascend/test_dynamic_ops.py @@ -144,7 +144,7 @@ class Conv2dNet(nn.Cell): class DropoutNet(nn.Cell): def __init__(self): super(DropoutNet, self).__init__() - self.drop = nn.Dropout(0.5) + self.drop = nn.Dropout(p=0.5) self.relu = ops.ReLU() def construct(self, x): diff --git a/tests/st/ops/graph_kernel/test_layernorm_stitch.py b/tests/st/ops/graph_kernel/test_layernorm_stitch.py index d0c842dc5cc..37aba5cfe2d 100644 --- a/tests/st/ops/graph_kernel/test_layernorm_stitch.py +++ b/tests/st/ops/graph_kernel/test_layernorm_stitch.py @@ -31,7 +31,7 @@ class EmbeddingPostprocessor(Cell): super(EmbeddingPostprocessor, self).__init__() self.layernorm = nn.LayerNorm((768,)) self.add = P.Add() - self.dropout = nn.Dropout(1 - 0.1) + self.dropout = nn.Dropout(p=0.1) def construct(self, word_embeddings, token_type_embeddings, position_embeddings): output = word_embeddings diff --git a/tests/st/ops/graph_kernel/test_softmax_stitch.py b/tests/st/ops/graph_kernel/test_softmax_stitch.py index 86191d76f3c..80d68c44fef 100644 --- a/tests/st/ops/graph_kernel/test_softmax_stitch.py +++ b/tests/st/ops/graph_kernel/test_softmax_stitch.py @@ -31,7 +31,7 @@ class BertAttentionPiece(Cell): def __init__(self): super(BertAttentionPiece, self).__init__() self.add = P.Add() - self.dropout = nn.Dropout(1 - 0.1) + self.dropout = nn.Dropout(p=0.1) self.softmax = nn.Softmax() self.multiply_data = -10000.0 self.sub = P.Sub() diff --git a/tests/ut/python/ops/test_nn_ops.py b/tests/ut/python/ops/test_nn_ops.py index 111e13f7374..9f17e9a4bdb 100644 --- a/tests/ut/python/ops/test_nn_ops.py +++ b/tests/ut/python/ops/test_nn_ops.py @@ -570,7 +570,7 @@ test_cases = [ 'desc_bprop': [[128, 32, 32, 64]], }), ('DropoutGrad', { - 'block': DropoutGrad(VirtualNetWithLoss(nn.Dropout())), + 'block': DropoutGrad(VirtualNetWithLoss(nn.Dropout(p=0.5))), 'desc_inputs': [[128, 32, 32, 64]], 'desc_bprop': [[128, 32, 32, 64]], }), diff --git a/tests/ut/python/ops/test_ops.py b/tests/ut/python/ops/test_ops.py index 2e2b0dee99e..8c915f46fd1 100644 --- a/tests/ut/python/ops/test_ops.py +++ b/tests/ut/python/ops/test_ops.py @@ -3101,7 +3101,7 @@ test_case_nn_ops = [ 'desc_inputs': [[64, 12, 128, 128], Tensor(np.ones(1572864).astype(np.uint8))], 'desc_bprop': [[64, 12, 128, 128]]}), ('Dropout', { - 'block': nn.Dropout(0.5), + 'block': nn.Dropout(p=0.5), 'desc_inputs': [[64, 12, 128, 128]], 'desc_bprop': [[64, 12, 128, 128]]}), ('ReduceMean0', { diff --git a/tests/ut/python/ops/test_ops_reid.py b/tests/ut/python/ops/test_ops_reid.py index 5cf320d5c94..b81148010c3 100644 --- a/tests/ut/python/ops/test_ops_reid.py +++ b/tests/ut/python/ops/test_ops_reid.py @@ -135,7 +135,7 @@ test_case_reid_ops = [ 'desc_inputs': [convert([256], np.float16), convert([256], np.float16)], 'desc_bprop': [convert([256], np.bool_)]}), ('Dropout', { - 'block': nn.Dropout(), + 'block': nn.Dropout(p=0.5), 'desc_inputs': [[1, 512, 7, 7]], 'desc_bprop': [[1, 512, 7, 7]]}), ('MatMul', { diff --git a/tests/ut/python/parallel/test_auto_parallel_matmul_drop.py b/tests/ut/python/parallel/test_auto_parallel_matmul_drop.py index 7ec16b2521e..54685e8f55f 100644 --- a/tests/ut/python/parallel/test_auto_parallel_matmul_drop.py +++ b/tests/ut/python/parallel/test_auto_parallel_matmul_drop.py @@ -57,7 +57,7 @@ def test_two_matmul_dropout(): def __init__(self): super().__init__() self.matmul1 = P.MatMul() - 
self.dropout = nn.Dropout() + self.dropout = nn.Dropout(p=0.5) self.matmul2 = P.MatMul() def construct(self, x, y, b): diff --git a/tests/ut/python/parallel/test_auto_parallel_pangu_alpha_shard_propagation.py b/tests/ut/python/parallel/test_auto_parallel_pangu_alpha_shard_propagation.py index d69fb1ebe85..412e2b9e459 100644 --- a/tests/ut/python/parallel/test_auto_parallel_pangu_alpha_shard_propagation.py +++ b/tests/ut/python/parallel/test_auto_parallel_pangu_alpha_shard_propagation.py @@ -193,8 +193,8 @@ class FeedForward(Cell): param_init_type=param_init_type) self.projection.shard(strategy_matmul=((dp, mp), (mp, 1))) self.projection.bias.parallel_optimizer = False - self.dropout = nn.Dropout(1 - dropout_rate) - self.dropout_3d = nn.Dropout(1 - dropout_rate) + self.dropout = nn.Dropout(p=dropout_rate) + self.dropout_3d = nn.Dropout(p=dropout_rate) self.cast = P.Cast() def construct(self, x): @@ -246,8 +246,8 @@ class MultiHeadAttention(Cell): self.mul = P.Mul() self.add = P.Add() self.scale_factor = Tensor(math.sqrt(self.size_per_head)) - self.dropout = nn.Dropout(1 - hidden_dropout_rate) - self.prob_dropout = nn.Dropout(1 - attention_dropout_rate) + self.dropout = nn.Dropout(p=hidden_dropout_rate) + self.prob_dropout = nn.Dropout(p=attention_dropout_rate) self.softmax = nn.Softmax().to_float(softmax_compute_type) self.expand_dims = P.ExpandDims() # Query @@ -474,7 +474,7 @@ class EmbeddingLayer(nn.Cell): self.word_embedding = VocabEmbedding(vocab_size=40000, embedding_size=2560) self.position_embedding = VocabEmbedding(vocab_size=40000, embedding_size=2560) self.add = P.Add() - self.dropout = nn.Dropout(0.9) + self.dropout = nn.Dropout(p=0.1) def construct(self, input_ids, input_position, init_reset, batch_valid_length): word_embedding, word_table = self.word_embedding(input_ids) diff --git a/tests/ut/python/parallel/test_batch_parallel_dropout.py b/tests/ut/python/parallel/test_batch_parallel_dropout.py index a42e8928628..a71fe3a93fc 100644 --- a/tests/ut/python/parallel/test_batch_parallel_dropout.py +++ b/tests/ut/python/parallel/test_batch_parallel_dropout.py @@ -57,7 +57,7 @@ def test_batch_parallel_dropout(): def __init__(self): super().__init__() self.matmul1 = P.MatMul() - self.dropout = nn.Dropout() + self.dropout = nn.Dropout(p=0.5) self.matmul2 = P.MatMul() def construct(self, x, y, b): diff --git a/tests/ut/python/parallel/test_conformer.py b/tests/ut/python/parallel/test_conformer.py index d831741d526..bf3dc99f5d1 100644 --- a/tests/ut/python/parallel/test_conformer.py +++ b/tests/ut/python/parallel/test_conformer.py @@ -206,9 +206,9 @@ class Mlp(nn.Cell): self.fc2.matmul.shard(((dp, mp), (1, mp))) self.fc2.bias_add.shard(((dp, 1), (1,))) - self.drop = nn.Dropout(1.0-drop) + self.drop = nn.Dropout(p=drop) self.drop.dropout.shard(((dp, 1),)) - self.drop2 = nn.Dropout(1.0-drop) + self.drop2 = nn.Dropout(p=drop) self.drop2.dropout.shard(((dp, mp),)) def construct(self, x): @@ -263,14 +263,14 @@ class Attention(nn.Cell): self.softmax.softmax.shard(((dp, mp, 1, 1),)) self.batmatmul_trans_b = P.BatchMatMul().shard(((dp, mp, 1, 1), (dp, mp, 1, 1))) - self.attn_drop = nn.Dropout(1. - attn_drop) + self.attn_drop = nn.Dropout(p=attn_drop) self.attn_drop.dropout.shard(((dp, mp, 1, 1),)) self.proj = nn.Dense(hidden_dim, dim, weight_init=TruncatedNormal(0.02)).to_float(mindspore.float16) self.proj.matmul.shard(((dp, mp), (1, mp))) self.proj.bias_add.shard(((dp, 1), (1,))) - self.proj_drop = nn.Dropout(1. 
- proj_drop) + self.proj_drop = nn.Dropout(p=proj_drop) self.proj_drop.dropout.shard(((dp, 1),)) self.transpose = P.Transpose().shard(((dp, 1, mp, 1),)) diff --git a/tests/ut/python/pynative_mode/nn/test_dropout.py b/tests/ut/python/pynative_mode/nn/test_dropout.py index 6865baa0189..1f5ee97c1ab 100644 --- a/tests/ut/python/pynative_mode/nn/test_dropout.py +++ b/tests/ut/python/pynative_mode/nn/test_dropout.py @@ -25,14 +25,14 @@ context.set_context(device_target="Ascend") def test_check_dropout(): x = Tensor(np.ones([20, 16, 50]), mstype.float32) - m = nn.Dropout(0.8) + m = nn.Dropout(p=0.2) m(x) class Net_Dropout(nn.Cell): def __init__(self): super(Net_Dropout, self).__init__() - self.dropout = nn.Dropout(0.5) + self.dropout = nn.Dropout(p=0.5) def construct(self, x): return self.dropout(x)
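The migration rule applied at every call site in this patch is p = 1 - keep_prob, so runtime behavior is unchanged. A minimal before/after sketch of the API change (illustrative only; it assumes a MindSpore build that includes this patch, where the keep_prob form still works but logs a deprecation warning):

import numpy as np
import mindspore as ms
from mindspore import nn

x = ms.Tensor(np.ones([2, 2, 3]), ms.float32)

# Deprecated style: keep_prob is the probability of KEEPING an element.
old_drop = nn.Dropout(keep_prob=0.8)

# New style: p is the probability of ZEROING an element, i.e. p = 1 - keep_prob.
new_drop = nn.Dropout(p=0.2)

# Dropout is only applied in training mode; in inference mode both return x unchanged.
old_drop.set_train()
new_drop.set_train()

# Both layers zero each element with probability 0.2 during training and
# scale the surviving elements by 1/0.8.
print(old_drop(x).shape)  # (2, 2, 3)
print(new_drop(x).shape)  # (2, 2, 3)

The zero-probability semantics of `p` match the dropout-rate convention used by most other frameworks, which appears to be the motivation for the rename.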