fix weight_init bug

2020-09-16 18:55:15 +08:00 · 2020-09-16 18:55:15 +08:00 · 5010956773
parent bd8cf0c14f
commit 5010956773
3 changed files with 120 additions and 1 deletions
--- a/mindspore/nn/layer/conv.py
+++ b/mindspore/nn/layer/conv.py
@@ -20,7 +20,7 @@ from mindspore import context
 from mindspore.ops import operations as P
 from mindspore.ops.primitive import constexpr
 from mindspore.common.parameter import Parameter
-from mindspore.common.initializer import initializer
+from mindspore.common.initializer import initializer, Initializer
 from mindspore.common.tensor import Tensor
 from mindspore._checkparam import ParamValidator as validator, Rel
 from mindspore._checkparam import Validator
@@ -251,6 +251,10 @@ class Conv2d(_Conv):
                                                  stride=self.stride,
                                                  dilation=self.dilation)
            weight_shape = [1, self.in_channels, *self.kernel_size]
+            if isinstance(self.weight_init, Tensor):
+                self.weight_init = Tensor(self.weight_init.asnumpy().swapaxes(0, 1), self.weight_init.dtype)
+            if isinstance(self.weight_init, Initializer):
+                self.weight_init.shape = weight_shape
            self.weight = Parameter(initializer(self.weight_init, weight_shape), name='weight')

    def construct(self, x):
--- a/tests/st/ops/ascend/test_conv2d_depthwiseconv2d.py
+++ b/tests/st/ops/ascend/test_conv2d_depthwiseconv2d.py
@@ -0,0 +1,59 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+import numpy as np
+import pytest
+
+import mindspore.context as context
+import mindspore.nn as nn
+import mindspore.common.dtype as mstype
+from mindspore.common.initializer import Normal
+from mindspore import Tensor
+
+
+context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")  # module-level: every test in this file runs in graph mode on the Ascend target
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_ascend_training
+@pytest.mark.platform_arm_ascend_training
+@pytest.mark.env_onecard
+def test_conv2d_depthwiseconv2d_str():  # Conv2d with group == in_channels == out_channels == 128, weight_init given as the string 'normal'
+    net = nn.Conv2d(128, 128, (2, 3), stride=4, pad_mode='valid', padding=0, group=128, weight_init='normal')
+    input_data = Tensor(np.ones([3, 128, 127, 114]), dtype=mstype.float32)  # all-ones float32 input of shape (3, 128, 127, 114)
+    output = net(input_data)
+    assert output.shape == (3, 128, 32, 28)  # shape-only check; spatial dims: (127 - 2) // 4 + 1 = 32, (114 - 3) // 4 + 1 = 28
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_ascend_training
+@pytest.mark.platform_arm_ascend_training
+@pytest.mark.env_onecard
+def test_conv2d_depthwiseconv2d_initializer():  # same network as the string case, but weight_init is a Normal() initializer instance
+    net = nn.Conv2d(128, 128, (2, 3), stride=4, pad_mode='valid', padding=0, group=128, weight_init=Normal())
+    input_data = Tensor(np.ones([3, 128, 127, 114]), dtype=mstype.float32)  # all-ones float32 input of shape (3, 128, 127, 114)
+    output = net(input_data)
+    assert output.shape == (3, 128, 32, 28)  # shape-only check; spatial dims: (127 - 2) // 4 + 1 = 32, (114 - 3) // 4 + 1 = 28
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_ascend_training
+@pytest.mark.platform_arm_ascend_training
+@pytest.mark.env_onecard
+def test_conv2d_depthwiseconv2d_tensor():  # same network again, but weight_init is a pre-built weight Tensor
+    weight_init = Tensor(np.random.randn(128, 1, 2, 3).astype(np.float32))  # random float32 weights of shape (128, 1, 2, 3); unseeded, so values differ per run
+    net = nn.Conv2d(128, 128, (2, 3), stride=4, pad_mode='valid', padding=0, group=128, weight_init=weight_init)
+    input_data = Tensor(np.ones([3, 128, 127, 114]), dtype=mstype.float32)  # all-ones float32 input of shape (3, 128, 127, 114)
+    output = net(input_data)
+    assert output.shape == (3, 128, 32, 28)  # shape-only check; spatial dims: (127 - 2) // 4 + 1 = 32, (114 - 3) // 4 + 1 = 28
--- a/tests/st/ops/gpu/test_conv2d_depthwiseconv2d.py
+++ b/tests/st/ops/gpu/test_conv2d_depthwiseconv2d.py
@@ -0,0 +1,56 @@
+# Copyright 2020 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import numpy as np
+import pytest
+
+import mindspore.nn as nn
+import mindspore.common.dtype as mstype
+from mindspore.common.initializer import Normal
+from mindspore import Tensor
+from mindspore import context
+
+context.set_context(mode=context.GRAPH_MODE, device_target="GPU")  # module-level: every test in this file runs in graph mode on the GPU target
+
+
+@pytest.mark.level1
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_conv2d_depthwiseconv2d_str():  # Conv2d with group == in_channels == out_channels == 128, weight_init given as the string 'normal'
+    net = nn.Conv2d(128, 128, (2, 3), stride=4, pad_mode='valid', padding=0, group=128, weight_init='normal')
+    input_data = Tensor(np.ones([3, 128, 127, 114]), dtype=mstype.float32)  # all-ones float32 input of shape (3, 128, 127, 114)
+    output = net(input_data)
+    assert output.shape == (3, 128, 32, 28)  # shape-only check; spatial dims: (127 - 2) // 4 + 1 = 32, (114 - 3) // 4 + 1 = 28
+
+
+@pytest.mark.level1
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_conv2d_depthwiseconv2d_initializer():  # same network as the string case, but weight_init is a Normal() initializer instance
+    net = nn.Conv2d(128, 128, (2, 3), stride=4, pad_mode='valid', padding=0, group=128, weight_init=Normal())
+    input_data = Tensor(np.ones([3, 128, 127, 114]), dtype=mstype.float32)  # all-ones float32 input of shape (3, 128, 127, 114)
+    output = net(input_data)
+    assert output.shape == (3, 128, 32, 28)  # shape-only check; spatial dims: (127 - 2) // 4 + 1 = 32, (114 - 3) // 4 + 1 = 28
+
+
+@pytest.mark.level1
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_conv2d_depthwiseconv2d_tensor():  # same network again, but weight_init is a pre-built weight Tensor
+    weight_init = Tensor(np.random.randn(128, 1, 2, 3).astype(np.float32))  # random float32 weights of shape (128, 1, 2, 3); unseeded, so values differ per run
+    net = nn.Conv2d(128, 128, (2, 3), stride=4, pad_mode='valid', padding=0, group=128, weight_init=weight_init)
+    input_data = Tensor(np.ones([3, 128, 127, 114]), dtype=mstype.float32)  # all-ones float32 input of shape (3, 128, 127, 114)
+    output = net(input_data)
+    assert output.shape == (3, 128, 32, 28)  # shape-only check; spatial dims: (127 - 2) // 4 + 1 = 32, (114 - 3) // 4 + 1 = 28