diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/array_reduce_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/array_reduce_gpu_kernel.cc
index 589dbee97f0..52ad1099c02 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/array_reduce_gpu_kernel.cc
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/array_reduce_gpu_kernel.cc
@@ -42,5 +42,11 @@ MS_REG_GPU_KERNEL_ONE(ReduceAny, KernelAttr().AddInputAttr(kNumberTypeBool).AddO
                       ArrayReduceGpuKernel, bool)
 MS_REG_GPU_KERNEL_ONE(ReduceAll, KernelAttr().AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool),
                       ArrayReduceGpuKernel, bool)
+MS_REG_GPU_KERNEL_ONE(ReduceProd, KernelAttr().AddInputAttr(kNumberTypeInt8).AddOutputAttr(kNumberTypeInt8),
+                      ArrayReduceGpuKernel, int8_t)
+MS_REG_GPU_KERNEL_ONE(ReduceProd, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
+                      ArrayReduceGpuKernel, half)
+MS_REG_GPU_KERNEL_ONE(ReduceProd, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
+                      ArrayReduceGpuKernel, float)
 }  // namespace kernel
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/array_reduce_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/array_reduce_gpu_kernel.h
index 9cc4959211a..f38e1cace11 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/array_reduce_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/array_reduce_gpu_kernel.h
@@ -27,9 +27,10 @@
 namespace mindspore {
 namespace kernel {
 const std::map<std::string, cudnnReduceTensorOp_t> kReduceTypeMap = {
-  {"ReduceMax", CUDNN_REDUCE_TENSOR_MAX}, {"ReduceMean", CUDNN_REDUCE_TENSOR_AVG},
-  {"ReduceSum", CUDNN_REDUCE_TENSOR_ADD}, {"ReduceMin", CUDNN_REDUCE_TENSOR_MIN},
-  {"ReduceAny", CUDNN_REDUCE_TENSOR_MAX}, {"ReduceAll", CUDNN_REDUCE_TENSOR_MUL},
+  {"ReduceMax", CUDNN_REDUCE_TENSOR_MAX},  {"ReduceMean", CUDNN_REDUCE_TENSOR_AVG},
+  {"ReduceSum", CUDNN_REDUCE_TENSOR_ADD},  {"ReduceMin", CUDNN_REDUCE_TENSOR_MIN},
+  {"ReduceAny", CUDNN_REDUCE_TENSOR_MAX},  {"ReduceAll", CUDNN_REDUCE_TENSOR_MUL},
+  {"ReduceProd", CUDNN_REDUCE_TENSOR_MUL},
 };
 template <typename T>
 class ArrayReduceGpuKernel : public GpuKernel {
diff --git a/tests/st/ops/gpu/test_reduce_prod_op.py b/tests/st/ops/gpu/test_reduce_prod_op.py
new file mode 100644
index 00000000000..b77fd790150
--- /dev/null
+++ b/tests/st/ops/gpu/test_reduce_prod_op.py
@@ -0,0 +1,170 @@
+# Copyright 2021 Huawei Technologies Co., Ltd
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ============================================================================
+
+import numpy as np
+import pytest
+
+import mindspore.context as context
+import mindspore.nn as nn
+from mindspore import Tensor
+from mindspore.common.api import ms_function
+from mindspore.ops import operations as P
+
+# Case N is the triple (xN, axisN, keep_dimsN); np_axisN is given only where
+# NumPy needs a different spelling of the same axis.
+# Fixed seed so a failing run can be reproduced.
+np.random.seed(0)
+
+x0 = np.random.rand(2, 3, 4, 4).astype(np.float32)
+axis0 = 3
+keep_dims0 = True
+
+x1 = np.random.rand(2, 3, 4, 4).astype(np.float16)
+axis1 = 3
+keep_dims1 = False
+
+# rand() yields values in [0, 1), which all truncate to 0 as int8, so draw
+# integers instead.
+x2 = np.random.randint(-8, 8, size=(2, 3, 1, 4)).astype(np.int8)
+axis2 = 2
+keep_dims2 = True
+
+x3 = np.random.rand(2, 3, 1, 4).astype(np.float32)
+axis3 = 2
+keep_dims3 = False
+
+x4 = np.random.rand(2, 3, 4, 4).astype(np.float16)
+axis4 = ()
+np_axis4 = None
+keep_dims4 = True
+
+# All 96 elements are multiplied together here; +/-1 entries keep that
+# product representable in int8.
+x5 = np.random.choice([-1, 1], size=(2, 3, 4, 4)).astype(np.int8)
+axis5 = ()
+np_axis5 = None
+keep_dims5 = False
+
+x6 = np.random.rand(2, 3, 4, 4).astype(np.float32)
+axis6 = -2
+keep_dims6 = False
+
+x7 = np.random.rand(2, 3, 4, 4).astype(np.float16)
+axis7 = (-2, -1)
+keep_dims7 = True
+
+x8 = np.random.rand(1, 1, 1, 1).astype(np.float32)
+axis8 = ()
+np_axis8 = None
+keep_dims8 = True
+
+
+class ReduceProd(nn.Cell):
+    """Applies P.ReduceProd to every module-level case and returns all results."""
+
+    def __init__(self):
+        super(ReduceProd, self).__init__()
+
+        self.x0 = Tensor(x0)
+        self.axis0 = axis0
+        self.keep_dims0 = keep_dims0
+
+        self.x1 = Tensor(x1)
+        self.axis1 = axis1
+        self.keep_dims1 = keep_dims1
+
+        self.x2 = Tensor(x2)
+        self.axis2 = axis2
+        self.keep_dims2 = keep_dims2
+
+        self.x3 = Tensor(x3)
+        self.axis3 = axis3
+        self.keep_dims3 = keep_dims3
+
+        self.x4 = Tensor(x4)
+        self.axis4 = axis4
+        self.keep_dims4 = keep_dims4
+
+        self.x5 = Tensor(x5)
+        self.axis5 = axis5
+        self.keep_dims5 = keep_dims5
+
+        self.x6 = Tensor(x6)
+        self.axis6 = axis6
+        self.keep_dims6 = keep_dims6
+
+        self.x7 = Tensor(x7)
+        self.axis7 = axis7
+        self.keep_dims7 = keep_dims7
+
+        self.x8 = Tensor(x8)
+        self.axis8 = axis8
+        self.keep_dims8 = keep_dims8
+
+    @ms_function
+    def construct(self):
+        return (P.ReduceProd(self.keep_dims0)(self.x0, self.axis0),
+                P.ReduceProd(self.keep_dims1)(self.x1, self.axis1),
+                P.ReduceProd(self.keep_dims2)(self.x2, self.axis2),
+                P.ReduceProd(self.keep_dims3)(self.x3, self.axis3),
+                P.ReduceProd(self.keep_dims4)(self.x4, self.axis4),
+                P.ReduceProd(self.keep_dims5)(self.x5, self.axis5),
+                P.ReduceProd(self.keep_dims6)(self.x6, self.axis6),
+                P.ReduceProd(self.keep_dims7)(self.x7, self.axis7),
+                P.ReduceProd(self.keep_dims8)(self.x8, self.axis8))
+
+
+def _assert_matches_numpy(actual, x, np_axis, keep_dims):
+    """Check one ReduceProd result against np.prod over the same input.
+
+    Args:
+        actual: Tensor returned by P.ReduceProd for this case.
+        x: NumPy array the tensor was reduced from.
+        np_axis: Axis argument to pass to np.prod.
+        keep_dims: Forwarded to np.prod as keepdims.
+    """
+    expect = np.prod(x, axis=np_axis, keepdims=keep_dims)
+    # float16 keeps about three significant digits, so it gets a looser bound.
+    tolerance = 1.0e-3 if x.dtype == np.float16 else 1.0e-5
+    diff = np.abs(actual.asnumpy() - expect)
+    assert np.all(diff < tolerance)
+    assert actual.shape == expect.shape
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_reduce_prod():
+    """Every ReduceProd case must agree with np.prod in value and shape."""
+    context.set_context(mode=context.PYNATIVE_MODE, device_target='GPU')
+    reduce_prod = ReduceProd()
+    output = reduce_prod()
+
+    # Pair each output with the NumPy arguments for the same case. NumPy spells
+    # "reduce over every axis" as axis=None, where the op is given ().
+    cases = (
+        (x0, axis0, keep_dims0),
+        (x1, axis1, keep_dims1),
+        (x2, axis2, keep_dims2),
+        (x3, axis3, keep_dims3),
+        (x4, np_axis4, keep_dims4),
+        (x5, np_axis5, keep_dims5),
+        (x6, axis6, keep_dims6),
+        (x7, axis7, keep_dims7),
+        (x8, np_axis8, keep_dims8),
+    )
+    assert len(output) == len(cases)
+    for actual, (x, np_axis, keep_dims) in zip(output, cases):
+        _assert_matches_numpy(actual, x, np_axis, keep_dims)