diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_nd_functor_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_nd_functor_gpu_kernel.cc new file mode 100644 index 00000000000..346b93d09ab --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_nd_functor_gpu_kernel.cc @@ -0,0 +1,363 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "backend/kernel_compiler/gpu/arrays/scatter_nd_functor_gpu_kernel.h" + +namespace mindspore { +namespace kernel { +// ScatterNdUpdate +MS_REG_GPU_KERNEL_TWO(ScatterNdUpdate, + KernelAttr() + .AddInputAttr(kNumberTypeFloat64) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat64) + .AddOutputAttr(kNumberTypeFloat64), + ScatterNdFunctorKernel, double, int) +MS_REG_GPU_KERNEL_TWO(ScatterNdUpdate, + KernelAttr() + .AddInputAttr(kNumberTypeFloat64) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeFloat64) + .AddOutputAttr(kNumberTypeFloat64), + ScatterNdFunctorKernel, double, int64_t) +MS_REG_GPU_KERNEL_TWO(ScatterNdUpdate, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + ScatterNdFunctorKernel, float, int) +MS_REG_GPU_KERNEL_TWO(ScatterNdUpdate, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + ScatterNdFunctorKernel, float, int64_t) +MS_REG_GPU_KERNEL_TWO(ScatterNdUpdate, + KernelAttr() + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat16) + .AddOutputAttr(kNumberTypeFloat16), + ScatterNdFunctorKernel, half, int) +MS_REG_GPU_KERNEL_TWO(ScatterNdUpdate, + KernelAttr() + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeFloat16) + .AddOutputAttr(kNumberTypeFloat16), + ScatterNdFunctorKernel, half, int64_t) +MS_REG_GPU_KERNEL_TWO(ScatterNdUpdate, + KernelAttr() + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeInt32), + ScatterNdFunctorKernel, int, int) +MS_REG_GPU_KERNEL_TWO(ScatterNdUpdate, + KernelAttr() + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeInt32), + ScatterNdFunctorKernel, int, int64_t) +MS_REG_GPU_KERNEL_TWO(ScatterNdUpdate, + KernelAttr() + .AddInputAttr(kNumberTypeInt16) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt16) + .AddOutputAttr(kNumberTypeInt16), + ScatterNdFunctorKernel, int16_t, int) +MS_REG_GPU_KERNEL_TWO(ScatterNdUpdate, + KernelAttr() + .AddInputAttr(kNumberTypeInt16) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt16) + .AddOutputAttr(kNumberTypeInt16), + ScatterNdFunctorKernel, int16_t, int64_t) +MS_REG_GPU_KERNEL_TWO(ScatterNdUpdate, + KernelAttr() + 
.AddInputAttr(kNumberTypeUInt8) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeUInt8) + .AddOutputAttr(kNumberTypeUInt8), + ScatterNdFunctorKernel, uint8_t, int) +MS_REG_GPU_KERNEL_TWO(ScatterNdUpdate, + KernelAttr() + .AddInputAttr(kNumberTypeUInt8) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeUInt8) + .AddOutputAttr(kNumberTypeUInt8), + ScatterNdFunctorKernel, uint8_t, int64_t) +MS_REG_GPU_KERNEL_TWO(ScatterNdUpdate, + KernelAttr() + .AddInputAttr(kNumberTypeInt8) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt8) + .AddOutputAttr(kNumberTypeInt8), + ScatterNdFunctorKernel, int8_t, int) +MS_REG_GPU_KERNEL_TWO(ScatterNdUpdate, + KernelAttr() + .AddInputAttr(kNumberTypeInt8) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt8) + .AddOutputAttr(kNumberTypeInt8), + ScatterNdFunctorKernel, int8_t, int64_t) +MS_REG_GPU_KERNEL_TWO(ScatterNdUpdate, + KernelAttr() + .AddInputAttr(kNumberTypeBool) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeBool) + .AddOutputAttr(kNumberTypeBool), + ScatterNdFunctorKernel, bool, int) +MS_REG_GPU_KERNEL_TWO(ScatterNdUpdate, + KernelAttr() + .AddInputAttr(kNumberTypeBool) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeBool) + .AddOutputAttr(kNumberTypeBool), + ScatterNdFunctorKernel, bool, int64_t) + +// ScatterNdAdd +MS_REG_GPU_KERNEL_TWO(ScatterNdAdd, + KernelAttr() + .AddInputAttr(kNumberTypeFloat64) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat64) + .AddOutputAttr(kNumberTypeFloat64), + ScatterNdFunctorKernel, double, int) +MS_REG_GPU_KERNEL_TWO(ScatterNdAdd, + KernelAttr() + .AddInputAttr(kNumberTypeFloat64) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeFloat64) + .AddOutputAttr(kNumberTypeFloat64), + ScatterNdFunctorKernel, double, int64_t) +MS_REG_GPU_KERNEL_TWO(ScatterNdAdd, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + ScatterNdFunctorKernel, float, int) +MS_REG_GPU_KERNEL_TWO(ScatterNdAdd, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + ScatterNdFunctorKernel, float, int64_t) +MS_REG_GPU_KERNEL_TWO(ScatterNdAdd, + KernelAttr() + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat16) + .AddOutputAttr(kNumberTypeFloat16), + ScatterNdFunctorKernel, half, int) +MS_REG_GPU_KERNEL_TWO(ScatterNdAdd, + KernelAttr() + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeFloat16) + .AddOutputAttr(kNumberTypeFloat16), + ScatterNdFunctorKernel, half, int64_t) +MS_REG_GPU_KERNEL_TWO(ScatterNdAdd, + KernelAttr() + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeInt32), + ScatterNdFunctorKernel, int, int) +MS_REG_GPU_KERNEL_TWO(ScatterNdAdd, + KernelAttr() + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeInt32), + ScatterNdFunctorKernel, int, int64_t) +MS_REG_GPU_KERNEL_TWO(ScatterNdAdd, + KernelAttr() + .AddInputAttr(kNumberTypeInt16) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt16) + .AddOutputAttr(kNumberTypeInt16), + ScatterNdFunctorKernel, int16_t, int) +MS_REG_GPU_KERNEL_TWO(ScatterNdAdd, + KernelAttr() + .AddInputAttr(kNumberTypeInt16) + 
.AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt16) + .AddOutputAttr(kNumberTypeInt16), + ScatterNdFunctorKernel, int16_t, int64_t) +MS_REG_GPU_KERNEL_TWO(ScatterNdAdd, + KernelAttr() + .AddInputAttr(kNumberTypeUInt8) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeUInt8) + .AddOutputAttr(kNumberTypeUInt8), + ScatterNdFunctorKernel, uint8_t, int) +MS_REG_GPU_KERNEL_TWO(ScatterNdAdd, + KernelAttr() + .AddInputAttr(kNumberTypeUInt8) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeUInt8) + .AddOutputAttr(kNumberTypeUInt8), + ScatterNdFunctorKernel, uint8_t, int64_t) +MS_REG_GPU_KERNEL_TWO(ScatterNdAdd, + KernelAttr() + .AddInputAttr(kNumberTypeInt8) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt8) + .AddOutputAttr(kNumberTypeInt8), + ScatterNdFunctorKernel, int8_t, int) +MS_REG_GPU_KERNEL_TWO(ScatterNdAdd, + KernelAttr() + .AddInputAttr(kNumberTypeInt8) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt8) + .AddOutputAttr(kNumberTypeInt8), + ScatterNdFunctorKernel, int8_t, int64_t) +MS_REG_GPU_KERNEL_TWO(ScatterNdAdd, + KernelAttr() + .AddInputAttr(kNumberTypeBool) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeBool) + .AddOutputAttr(kNumberTypeBool), + ScatterNdFunctorKernel, bool, int) +MS_REG_GPU_KERNEL_TWO(ScatterNdAdd, + KernelAttr() + .AddInputAttr(kNumberTypeBool) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeBool) + .AddOutputAttr(kNumberTypeBool), + ScatterNdFunctorKernel, bool, int64_t) + +// ScatterNdSub +MS_REG_GPU_KERNEL_TWO(ScatterNdSub, + KernelAttr() + .AddInputAttr(kNumberTypeFloat64) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat64) + .AddOutputAttr(kNumberTypeFloat64), + ScatterNdFunctorKernel, double, int) +MS_REG_GPU_KERNEL_TWO(ScatterNdSub, + KernelAttr() + .AddInputAttr(kNumberTypeFloat64) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeFloat64) + .AddOutputAttr(kNumberTypeFloat64), + ScatterNdFunctorKernel, double, int64_t) +MS_REG_GPU_KERNEL_TWO(ScatterNdSub, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + ScatterNdFunctorKernel, float, int) +MS_REG_GPU_KERNEL_TWO(ScatterNdSub, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + ScatterNdFunctorKernel, float, int64_t) +MS_REG_GPU_KERNEL_TWO(ScatterNdSub, + KernelAttr() + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat16) + .AddOutputAttr(kNumberTypeFloat16), + ScatterNdFunctorKernel, half, int) +MS_REG_GPU_KERNEL_TWO(ScatterNdSub, + KernelAttr() + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeFloat16) + .AddOutputAttr(kNumberTypeFloat16), + ScatterNdFunctorKernel, half, int64_t) +MS_REG_GPU_KERNEL_TWO(ScatterNdSub, + KernelAttr() + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeInt32), + ScatterNdFunctorKernel, int, int) +MS_REG_GPU_KERNEL_TWO(ScatterNdSub, + KernelAttr() + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeInt32), + ScatterNdFunctorKernel, int, int64_t) +MS_REG_GPU_KERNEL_TWO(ScatterNdSub, + KernelAttr() + .AddInputAttr(kNumberTypeInt16) + .AddInputAttr(kNumberTypeInt32) + 
.AddInputAttr(kNumberTypeInt16) + .AddOutputAttr(kNumberTypeInt16), + ScatterNdFunctorKernel, int16_t, int) +MS_REG_GPU_KERNEL_TWO(ScatterNdSub, + KernelAttr() + .AddInputAttr(kNumberTypeInt16) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt16) + .AddOutputAttr(kNumberTypeInt16), + ScatterNdFunctorKernel, int16_t, int64_t) +MS_REG_GPU_KERNEL_TWO(ScatterNdSub, + KernelAttr() + .AddInputAttr(kNumberTypeUInt8) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeUInt8) + .AddOutputAttr(kNumberTypeUInt8), + ScatterNdFunctorKernel, uint8_t, int) +MS_REG_GPU_KERNEL_TWO(ScatterNdSub, + KernelAttr() + .AddInputAttr(kNumberTypeUInt8) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeUInt8) + .AddOutputAttr(kNumberTypeUInt8), + ScatterNdFunctorKernel, uint8_t, int64_t) +MS_REG_GPU_KERNEL_TWO(ScatterNdSub, + KernelAttr() + .AddInputAttr(kNumberTypeInt8) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt8) + .AddOutputAttr(kNumberTypeInt8), + ScatterNdFunctorKernel, int8_t, int) +MS_REG_GPU_KERNEL_TWO(ScatterNdSub, + KernelAttr() + .AddInputAttr(kNumberTypeInt8) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt8) + .AddOutputAttr(kNumberTypeInt8), + ScatterNdFunctorKernel, int8_t, int64_t) +MS_REG_GPU_KERNEL_TWO(ScatterNdSub, + KernelAttr() + .AddInputAttr(kNumberTypeBool) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeBool) + .AddOutputAttr(kNumberTypeBool), + ScatterNdFunctorKernel, bool, int) +MS_REG_GPU_KERNEL_TWO(ScatterNdSub, + KernelAttr() + .AddInputAttr(kNumberTypeBool) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeBool) + .AddOutputAttr(kNumberTypeBool), + ScatterNdFunctorKernel, bool, int64_t) +} // namespace kernel +} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_nd_update_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_nd_functor_gpu_kernel.h similarity index 83% rename from mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_nd_update_gpu_kernel.h rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_nd_functor_gpu_kernel.h index e7b8813d78f..1d217fa528e 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_nd_update_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_nd_functor_gpu_kernel.h @@ -14,21 +14,30 @@ * limitations under the License. 
 */

-#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_SCATTER_ND_UPDATE_GPU_KERNEL_H_
-#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_SCATTER_ND_UPDATE_GPU_KERNEL_H_
+#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_SCATTER_ND_FUNCTOR_GPU_KERNEL_H_
+#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_SCATTER_ND_FUNCTOR_GPU_KERNEL_H_

 #include <vector>
+#include <map>
+#include <string>
 #include "backend/kernel_compiler/gpu/gpu_kernel.h"
 #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
-#include "backend/kernel_compiler/gpu/cuda_impl/scatter_nd_update_impl.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/scatter_nd_functor_impl.cuh"

 namespace mindspore {
 namespace kernel {
+
+static const std::map<std::string, ScatterNdFunctorType> kScatterNdFunctorTypeMap = {
+  {"ScatterNdUpdate", SCATTER_ND_FUNC_UPDATE},
+  {"ScatterNdAdd", SCATTER_ND_FUNC_ADD},
+  {"ScatterNdSub", SCATTER_ND_FUNC_SUB},
+};
+
 template <typename T, typename S>
-class ScatterNdUpdateKernel : public GpuKernel {
+class ScatterNdFunctorKernel : public GpuKernel {
  public:
-  ScatterNdUpdateKernel() { ResetResource(); }
-  ~ScatterNdUpdateKernel() {
+  ScatterNdFunctorKernel() { ResetResource(); }
+  ~ScatterNdFunctorKernel() {
     if (indices_stride_ != nullptr) {
       device::gpu::GPUMemoryAllocator::GetInstance().FreeTensorMem(static_cast<void *>(indices_stride_));
     }
@@ -54,9 +63,9 @@ class ScatterNdUpdateKernel : public GpuKernel {
     CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
                                cudaMemcpyAsync(indices_stride_, &out_strides_[0], indices_len, cudaMemcpyHostToDevice,
                                                reinterpret_cast<cudaStream_t>(stream_ptr)),
-                               "cudaMemcpyAsync failed in ScatterNdUpdateGpuFwdKernel::Launch.");
-    CalScatterNdUpdate(unit_size_, num_units_, index_depth_, indices_stride_, indices, updates, input,
-                       reinterpret_cast<cudaStream_t>(stream_ptr));
+                               "cudaMemcpyAsync failed in ScatterNdFunctorGpuFwdKernel::Launch.");
+    CalScatterNdFunctor(scatter_nd_functor_type_, unit_size_, num_units_, index_depth_, indices_stride_, indices,
+                        updates, input, reinterpret_cast<cudaStream_t>(stream_ptr));
     CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
                                cudaMemcpyAsync(&output[0], &input[0], input_size_ * sizeof(T), cudaMemcpyDeviceToDevice,
                                                reinterpret_cast<cudaStream_t>(stream_ptr)),
@@ -65,15 +74,22 @@
   }

   bool Init(const CNodePtr &kernel_node) override {
+    std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
+    auto iter = kScatterNdFunctorTypeMap.find(kernel_name);
+    if (iter == kScatterNdFunctorTypeMap.end()) {
+      MS_LOG(EXCEPTION) << "ScatterNd functor " << kernel_name << " is not supported.";
+    } else {
+      scatter_nd_functor_type_ = iter->second;
+    }
     kernel_node_ = kernel_node;
     size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
     if (input_num != 3) {
-      MS_LOG(ERROR) << "Input number is " << input_num << ", but ScatterNdUpdate needs 3 inputs.";
+      MS_LOG(ERROR) << "Input number is " << input_num << ", but " << kernel_name << " needs 3 inputs.";
       return false;
     }
     size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
     if (output_num != 1) {
-      MS_LOG(ERROR) << "Output number is " << output_num << ", but ScatterNdUpdate has 1 output.";
+      MS_LOG(ERROR) << "Output number is " << output_num << ", but " << kernel_name << " has 1 output.";
       return false;
     }
@@ -151,6 +167,7 @@ class ScatterNdUpdateKernel : public GpuKernel {
   }

  private:
+  ScatterNdFunctorType scatter_nd_functor_type_;
   size_t input_size_;
   size_t indices_size_;
   size_t updates_size_;
@@ -167,4 +184,4 @@ class ScatterNdUpdateKernel : public GpuKernel {
 };
 } // namespace kernel
 } // namespace mindspore
-#endif // 
MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_SCATTER_ND_UPDATE_GPU_KERNEL_H_ +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_SCATTER_ND_FUNCTOR_GPU_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_nd_update_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_nd_update_gpu_kernel.cc deleted file mode 100644 index 449ff9b3dab..00000000000 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_nd_update_gpu_kernel.cc +++ /dev/null @@ -1,134 +0,0 @@ -/** - * Copyright 2021 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include "backend/kernel_compiler/gpu/arrays/scatter_nd_update_gpu_kernel.h" - -namespace mindspore { -namespace kernel { -MS_REG_GPU_KERNEL_TWO(ScatterNdUpdate, - KernelAttr() - .AddInputAttr(kNumberTypeFloat64) - .AddInputAttr(kNumberTypeInt32) - .AddInputAttr(kNumberTypeFloat64) - .AddOutputAttr(kNumberTypeFloat64), - ScatterNdUpdateKernel, double, int) -MS_REG_GPU_KERNEL_TWO(ScatterNdUpdate, - KernelAttr() - .AddInputAttr(kNumberTypeFloat64) - .AddInputAttr(kNumberTypeInt64) - .AddInputAttr(kNumberTypeFloat64) - .AddOutputAttr(kNumberTypeFloat64), - ScatterNdUpdateKernel, double, int64_t) -MS_REG_GPU_KERNEL_TWO(ScatterNdUpdate, - KernelAttr() - .AddInputAttr(kNumberTypeFloat32) - .AddInputAttr(kNumberTypeInt32) - .AddInputAttr(kNumberTypeFloat32) - .AddOutputAttr(kNumberTypeFloat32), - ScatterNdUpdateKernel, float, int) -MS_REG_GPU_KERNEL_TWO(ScatterNdUpdate, - KernelAttr() - .AddInputAttr(kNumberTypeFloat32) - .AddInputAttr(kNumberTypeInt64) - .AddInputAttr(kNumberTypeFloat32) - .AddOutputAttr(kNumberTypeFloat32), - ScatterNdUpdateKernel, float, int64_t) -MS_REG_GPU_KERNEL_TWO(ScatterNdUpdate, - KernelAttr() - .AddInputAttr(kNumberTypeFloat16) - .AddInputAttr(kNumberTypeInt32) - .AddInputAttr(kNumberTypeFloat16) - .AddOutputAttr(kNumberTypeFloat16), - ScatterNdUpdateKernel, half, int) -MS_REG_GPU_KERNEL_TWO(ScatterNdUpdate, - KernelAttr() - .AddInputAttr(kNumberTypeFloat16) - .AddInputAttr(kNumberTypeInt64) - .AddInputAttr(kNumberTypeFloat16) - .AddOutputAttr(kNumberTypeFloat16), - ScatterNdUpdateKernel, half, int64_t) -MS_REG_GPU_KERNEL_TWO(ScatterNdUpdate, - KernelAttr() - .AddInputAttr(kNumberTypeInt32) - .AddInputAttr(kNumberTypeInt32) - .AddInputAttr(kNumberTypeInt32) - .AddOutputAttr(kNumberTypeInt32), - ScatterNdUpdateKernel, int, int) -MS_REG_GPU_KERNEL_TWO(ScatterNdUpdate, - KernelAttr() - .AddInputAttr(kNumberTypeInt32) - .AddInputAttr(kNumberTypeInt64) - .AddInputAttr(kNumberTypeInt32) - .AddOutputAttr(kNumberTypeInt32), - ScatterNdUpdateKernel, int, int64_t) -MS_REG_GPU_KERNEL_TWO(ScatterNdUpdate, - KernelAttr() - .AddInputAttr(kNumberTypeInt16) - .AddInputAttr(kNumberTypeInt32) - .AddInputAttr(kNumberTypeInt16) - .AddOutputAttr(kNumberTypeInt16), - ScatterNdUpdateKernel, int16_t, int) -MS_REG_GPU_KERNEL_TWO(ScatterNdUpdate, - KernelAttr() - .AddInputAttr(kNumberTypeInt16) - .AddInputAttr(kNumberTypeInt64) - 
.AddInputAttr(kNumberTypeInt16) - .AddOutputAttr(kNumberTypeInt16), - ScatterNdUpdateKernel, int16_t, int64_t) -MS_REG_GPU_KERNEL_TWO(ScatterNdUpdate, - KernelAttr() - .AddInputAttr(kNumberTypeUInt8) - .AddInputAttr(kNumberTypeInt32) - .AddInputAttr(kNumberTypeUInt8) - .AddOutputAttr(kNumberTypeUInt8), - ScatterNdUpdateKernel, uint8_t, int) -MS_REG_GPU_KERNEL_TWO(ScatterNdUpdate, - KernelAttr() - .AddInputAttr(kNumberTypeUInt8) - .AddInputAttr(kNumberTypeInt64) - .AddInputAttr(kNumberTypeUInt8) - .AddOutputAttr(kNumberTypeUInt8), - ScatterNdUpdateKernel, uint8_t, int64_t) -MS_REG_GPU_KERNEL_TWO(ScatterNdUpdate, - KernelAttr() - .AddInputAttr(kNumberTypeInt8) - .AddInputAttr(kNumberTypeInt32) - .AddInputAttr(kNumberTypeInt8) - .AddOutputAttr(kNumberTypeInt8), - ScatterNdUpdateKernel, int8_t, int) -MS_REG_GPU_KERNEL_TWO(ScatterNdUpdate, - KernelAttr() - .AddInputAttr(kNumberTypeInt8) - .AddInputAttr(kNumberTypeInt64) - .AddInputAttr(kNumberTypeInt8) - .AddOutputAttr(kNumberTypeInt8), - ScatterNdUpdateKernel, int8_t, int64_t) -MS_REG_GPU_KERNEL_TWO(ScatterNdUpdate, - KernelAttr() - .AddInputAttr(kNumberTypeBool) - .AddInputAttr(kNumberTypeInt32) - .AddInputAttr(kNumberTypeBool) - .AddOutputAttr(kNumberTypeBool), - ScatterNdUpdateKernel, bool, int) -MS_REG_GPU_KERNEL_TWO(ScatterNdUpdate, - KernelAttr() - .AddInputAttr(kNumberTypeBool) - .AddInputAttr(kNumberTypeInt64) - .AddInputAttr(kNumberTypeBool) - .AddOutputAttr(kNumberTypeBool), - ScatterNdUpdateKernel, bool, int64_t) -} // namespace kernel -} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_nd_functor_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_nd_functor_impl.cu new file mode 100644 index 00000000000..3de39560b15 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_nd_functor_impl.cu @@ -0,0 +1,181 @@ +/** + * Copyright 2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "backend/kernel_compiler/gpu/cuda_impl/util.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/scatter_nd_functor_impl.cuh"
+
+template <typename T, typename S>
+__global__ void ScatterNdUpdate(const size_t unit_size, const size_t index_depth, const size_t updates_size,
+                                const S *out_strides, const S *indices, const T *updates, T *input) {
+  int i, j;
+  for (size_t read_index = blockIdx.x * blockDim.x + threadIdx.x; read_index < (updates_size);
+       read_index += blockDim.x * gridDim.x) {
+    size_t write_index = 0;
+    bool out_bound = false;
+
+    i = read_index / unit_size;
+    j = read_index % unit_size;
+
+    for (size_t k = 0; k < index_depth; k++) {
+      S indices_i = indices[i * index_depth + k];
+      out_bound |= indices_i < 0;
+      write_index += indices_i * out_strides[k] * unit_size;
+    }
+
+    write_index += j;
+
+    if (!out_bound) {
+      input[write_index] = updates[read_index];
+    }
+  }
+}
+
+template <typename T, typename S>
+__global__ void ScatterNdAdd(const size_t unit_size, const size_t index_depth, const size_t updates_size,
+                             const S *out_strides, const S *indices, const T *updates, T *input) {
+  int i, j;
+  for (size_t read_index = blockIdx.x * blockDim.x + threadIdx.x; read_index < (updates_size);
+       read_index += blockDim.x * gridDim.x) {
+    size_t write_index = 0;
+    bool out_bound = false;
+
+    i = read_index / unit_size;
+    j = read_index % unit_size;
+
+    for (size_t k = 0; k < index_depth; k++) {
+      S indices_i = indices[i * index_depth + k];
+      out_bound |= indices_i < 0;
+      write_index += indices_i * out_strides[k] * unit_size;
+    }
+
+    write_index += j;
+
+    if (!out_bound) {
+      MsAtomicAdd(&input[write_index], updates[read_index]);
+    }
+  }
+}
+
+template <typename T, typename S>
+__global__ void ScatterNdSub(const size_t unit_size, const size_t index_depth, const size_t updates_size,
+                             const S *out_strides, const S *indices, const T *updates, T *input) {
+  int i, j;
+  for (size_t read_index = blockIdx.x * blockDim.x + threadIdx.x; read_index < (updates_size);
+       read_index += blockDim.x * gridDim.x) {
+    size_t write_index = 0;
+    bool out_bound = false;
+
+    i = read_index / unit_size;
+    j = read_index % unit_size;
+
+    for (size_t k = 0; k < index_depth; k++) {
+      S indices_i = indices[i * index_depth + k];
+      out_bound |= indices_i < 0;
+      write_index += indices_i * out_strides[k] * unit_size;
+    }
+
+    write_index += j;
+
+    if (!out_bound) {
+      MsAtomicAdd(&input[write_index], -updates[read_index]);
+    }
+  }
+}
+
+template <typename T, typename S>
+void CalScatterNdFunctor(enum ScatterNdFunctorType func_type, const size_t &unit_size, const size_t &num_units,
+                         const size_t &index_depth, const S *out_strides, const S *indices, const T *updates, T *input,
+                         cudaStream_t cuda_stream) {
+  const size_t updates_size = unit_size * num_units;
+  switch (func_type) {
+    case SCATTER_ND_FUNC_UPDATE:
+      return ScatterNdUpdate<<<GET_BLOCKS(updates_size), GET_THREADS, 0, cuda_stream>>>(
+        unit_size, index_depth, updates_size, out_strides, indices, updates, input);
+    case SCATTER_ND_FUNC_ADD:
+      return ScatterNdAdd<<<GET_BLOCKS(updates_size), GET_THREADS, 0, cuda_stream>>>(
+        unit_size, index_depth, updates_size, out_strides, indices, updates, input);
+    case SCATTER_ND_FUNC_SUB:
+      return ScatterNdSub<<<GET_BLOCKS(updates_size), GET_THREADS, 0, cuda_stream>>>(
+        unit_size, index_depth, updates_size, out_strides, indices, updates, input);
+    default:
+      break;
+  }
+}
+
+template void CalScatterNdFunctor(enum ScatterNdFunctorType func_type, const size_t &unit_size,
+                                  const size_t &num_units, const size_t &index_depth,
+                                  const int64_t *out_strides, const int64_t *indices,
+                                  const double *updates, double *input, cudaStream_t cuda_stream);
+template void CalScatterNdFunctor(enum ScatterNdFunctorType func_type, const size_t &unit_size,
+                                  const size_t &num_units, const 
size_t &index_depth, + const int32_t *out_strides, const int32_t *indices, + const double *updates, double *input, cudaStream_t cuda_stream); +template void CalScatterNdFunctor(enum ScatterNdFunctorType func_type, const size_t &unit_size, + const size_t &num_units, const size_t &index_depth, + const int64_t *out_strides, const int64_t *indices, + const float *updates, float *input, cudaStream_t cuda_stream); +template void CalScatterNdFunctor(enum ScatterNdFunctorType func_type, const size_t &unit_size, + const size_t &num_units, const size_t &index_depth, + const int32_t *out_strides, const int32_t *indices, + const float *updates, float *input, cudaStream_t cuda_stream); +template void CalScatterNdFunctor(enum ScatterNdFunctorType func_type, const size_t &unit_size, + const size_t &num_units, const size_t &index_depth, + const int64_t *out_strides, const int64_t *indices, + const half *updates, half *input, cudaStream_t cuda_stream); +template void CalScatterNdFunctor(enum ScatterNdFunctorType func_type, const size_t &unit_size, + const size_t &num_units, const size_t &index_depth, + const int32_t *out_strides, const int32_t *indices, + const half *updates, half *input, cudaStream_t cuda_stream); +template void CalScatterNdFunctor(enum ScatterNdFunctorType func_type, const size_t &unit_size, + const size_t &num_units, const size_t &index_depth, + const int64_t *out_strides, const int64_t *indices, + const int32_t *updates, int32_t *input, cudaStream_t cuda_stream); +template void CalScatterNdFunctor(enum ScatterNdFunctorType func_type, const size_t &unit_size, + const size_t &num_units, const size_t &index_depth, + const int32_t *out_strides, const int32_t *indices, + const int32_t *updates, int32_t *input, cudaStream_t cuda_stream); +template void CalScatterNdFunctor(enum ScatterNdFunctorType func_type, const size_t &unit_size, + const size_t &num_units, const size_t &index_depth, + const int64_t *out_strides, const int64_t *indices, + const int16_t *updates, int16_t *input, cudaStream_t cuda_stream); +template void CalScatterNdFunctor(enum ScatterNdFunctorType func_type, const size_t &unit_size, + const size_t &num_units, const size_t &index_depth, + const int32_t *out_strides, const int32_t *indices, + const int16_t *updates, int16_t *input, cudaStream_t cuda_stream); +template void CalScatterNdFunctor(enum ScatterNdFunctorType func_type, const size_t &unit_size, + const size_t &num_units, const size_t &index_depth, + const int64_t *out_strides, const int64_t *indices, + const uint8_t *updates, uint8_t *input, cudaStream_t cuda_stream); +template void CalScatterNdFunctor(enum ScatterNdFunctorType func_type, const size_t &unit_size, + const size_t &num_units, const size_t &index_depth, + const int32_t *out_strides, const int32_t *indices, + const uint8_t *updates, uint8_t *input, cudaStream_t cuda_stream); +template void CalScatterNdFunctor(enum ScatterNdFunctorType func_type, const size_t &unit_size, + const size_t &num_units, const size_t &index_depth, + const int64_t *out_strides, const int64_t *indices, + const int8_t *updates, int8_t *input, cudaStream_t cuda_stream); +template void CalScatterNdFunctor(enum ScatterNdFunctorType func_type, const size_t &unit_size, + const size_t &num_units, const size_t &index_depth, + const int32_t *out_strides, const int32_t *indices, + const int8_t *updates, int8_t *input, cudaStream_t cuda_stream); +template void CalScatterNdFunctor(enum ScatterNdFunctorType func_type, const size_t &unit_size, + const size_t &num_units, const size_t 
&index_depth,
+                                  const int64_t *out_strides, const int64_t *indices,
+                                  const bool *updates, bool *input, cudaStream_t cuda_stream);
+template void CalScatterNdFunctor(enum ScatterNdFunctorType func_type, const size_t &unit_size,
+                                  const size_t &num_units, const size_t &index_depth,
+                                  const int32_t *out_strides, const int32_t *indices,
+                                  const bool *updates, bool *input, cudaStream_t cuda_stream);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_nd_update_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_nd_functor_impl.cuh
similarity index 63%
rename from mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_nd_update_impl.cuh
rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_nd_functor_impl.cuh
index 278b10ae9c2..03d9f5b5fb8 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_nd_update_impl.cuh
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_nd_functor_impl.cuh
@@ -14,13 +14,21 @@
  * limitations under the License.
  */

-#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_ND_UPDATE_IMPL_CUH_
-#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_ND_UPDATE_IMPL_CUH_
+#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_ND_FUNCTOR_IMPL_CUH_
+#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_ND_FUNCTOR_IMPL_CUH_

 #include "runtime/device/gpu/cuda_common.h"

-template <typename T, typename S>
-void CalScatterNdUpdate(const size_t &unit_size, const size_t &num_units, const size_t &index_depth,
-                        const S *out_strides, const S *indices, const T *updates, T *input, cudaStream_t cuda_stream);
+enum ScatterNdFunctorType {
+  SCATTER_ND_FUNC_UPDATE = 0,
+  SCATTER_ND_FUNC_ADD,
+  SCATTER_ND_FUNC_SUB,
+  SCATTER_ND_FUNC_INVALID_TYPE = 255
+};

-#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_ND_UPDATE_IMPL_CUH_
+template <typename T, typename S>
+void CalScatterNdFunctor(enum ScatterNdFunctorType func_type, const size_t &unit_size, const size_t &num_units,
+                         const size_t &index_depth, const S *out_strides, const S *indices, const T *updates, T *input,
+                         cudaStream_t cuda_stream);
+
+#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_ND_FUNCTOR_IMPL_CUH_
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_nd_update_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_nd_update_impl.cu
deleted file mode 100644
index 6f716edc9ec..00000000000
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_nd_update_impl.cu
+++ /dev/null
@@ -1,116 +0,0 @@
-/**
- * Copyright 2021 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "backend/kernel_compiler/gpu/cuda_impl/scatter_nd_update_impl.cuh"
-
-template <typename T, typename S>
-__global__ void ScatterNdUpdate(const size_t unit_size, const size_t index_depth, const size_t updates_size,
-                                const S *out_strides, const S *indices, const T *updates, T *input) {
-  int i, j;
-  for (size_t read_index = blockIdx.x * blockDim.x + threadIdx.x; read_index < (updates_size);
-       read_index += blockDim.x * gridDim.x) {
-    size_t write_index = 0;
-    bool out_bound = false;
-
-    i = read_index / unit_size;
-    j = read_index % unit_size;
-
-    for (size_t k = 0; k < index_depth; k++) {
-      S indices_i = indices[i * index_depth + k];
-      out_bound |= indices_i < 0;
-      write_index += indices_i * out_strides[k] * unit_size;
-    }
-
-    write_index += j;
-
-    if (!out_bound) {
-      input[write_index] = updates[read_index];
-    }
-  }
-}
-
-template <typename T, typename S>
-void CalScatterNdUpdate(const size_t &unit_size, const size_t &num_units, const size_t &index_depth,
-                        const S *out_strides, const S *indices, const T *updates, T *input, cudaStream_t cuda_stream) {
-  const size_t updates_size = unit_size * num_units;
-  ScatterNdUpdate<<<GET_BLOCKS(updates_size), GET_THREADS, 0, cuda_stream>>>(unit_size, index_depth, updates_size,
-                                                                             out_strides, indices, updates, input);
-}
-
-template void CalScatterNdUpdate(const size_t &unit_size, const size_t &num_units,
-                                 const size_t &index_depth, const int64_t *out_strides,
-                                 const int64_t *indices, const double *updates, double *input,
-                                 cudaStream_t cuda_stream);
-template void CalScatterNdUpdate(const size_t &unit_size, const size_t &num_units,
-                                 const size_t &index_depth, const int32_t *out_strides,
-                                 const int32_t *indices, const double *updates, double *input,
-                                 cudaStream_t cuda_stream);
-template void CalScatterNdUpdate(const size_t &unit_size, const size_t &num_units,
-                                 const size_t &index_depth, const int64_t *out_strides,
-                                 const int64_t *indices, const float *updates, float *input,
-                                 cudaStream_t cuda_stream);
-template void CalScatterNdUpdate(const size_t &unit_size, const size_t &num_units,
-                                 const size_t &index_depth, const int32_t *out_strides,
-                                 const int32_t *indices, const float *updates, float *input,
-                                 cudaStream_t cuda_stream);
-template void CalScatterNdUpdate(const size_t &unit_size, const size_t &num_units,
-                                 const size_t &index_depth, const int64_t *out_strides,
-                                 const int64_t *indices, const half *updates, half *input,
-                                 cudaStream_t cuda_stream);
-template void CalScatterNdUpdate(const size_t &unit_size, const size_t &num_units,
-                                 const size_t &index_depth, const int32_t *out_strides,
-                                 const int32_t *indices, const half *updates, half *input,
-                                 cudaStream_t cuda_stream);
-template void CalScatterNdUpdate(const size_t &unit_size, const size_t &num_units,
-                                 const size_t &index_depth, const int64_t *out_strides,
-                                 const int64_t *indices, const int32_t *updates, int32_t *input,
-                                 cudaStream_t cuda_stream);
-template void CalScatterNdUpdate(const size_t &unit_size, const size_t &num_units,
-                                 const size_t &index_depth, const int32_t *out_strides,
-                                 const int32_t *indices, const int32_t *updates, int32_t *input,
-                                 cudaStream_t cuda_stream);
-template void CalScatterNdUpdate(const size_t &unit_size, const size_t &num_units,
-                                 const size_t &index_depth, const int64_t *out_strides,
-                                 const int64_t *indices, const int16_t *updates, int16_t *input,
-                                 cudaStream_t cuda_stream);
-template void CalScatterNdUpdate(const size_t &unit_size, const size_t &num_units,
-                                 const size_t &index_depth, const int32_t *out_strides,
-                                 const int32_t *indices, const int16_t *updates, int16_t *input,
-                                 cudaStream_t cuda_stream);
-template void 
CalScatterNdUpdate(const size_t &unit_size, const size_t &num_units, - const size_t &index_depth, const int64_t *out_strides, - const int64_t *indices, const uint8_t *updates, uint8_t *input, - cudaStream_t cuda_stream); -template void CalScatterNdUpdate(const size_t &unit_size, const size_t &num_units, - const size_t &index_depth, const int32_t *out_strides, - const int32_t *indices, const uint8_t *updates, uint8_t *input, - cudaStream_t cuda_stream); -template void CalScatterNdUpdate(const size_t &unit_size, const size_t &num_units, - const size_t &index_depth, const int64_t *out_strides, - const int64_t *indices, const int8_t *updates, int8_t *input, - cudaStream_t cuda_stream); -template void CalScatterNdUpdate(const size_t &unit_size, const size_t &num_units, - const size_t &index_depth, const int32_t *out_strides, - const int32_t *indices, const int8_t *updates, int8_t *input, - cudaStream_t cuda_stream); -template void CalScatterNdUpdate(const size_t &unit_size, const size_t &num_units, - const size_t &index_depth, const int64_t *out_strides, - const int64_t *indices, const bool *updates, bool *input, - cudaStream_t cuda_stream); -template void CalScatterNdUpdate(const size_t &unit_size, const size_t &num_units, - const size_t &index_depth, const int32_t *out_strides, - const int32_t *indices, const bool *updates, bool *input, - cudaStream_t cuda_stream); diff --git a/mindspore/ops/operations/array_ops.py b/mindspore/ops/operations/array_ops.py index 6ec5981485e..33fb685809c 100755 --- a/mindspore/ops/operations/array_ops.py +++ b/mindspore/ops/operations/array_ops.py @@ -4479,7 +4479,7 @@ class ScatterNdAdd(_ScatterNdOp): ValueError: If the shape of `updates` is not equal to `indices_shape[:-1] + x_shape[indices_shape[-1]:]`. Supported Platforms: - ``Ascend`` + ``Ascend`` ``GPU`` Examples: >>> input_x = Parameter(Tensor(np.array([1, 2, 3, 4, 5, 6, 7, 8]), mindspore.float32), name="x") @@ -4556,7 +4556,7 @@ class ScatterNdSub(_ScatterNdOp): ValueError: If the shape of `updates` is not equal to `indices_shape[:-1] + x_shape[indices_shape[-1]:]`. Supported Platforms: - ``Ascend`` + ``Ascend`` ``GPU`` Examples: >>> input_x = Parameter(Tensor(np.array([1, 2, 3, 4, 5, 6, 7, 8]), mindspore.float32), name="x") diff --git a/tests/st/ops/gpu/test_scatter_nd_func_op.py b/tests/st/ops/gpu/test_scatter_nd_func_op.py new file mode 100644 index 00000000000..abe060c3f14 --- /dev/null +++ b/tests/st/ops/gpu/test_scatter_nd_func_op.py @@ -0,0 +1,234 @@ +# Copyright 2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================
+
+import numpy as np
+import pytest
+
+import mindspore.context as context
+import mindspore.nn as nn
+from mindspore import Tensor, Parameter
+import mindspore.common.dtype as mstype
+import mindspore.ops as ops
+
+context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
+
+func_map = {
+    "update": ops.ScatterNdUpdate,
+    "add": ops.ScatterNdAdd,
+    "sub": ops.ScatterNdSub,
+}
+
+
+class TestScatterNdFuncNet(nn.Cell):
+    def __init__(self, func, lock, inputx, indices, updates):
+        super(TestScatterNdFuncNet, self).__init__()
+
+        self.scatter_func = func_map[func](use_locking=lock)
+        self.inputx = Parameter(inputx, name="inputx")
+        self.indices = Parameter(indices, name="indices")
+        self.updates = Parameter(updates, name="updates")
+
+    def construct(self):
+        out = self.scatter_func(self.inputx, self.indices, self.updates)
+        return out
+
+
+def scatter_nd_func_net(func, inputx, indices, updates):
+    lock = True
+    net = TestScatterNdFuncNet(func, lock, inputx, indices, updates)
+    return net()
+
+
+def scatter_nd_func_use_locking_false_net(func, inputx, indices, updates):
+    lock = False
+    net = TestScatterNdFuncNet(func, lock, inputx, indices, updates)
+    return net()
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_scatter_nd_func_small_float32():
+    inputx = Tensor(np.array([[-0.1, 0.3, 3.6], [0.4, 0.5, -3.2]]), mstype.float32)
+    indices = Tensor(np.array([[0, 0], [1, 1]]), mstype.int32)
+    updates = Tensor(np.array([1.0, 2.2]), mstype.float32)
+
+    # update
+    output = scatter_nd_func_net("update", inputx, indices, updates)
+    expected = np.array([[1.0, 0.3, 3.6], [0.4, 2.2, -3.2]])
+    np.testing.assert_array_almost_equal(output.asnumpy(), expected)
+
+    # add
+    output = scatter_nd_func_net("add", inputx, indices, updates)
+    expected = np.array([[0.9, 0.3, 3.6], [0.4, 2.7, -3.2]])
+    np.testing.assert_array_almost_equal(output.asnumpy(), expected)
+
+    # sub
+    output = scatter_nd_func_net("sub", inputx, indices, updates)
+    expected = np.array([[-1.1, 0.3, 3.6], [0.4, -1.7, -3.2]])
+    np.testing.assert_array_almost_equal(output.asnumpy(), expected)
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_scatter_nd_func_input_updated():
+    inputx = Tensor(np.array([[-0.1, 0.3, 3.6], [0.4, 0.5, -3.2]]), mstype.float32)
+    indices = Tensor(np.array([[0, 0], [1, 1]]), mstype.int32)
+    updates = Tensor(np.array([1.0, 2.2]), mstype.float32)
+    lock = True
+
+    # update
+    net = TestScatterNdFuncNet("update", lock, inputx, indices, updates)
+    net()
+    expected = np.array([[1.0, 0.3, 3.6], [0.4, 2.2, -3.2]])
+    np.testing.assert_array_almost_equal(net.inputx.asnumpy(), expected)
+
+    # add
+    net = TestScatterNdFuncNet("add", lock, inputx, indices, updates)
+    net()
+    expected = np.array([[0.9, 0.3, 3.6], [0.4, 2.7, -3.2]])
+    np.testing.assert_array_almost_equal(net.inputx.asnumpy(), expected)
+
+    # sub
+    net = TestScatterNdFuncNet("sub", lock, inputx, indices, updates)
+    net()
+    expected = np.array([[-1.1, 0.3, 3.6], [0.4, -1.7, -3.2]])
+    np.testing.assert_array_almost_equal(net.inputx.asnumpy(), expected)
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_scatter_nd_func_small_float32_using_locking_false():
+    inputx = Tensor(np.array([[-0.1, 0.3, 3.6], [0.4, 0.5, -3.2]]), mstype.float32)
+    indices = Tensor(np.array([[0, 0], [1, 1]]), mstype.int32)
+    updates = Tensor(np.array([1.0, 2.2]), mstype.float32)
+
+    # update
+    output = scatter_nd_func_use_locking_false_net("update", inputx, indices, updates)
+    expected = np.array([[1.0, 0.3, 3.6], [0.4, 2.2, -3.2]])
+    np.testing.assert_array_almost_equal(output.asnumpy(), expected)
+
+    # add
+    output = scatter_nd_func_use_locking_false_net("add", inputx, indices, updates)
+    expected = np.array([[0.9, 0.3, 3.6], [0.4, 2.7, -3.2]])
+    np.testing.assert_array_almost_equal(output.asnumpy(), expected)
+
+    # sub
+    output = scatter_nd_func_use_locking_false_net("sub", inputx, indices, updates)
+    expected = np.array([[-1.1, 0.3, 3.6], [0.4, -1.7, -3.2]])
+    np.testing.assert_array_almost_equal(output.asnumpy(), expected)
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_scatter_nd_func_small_int32():
+    inputx = Tensor(np.array([1, 2, 3, 4, 5, 6, 7, 8]), mstype.float32)
+    indices = Tensor(np.array([[4], [3], [1], [7]]), mstype.int32)
+    updates = Tensor(np.array([9, 10, 11, 12]), mstype.float32)
+
+    # update
+    output = scatter_nd_func_net("update", inputx, indices, updates)
+    expected = np.array([1, 11, 3, 10, 9, 6, 7, 12])
+    np.testing.assert_array_almost_equal(output.asnumpy(), expected)
+
+    # add
+    output = scatter_nd_func_net("add", inputx, indices, updates)
+    expected = np.array([1, 13, 3, 14, 14, 6, 7, 20])
+    np.testing.assert_array_almost_equal(output.asnumpy(), expected)
+
+    # sub
+    output = scatter_nd_func_net("sub", inputx, indices, updates)
+    expected = np.array([1, -9, 3, -6, -4, 6, 7, -4])
+    np.testing.assert_array_almost_equal(output.asnumpy(), expected)
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_scatter_nd_func_multi_dims():
+    inputx = Tensor(np.zeros((4, 4, 4)), mstype.float32)
+    indices = Tensor(np.array([[0], [2]]), mstype.int32)
+    updates = Tensor(
+        np.array(
+            [
+                [[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]],
+                [[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]],
+            ]
+        ),
+        mstype.float32,
+    )
+
+    # update
+    output = scatter_nd_func_net("update", inputx, indices, updates)
+    expected = np.array(
+        [
+            [[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]],
+            [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
+            [[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]],
+            [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
+        ]
+    )
+    np.testing.assert_array_almost_equal(output.asnumpy(), expected)
+
+    # add
+    output = scatter_nd_func_net("add", inputx, indices, updates)
+    expected = np.array(
+        [
+            [[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]],
+            [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
+            [[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]],
+            [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
+        ]
+    )
+    np.testing.assert_array_almost_equal(output.asnumpy(), expected)
+
+    # sub
+    output = scatter_nd_func_net("sub", inputx, indices, updates)
+    expected = np.array(
+        [
+            [[-5, -5, -5, -5], [-6, -6, -6, -6], [-7, -7, -7, -7], [-8, -8, -8, -8]],
+            [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
+            [[-5, -5, -5, -5], [-6, -6, -6, -6], [-7, -7, -7, -7], [-8, -8, -8, -8]],
+            [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]],
+        ]
+    )
+    np.testing.assert_array_almost_equal(output.asnumpy(), expected)
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_scatter_nd_func_one_value():
+    inputx = Tensor(np.array([[-0.1, 0.3, 3.6], [0.4, 0.5, -3.2]]), mstype.float32)
+    indices = Tensor(np.array([[0, 1]]), 
mstype.int32) + updates = Tensor(np.array([1.0]), mstype.float32) + + # update + output = scatter_nd_func_net("update", inputx, indices, updates) + expected = np.array([[-0.1, 1.0, 3.6], [0.4, 0.5, -3.2]]) + np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + # add + output = scatter_nd_func_net("add", inputx, indices, updates) + expected = np.array([[-0.1, 1.3, 3.6], [0.4, 0.5, -3.2]]) + np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + # sub + output = scatter_nd_func_net("sub", inputx, indices, updates) + expected = np.array([[-0.1, -0.7, 3.6], [0.4, 0.5, -3.2]]) + np.testing.assert_array_almost_equal(output.asnumpy(), expected) diff --git a/tests/st/ops/gpu/test_scatter_nd_update_op.py b/tests/st/ops/gpu/test_scatter_nd_update_op.py deleted file mode 100644 index c993cc06cb6..00000000000 --- a/tests/st/ops/gpu/test_scatter_nd_update_op.py +++ /dev/null @@ -1,131 +0,0 @@ -# Copyright 2021 Huawei Technologies Co., Ltd -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================ - -import numpy as np -import pytest - -import mindspore.context as context -import mindspore.nn as nn -from mindspore import Tensor, Parameter -import mindspore.common.dtype as mstype -import mindspore.ops as ops - -context.set_context(mode=context.GRAPH_MODE, device_target="GPU") - - -@pytest.mark.level0 -@pytest.mark.platform_x86_gpu_traning -@pytest.mark.env_onecard -def test_op1(): - class ScatterNdUpdate(nn.Cell): - def __init__(self): - super(ScatterNdUpdate, self).__init__() - self.scatter_nd_update = ops.ScatterNdUpdate() - self.x = Parameter( - Tensor(np.array([[-0.1, 0.3, 3.6], [0.4, 0.5, -3.2]]), mstype.float32), name="x" - ) - - def construct(self, indices, update): - return self.scatter_nd_update(self.x, indices, update) - - indices = Tensor(np.array([[0, 0], [1, 1]]), mstype.int32) - update = Tensor(np.array([1.0, 2.2]), mstype.float32) - - scatter_nd_update = ScatterNdUpdate() - scatter_nd_update(indices, update) - expect = [[1.0, 0.3, 3.6], [0.4, 2.2, -3.2]] - assert np.allclose(scatter_nd_update.x.data.asnumpy(), np.array(expect, np.float)) - - -@pytest.mark.level0 -@pytest.mark.platform_x86_gpu_traning -@pytest.mark.env_onecard -def test_op2(): - class ScatterNdUpdate(nn.Cell): - def __init__(self): - super(ScatterNdUpdate, self).__init__() - self.scatter_nd_update = ops.ScatterNdUpdate() - self.x = Parameter(Tensor(np.array([1, 2, 3, 4, 5, 6, 7, 8]), mstype.float32), name="x") - - def construct(self, indices, update): - return self.scatter_nd_update(self.x, indices, update) - - indices = Tensor(np.array([[4], [3], [1], [7]]), mstype.int32) - update = Tensor(np.array([9, 10, 11, 12]), mstype.float32) - - scatter_nd_update = ScatterNdUpdate() - scatter_nd_update(indices, update) - expect = [1, 11, 3, 10, 9, 6, 7, 12] - assert np.allclose(scatter_nd_update.x.data.asnumpy(), np.array(expect, dtype=float)) - - -@pytest.mark.level0 -@pytest.mark.platform_x86_gpu_traning -@pytest.mark.env_onecard -def 
test_op3(): - class ScatterNdUpdate(nn.Cell): - def __init__(self): - super(ScatterNdUpdate, self).__init__() - self.scatter_nd_update = ops.ScatterNdUpdate() - self.x = Parameter(Tensor(np.zeros((4, 4, 4)), mstype.float32), name="x") - - def construct(self, indices, update): - return self.scatter_nd_update(self.x, indices, update) - - indices = Tensor(np.array([[0], [2]]), mstype.int32) - update = Tensor( - np.array( - [ - [[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]], - [[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]], - ] - ), - mstype.float32, - ) - - scatter_nd_update = ScatterNdUpdate() - scatter_nd_update(indices, update) - expect = [ - [[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]], - [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], - [[5, 5, 5, 5], [6, 6, 6, 6], [7, 7, 7, 7], [8, 8, 8, 8]], - [[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]], - ] - assert np.allclose(scatter_nd_update.x.data.asnumpy(), np.array(expect, dtype=float)) - - -@pytest.mark.level0 -@pytest.mark.platform_x86_gpu_traning -@pytest.mark.env_onecard -def test_op4(): - class ScatterNdUpdate(nn.Cell): - def __init__(self): - super(ScatterNdUpdate, self).__init__() - self.scatter_nd_update = ops.ScatterNdUpdate() - self.x = Parameter( - Tensor(np.array([[-0.1, 0.3, 3.6], [0.4, 0.5, -3.2]]), mstype.float32), name="x" - ) - - def construct(self, indices, update): - return self.scatter_nd_update(self.x, indices, update) - - indices = Tensor(np.array([[0, 1]]), mstype.int32) - update = Tensor(np.array([1.0]), mstype.float32) - - scatter_nd_update = ScatterNdUpdate() - out = scatter_nd_update(indices, update) - assert np.allclose(out.asnumpy(), scatter_nd_update.x.data.asnumpy()) - expect = [[-0.1, 1.0, 3.6], [0.4, 0.5, -3.2]] - assert np.allclose(out.asnumpy(), np.array(expect, np.float))
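For reference, a minimal sketch of how the newly registered GPU kernels are driven from Python, following the documented op usage in array_ops.py; the values mirror test_scatter_nd_func_small_int32 above, and a CUDA-enabled MindSpore build is assumed:

import numpy as np

import mindspore.context as context
import mindspore.common.dtype as mstype
import mindspore.ops as ops
from mindspore import Tensor, Parameter

context.set_context(device_target="GPU")

# input_x is updated in place; each row of indices names a slice of input_x,
# and the matching entry of updates is accumulated into that slice.
input_x = Parameter(Tensor(np.array([1, 2, 3, 4, 5, 6, 7, 8]), mstype.float32), name="x")
indices = Tensor(np.array([[4], [3], [1], [7]]), mstype.int32)
updates = Tensor(np.array([9, 10, 11, 12]), mstype.float32)

scatter_nd_add = ops.ScatterNdAdd(use_locking=True)
output = scatter_nd_add(input_x, indices, updates)
# output (and input_x) is now [1., 13., 3., 14., 14., 6., 7., 20.]

ScatterNdUpdate and ScatterNdSub are invoked the same way; only the operation applied at each indexed slice differs (assign, add, or subtract), which corresponds to the func_type switch in CalScatterNdFunctor.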