From 201f85f6364635c0ad74e08308d470a28b3d7a60 Mon Sep 17 00:00:00 2001
From: xcnick
Date: Sun, 6 Jun 2021 22:51:13 +0800
Subject: [PATCH] Add ScatterSub op for GPU

---
 .../gpu/arrays/scatter_add_gpu_kernel.cc      |  57 --
 .../gpu/arrays/scatter_functor_gpu_kernel.cc  | 237 ++++++
 ..._kernel.h => scatter_functor_gpu_kernel.h} |  47 +-
 .../gpu/arrays/scatter_update_gpu_kernel.cc   |  57 --
 .../gpu/arrays/scatter_update_gpu_kernel.h    | 109 ---
 .../gpu/cuda_impl/scatter_add_impl.cu         |  49 --
 .../gpu/cuda_impl/scatter_functor_impl.cu     | 103 +++
 ..._add_impl.cuh => scatter_functor_impl.cuh} |  21 +-
 .../gpu/cuda_impl/scatter_update_impl.cu      |  48 --
 .../gpu/cuda_impl/scatter_update_impl.cuh     |  26 -
 mindspore/core/abstract/infer_functions.h     |   2 +
 mindspore/core/abstract/prim_arrays.cc        |  15 +
 .../core/abstract/primitive_infer_map.cc      |   1 +
 mindspore/core/base/core_ops.h                |   1 +
 mindspore/ops/operations/array_ops.py         |   9 +-
 tests/st/ops/gpu/test_scatter_add_op.py       | 338 --------
 tests/st/ops/gpu/test_scatter_func_op.py      | 720 ++++++++++++++++++
 tests/st/ops/gpu/test_scatter_update_op.py    | 359 ---------
 18 files changed, 1134 insertions(+), 1065 deletions(-)
 delete mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_add_gpu_kernel.cc
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_functor_gpu_kernel.cc
 rename mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/{scatter_add_gpu_kernel.h => scatter_functor_gpu_kernel.h} (73%)
 delete mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_update_gpu_kernel.cc
 delete mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_update_gpu_kernel.h
 delete mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_add_impl.cu
 create mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_functor_impl.cu
 rename mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/{scatter_add_impl.cuh => scatter_functor_impl.cuh} (62%)
 delete mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_update_impl.cu
 delete mode 100644 mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_update_impl.cuh
 delete mode 100644 tests/st/ops/gpu/test_scatter_add_op.py
 create mode 100644 tests/st/ops/gpu/test_scatter_func_op.py
 delete mode 100644 tests/st/ops/gpu/test_scatter_update_op.py

diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_add_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_add_gpu_kernel.cc
deleted file mode 100644
index 9051d617f00..00000000000
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_add_gpu_kernel.cc
+++ /dev/null
@@ -1,57 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ - -#include "backend/kernel_compiler/gpu/arrays/scatter_add_gpu_kernel.h" - -namespace mindspore { -namespace kernel { -MS_REG_GPU_KERNEL_ONE(ScatterAdd, - KernelAttr() - .AddInputAttr(kNumberTypeFloat32) - .AddInputAttr(kNumberTypeInt32) - .AddInputAttr(kNumberTypeFloat32) - .AddOutputAttr(kNumberTypeFloat32), - ScatterAddKernel, float) -MS_REG_GPU_KERNEL_ONE(ScatterAdd, - KernelAttr() - .AddInputAttr(kNumberTypeFloat16) - .AddInputAttr(kNumberTypeInt32) - .AddInputAttr(kNumberTypeFloat16) - .AddOutputAttr(kNumberTypeFloat16), - ScatterAddKernel, half) -MS_REG_GPU_KERNEL_ONE(ScatterAdd, - KernelAttr() - .AddInputAttr(kNumberTypeInt32) - .AddInputAttr(kNumberTypeInt32) - .AddInputAttr(kNumberTypeInt32) - .AddOutputAttr(kNumberTypeInt32), - ScatterAddKernel, int) -MS_REG_GPU_KERNEL_ONE(ScatterAdd, - KernelAttr() - .AddInputAttr(kNumberTypeInt8) - .AddInputAttr(kNumberTypeInt32) - .AddInputAttr(kNumberTypeInt8) - .AddOutputAttr(kNumberTypeInt8), - ScatterAddKernel, int8_t) -MS_REG_GPU_KERNEL_ONE(ScatterAdd, - KernelAttr() - .AddInputAttr(kNumberTypeUInt8) - .AddInputAttr(kNumberTypeInt32) - .AddInputAttr(kNumberTypeUInt8) - .AddOutputAttr(kNumberTypeUInt8), - ScatterAddKernel, uint8_t) -} // namespace kernel -} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_functor_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_functor_gpu_kernel.cc new file mode 100644 index 00000000000..0d7f89e3421 --- /dev/null +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_functor_gpu_kernel.cc @@ -0,0 +1,237 @@ +/** + * Copyright 2020-2021 Huawei Technologies Co., Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "backend/kernel_compiler/gpu/arrays/scatter_functor_gpu_kernel.h" + +namespace mindspore { +namespace kernel { +// ScatterUpdate +MS_REG_GPU_KERNEL_TWO(ScatterUpdate, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + ScatterFunctorKernel, float, int) +MS_REG_GPU_KERNEL_TWO(ScatterUpdate, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + ScatterFunctorKernel, float, int64_t) +MS_REG_GPU_KERNEL_TWO(ScatterUpdate, + KernelAttr() + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat16) + .AddOutputAttr(kNumberTypeFloat16), + ScatterFunctorKernel, half, int) +MS_REG_GPU_KERNEL_TWO(ScatterUpdate, + KernelAttr() + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeFloat16) + .AddOutputAttr(kNumberTypeFloat16), + ScatterFunctorKernel, half, int64_t) +MS_REG_GPU_KERNEL_TWO(ScatterUpdate, + KernelAttr() + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeInt32), + ScatterFunctorKernel, int, int) +MS_REG_GPU_KERNEL_TWO(ScatterUpdate, + KernelAttr() + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeInt32), + ScatterFunctorKernel, int, int64_t) +MS_REG_GPU_KERNEL_TWO(ScatterUpdate, + KernelAttr() + .AddInputAttr(kNumberTypeInt8) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt8) + .AddOutputAttr(kNumberTypeInt8), + ScatterFunctorKernel, int8_t, int) +MS_REG_GPU_KERNEL_TWO(ScatterUpdate, + KernelAttr() + .AddInputAttr(kNumberTypeInt8) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt8) + .AddOutputAttr(kNumberTypeInt8), + ScatterFunctorKernel, int8_t, int64_t) +MS_REG_GPU_KERNEL_TWO(ScatterUpdate, + KernelAttr() + .AddInputAttr(kNumberTypeUInt8) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeUInt8) + .AddOutputAttr(kNumberTypeUInt8), + ScatterFunctorKernel, uint8_t, int) +MS_REG_GPU_KERNEL_TWO(ScatterUpdate, + KernelAttr() + .AddInputAttr(kNumberTypeUInt8) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeUInt8) + .AddOutputAttr(kNumberTypeUInt8), + ScatterFunctorKernel, uint8_t, int64_t) + +// ScatterAdd +MS_REG_GPU_KERNEL_TWO(ScatterAdd, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + ScatterFunctorKernel, float, int) +MS_REG_GPU_KERNEL_TWO(ScatterAdd, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + ScatterFunctorKernel, float, int64_t) +MS_REG_GPU_KERNEL_TWO(ScatterAdd, + KernelAttr() + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat16) + .AddOutputAttr(kNumberTypeFloat16), + ScatterFunctorKernel, half, int) +MS_REG_GPU_KERNEL_TWO(ScatterAdd, + KernelAttr() + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeFloat16) + .AddOutputAttr(kNumberTypeFloat16), + ScatterFunctorKernel, half, int64_t) +MS_REG_GPU_KERNEL_TWO(ScatterAdd, + KernelAttr() + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + 
.AddOutputAttr(kNumberTypeInt32), + ScatterFunctorKernel, int, int) +MS_REG_GPU_KERNEL_TWO(ScatterAdd, + KernelAttr() + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeInt32), + ScatterFunctorKernel, int, int64_t) +MS_REG_GPU_KERNEL_TWO(ScatterAdd, + KernelAttr() + .AddInputAttr(kNumberTypeInt8) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt8) + .AddOutputAttr(kNumberTypeInt8), + ScatterFunctorKernel, int8_t, int) +MS_REG_GPU_KERNEL_TWO(ScatterAdd, + KernelAttr() + .AddInputAttr(kNumberTypeInt8) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt8) + .AddOutputAttr(kNumberTypeInt8), + ScatterFunctorKernel, int8_t, int64_t) +MS_REG_GPU_KERNEL_TWO(ScatterAdd, + KernelAttr() + .AddInputAttr(kNumberTypeUInt8) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeUInt8) + .AddOutputAttr(kNumberTypeUInt8), + ScatterFunctorKernel, uint8_t, int) +MS_REG_GPU_KERNEL_TWO(ScatterAdd, + KernelAttr() + .AddInputAttr(kNumberTypeUInt8) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeUInt8) + .AddOutputAttr(kNumberTypeUInt8), + ScatterFunctorKernel, uint8_t, int64_t) + +// ScatterSub +MS_REG_GPU_KERNEL_TWO(ScatterSub, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + ScatterFunctorKernel, float, int) +MS_REG_GPU_KERNEL_TWO(ScatterSub, + KernelAttr() + .AddInputAttr(kNumberTypeFloat32) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeFloat32) + .AddOutputAttr(kNumberTypeFloat32), + ScatterFunctorKernel, float, int64_t) +MS_REG_GPU_KERNEL_TWO(ScatterSub, + KernelAttr() + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeFloat16) + .AddOutputAttr(kNumberTypeFloat16), + ScatterFunctorKernel, half, int) +MS_REG_GPU_KERNEL_TWO(ScatterSub, + KernelAttr() + .AddInputAttr(kNumberTypeFloat16) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeFloat16) + .AddOutputAttr(kNumberTypeFloat16), + ScatterFunctorKernel, half, int64_t) +MS_REG_GPU_KERNEL_TWO(ScatterSub, + KernelAttr() + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeInt32), + ScatterFunctorKernel, int, int) +MS_REG_GPU_KERNEL_TWO(ScatterSub, + KernelAttr() + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt32) + .AddOutputAttr(kNumberTypeInt32), + ScatterFunctorKernel, int, int64_t) +MS_REG_GPU_KERNEL_TWO(ScatterSub, + KernelAttr() + .AddInputAttr(kNumberTypeInt8) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeInt8) + .AddOutputAttr(kNumberTypeInt8), + ScatterFunctorKernel, int8_t, int) +MS_REG_GPU_KERNEL_TWO(ScatterSub, + KernelAttr() + .AddInputAttr(kNumberTypeInt8) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeInt8) + .AddOutputAttr(kNumberTypeInt8), + ScatterFunctorKernel, int8_t, int64_t) +MS_REG_GPU_KERNEL_TWO(ScatterSub, + KernelAttr() + .AddInputAttr(kNumberTypeUInt8) + .AddInputAttr(kNumberTypeInt32) + .AddInputAttr(kNumberTypeUInt8) + .AddOutputAttr(kNumberTypeUInt8), + ScatterFunctorKernel, uint8_t, int) +MS_REG_GPU_KERNEL_TWO(ScatterSub, + KernelAttr() + .AddInputAttr(kNumberTypeUInt8) + .AddInputAttr(kNumberTypeInt64) + .AddInputAttr(kNumberTypeUInt8) + .AddOutputAttr(kNumberTypeUInt8), + ScatterFunctorKernel, uint8_t, int64_t) +} // namespace kernel +} // namespace mindspore diff --git 
a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_add_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_functor_gpu_kernel.h
similarity index 73%
rename from mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_add_gpu_kernel.h
rename to mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_functor_gpu_kernel.h
index fd1ada10579..e74fa433e7c 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_add_gpu_kernel.h
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_functor_gpu_kernel.h
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020 Huawei Technologies Co., Ltd
+ * Copyright 2020-2021 Huawei Technologies Co., Ltd
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -14,21 +14,30 @@
  * limitations under the License.
  */
 
-#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_SCATTER_ADD_GPU_KERNEL_H_
-#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_SCATTER_ADD_GPU_KERNEL_H_
+#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_SCATTER_FUNCTOR_GPU_KERNEL_H_
+#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_SCATTER_FUNCTOR_GPU_KERNEL_H_
 
 #include <vector>
+#include <map>
+#include <string>
 #include "backend/kernel_compiler/gpu/gpu_kernel.h"
 #include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
-#include "backend/kernel_compiler/gpu/cuda_impl/scatter_add_impl.cuh"
+#include "backend/kernel_compiler/gpu/cuda_impl/scatter_functor_impl.cuh"
 
 namespace mindspore {
 namespace kernel {
-template <typename T>
-class ScatterAddKernel : public GpuKernel {
+
+static const std::map<std::string, ScatterFunctorType> kScatterFunctorTypeMap = {
+  {"ScatterUpdate", SCATTER_FUNC_UPDATE},
+  {"ScatterAdd", SCATTER_FUNC_ADD},
+  {"ScatterSub", SCATTER_FUNC_SUB},
+};
+
+template <typename T, typename S>
+class ScatterFunctorKernel : public GpuKernel {
  public:
-  ScatterAddKernel() { ResetResource(); }
-  ~ScatterAddKernel() override = default;
+  ScatterFunctorKernel() { ResetResource(); }
+  ~ScatterFunctorKernel() override = default;
 
   const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
   const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
@@ -37,10 +46,12 @@ class ScatterAddKernel : public GpuKernel {
   bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
               const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
     T *input = GetDeviceAddress<T>(inputs, 0);
-    int *indices = GetDeviceAddress<int>(inputs, 1);
+    S *indices = GetDeviceAddress<S>(inputs, 1);
     T *updates = GetDeviceAddress<T>(inputs, 2);
     T *output = GetDeviceAddress<T>(outputs, 0);
-    CalScatterAdd(inner_size_, indices_size_, indices, updates, input, reinterpret_cast<cudaStream_t>(stream_ptr));
+
+    ScatterFunc(scatter_functor_type_, inner_size_, indices_size_, indices, updates, input,
+                reinterpret_cast<cudaStream_t>(stream_ptr));
     CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
                                cudaMemcpyAsync(&output[0], &input[0], input_size_ * sizeof(T), cudaMemcpyDeviceToDevice,
                                                reinterpret_cast<cudaStream_t>(stream_ptr)),
                                "cudaMemcpyAsync output failed");
     return true;
   }
@@ -49,15 +60,22 @@
   bool Init(const CNodePtr &kernel_node) override {
+    std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
+    auto iter = kScatterFunctorTypeMap.find(kernel_name);
+    if (iter == kScatterFunctorTypeMap.end()) {
+      MS_LOG(EXCEPTION) << "Scatter functor " << kernel_name << " is not supported.";
+    } else {
+      scatter_functor_type_ = iter->second;
+    }
     kernel_node_ = kernel_node;
     size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
     if (input_num != 3) {
-      MS_LOG(ERROR) << "Input number is " << 
input_num << ", but ScatterAdd needs 3 inputs."; + MS_LOG(ERROR) << "Input number is " << input_num << ", but " << kernel_name << " needs 3 inputs."; return false; } size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node); if (output_num != 1) { - MS_LOG(ERROR) << "Output number is " << output_num << ", but ScatterAdd has 1 output."; + MS_LOG(ERROR) << "Output number is " << output_num << ", but " << kernel_name << " has 1 output."; return false; } auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); @@ -90,12 +108,13 @@ class ScatterAddKernel : public GpuKernel { protected: void InitSizeLists() override { input_size_list_.push_back(input_size_ * sizeof(T)); - input_size_list_.push_back(indices_size_ * sizeof(int)); + input_size_list_.push_back(indices_size_ * sizeof(S)); input_size_list_.push_back(updates_size_ * sizeof(T)); output_size_list_.push_back(input_size_ * sizeof(T)); } private: + ScatterFunctorType scatter_functor_type_; size_t input_size_; size_t inner_size_; size_t indices_size_; @@ -106,4 +125,4 @@ class ScatterAddKernel : public GpuKernel { }; } // namespace kernel } // namespace mindspore -#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_SCATTER_ADD_GPU_KERNEL_H_ +#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_SCATTER_FUNCTOR_GPU_KERNEL_H_ diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_update_gpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_update_gpu_kernel.cc deleted file mode 100644 index d990d1e0926..00000000000 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_update_gpu_kernel.cc +++ /dev/null @@ -1,57 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include "backend/kernel_compiler/gpu/arrays/scatter_update_gpu_kernel.h" - -namespace mindspore { -namespace kernel { -MS_REG_GPU_KERNEL_ONE(ScatterUpdate, - KernelAttr() - .AddInputAttr(kNumberTypeFloat32) - .AddInputAttr(kNumberTypeInt32) - .AddInputAttr(kNumberTypeFloat32) - .AddOutputAttr(kNumberTypeFloat32), - ScatterUpdateKernel, float) -MS_REG_GPU_KERNEL_ONE(ScatterUpdate, - KernelAttr() - .AddInputAttr(kNumberTypeFloat16) - .AddInputAttr(kNumberTypeInt32) - .AddInputAttr(kNumberTypeFloat16) - .AddOutputAttr(kNumberTypeFloat16), - ScatterUpdateKernel, half) -MS_REG_GPU_KERNEL_ONE(ScatterUpdate, - KernelAttr() - .AddInputAttr(kNumberTypeInt32) - .AddInputAttr(kNumberTypeInt32) - .AddInputAttr(kNumberTypeInt32) - .AddOutputAttr(kNumberTypeInt32), - ScatterUpdateKernel, int) -MS_REG_GPU_KERNEL_ONE(ScatterUpdate, - KernelAttr() - .AddInputAttr(kNumberTypeInt8) - .AddInputAttr(kNumberTypeInt32) - .AddInputAttr(kNumberTypeInt8) - .AddOutputAttr(kNumberTypeInt8), - ScatterUpdateKernel, int8_t) -MS_REG_GPU_KERNEL_ONE(ScatterUpdate, - KernelAttr() - .AddInputAttr(kNumberTypeUInt8) - .AddInputAttr(kNumberTypeInt32) - .AddInputAttr(kNumberTypeUInt8) - .AddOutputAttr(kNumberTypeUInt8), - ScatterUpdateKernel, uint8_t) -} // namespace kernel -} // namespace mindspore diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_update_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_update_gpu_kernel.h deleted file mode 100644 index b5b673999cf..00000000000 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/scatter_update_gpu_kernel.h +++ /dev/null @@ -1,109 +0,0 @@ -/** - * Copyright 2020 Huawei Technologies Co., Ltd - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */
-
-#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_SCATTER_UPDATE_GPU_KERNEL_H_
-#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_SCATTER_UPDATE_GPU_KERNEL_H_
-
-#include <vector>
-#include "backend/kernel_compiler/gpu/gpu_kernel.h"
-#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
-#include "backend/kernel_compiler/gpu/cuda_impl/scatter_update_impl.cuh"
-
-namespace mindspore {
-namespace kernel {
-template <typename T>
-class ScatterUpdateKernel : public GpuKernel {
- public:
-  ScatterUpdateKernel() { ResetResource(); }
-  ~ScatterUpdateKernel() override = default;
-
-  const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
-  const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
-  const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
-
-  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
-              const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
-    T *input = GetDeviceAddress<T>(inputs, 0);
-    int *indices = GetDeviceAddress<int>(inputs, 1);
-    T *updates = GetDeviceAddress<T>(inputs, 2);
-    T *output = GetDeviceAddress<T>(outputs, 0);
-    CalScatterUpdate(inner_size_, indices_size_, indices, updates, input, reinterpret_cast<cudaStream_t>(stream_ptr));
-    CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
-                               cudaMemcpyAsync(&output[0], &input[0], input_size_ * sizeof(T), cudaMemcpyDeviceToDevice,
-                                               reinterpret_cast<cudaStream_t>(stream_ptr)),
-                               "cudaMemcpyAsync output failed");
-    return true;
-  }
-
-  bool Init(const CNodePtr &kernel_node) override {
-    kernel_node_ = kernel_node;
-    size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
-    if (input_num != 3) {
-      MS_LOG(ERROR) << "Input number is " << input_num << ", but ScatterUpdate needs 3 inputs.";
-      return false;
-    }
-    size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
-    if (output_num != 1) {
-      MS_LOG(ERROR) << "Output number is " << output_num << ", but ScatterUpdate has 1 output.";
-      return false;
-    }
-    auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
-    input_size_ = 1;
-    inner_size_ = 1;
-    for (size_t i = 1; i < input_shape.size(); i++) {
-      inner_size_ *= input_shape[i];
-    }
-    input_size_ = input_shape[0] * inner_size_;
-    auto indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
-    indices_size_ = 1;
-    for (size_t i = 0; i < indices_shape.size(); i++) {
-      indices_size_ *= indices_shape[i];
-    }
-    updates_size_ = indices_size_ * inner_size_;
-    InitSizeLists();
-    return true;
-  }
-
-  void ResetResource() noexcept override {
-    input_size_ = 0;
-    inner_size_ = 0;
-    indices_size_ = 0;
-    updates_size_ = 0;
-    input_size_list_.clear();
-    output_size_list_.clear();
-    workspace_size_list_.clear();
-  }
-
- protected:
-  void InitSizeLists() override {
-    input_size_list_.push_back(input_size_ * sizeof(T));
-    input_size_list_.push_back(indices_size_ * sizeof(int));
-    input_size_list_.push_back(updates_size_ * sizeof(T));
-    output_size_list_.push_back(input_size_ * sizeof(T));
-  }
-
- private:
-  size_t input_size_;
-  size_t inner_size_;
-  size_t indices_size_;
-  size_t updates_size_;
-  std::vector<size_t> input_size_list_;
-  std::vector<size_t> output_size_list_;
-  std::vector<size_t> workspace_size_list_;
-};
-}  // namespace kernel
-}  // namespace mindspore
-#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_SCATTER_UPDATE_GPU_KERNEL_H_
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_add_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_add_impl.cu
deleted file mode 100644
index 05bcf15610a..00000000000
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_add_impl.cu
+++ /dev/null
@@ -1,49 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "backend/kernel_compiler/gpu/cuda_impl/util.cuh"
-#include "backend/kernel_compiler/gpu/cuda_impl/scatter_add_impl.cuh"
-
-template <typename T>
-__global__ void ScatterAdd(const size_t inner_size, const size_t updates_size, const int *indices, const T *updates,
-                           T *input) {
-  for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < updates_size; pos += blockDim.x * gridDim.x) {
-    const size_t index = pos / inner_size;
-    const size_t offset = pos % inner_size;
-    const size_t current_pos = indices[index] * inner_size + offset;
-    MsAtomicAdd(&input[current_pos], updates[pos]);
-  }
-}
-
-template <typename T>
-void CalScatterAdd(const size_t &inner_size, const size_t &indices_size, const int *indices, const T *updates, T *input,
-                   cudaStream_t cuda_stream) {
-  const size_t updates_size = inner_size * indices_size;
-  ScatterAdd<<<GET_BLOCKS(updates_size), GET_THREADS, 0, cuda_stream>>>(inner_size, updates_size, indices, updates,
-                                                                        input);
-}
-
-template void CalScatterAdd<float>(const size_t &inner_size, const size_t &indices_size, const int *indices,
-                                   const float *updates, float *input, cudaStream_t cuda_stream);
-template void CalScatterAdd<half>(const size_t &inner_size, const size_t &indices_size, const int *indices,
-                                  const half *updates, half *input, cudaStream_t cuda_stream);
-template void CalScatterAdd<int>(const size_t &inner_size, const size_t &indices_size, const int *indices,
-                                 const int *updates, int *input, cudaStream_t cuda_stream);
-template void CalScatterAdd<unsigned char>(const size_t &inner_size, const size_t &indices_size, const int *indices,
-                                           const unsigned char *updates, unsigned char *input,
-                                           cudaStream_t cuda_stream);
-template void CalScatterAdd<int8_t>(const size_t &inner_size, const size_t &indices_size, const int *indices,
-                                    const int8_t *updates, int8_t *input, cudaStream_t cuda_stream);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_functor_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_functor_impl.cu
new file mode 100644
index 00000000000..58d31e1fee8
--- /dev/null
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_functor_impl.cu
@@ -0,0 +1,103 @@
+/**
+ * Copyright 2020-2021 Huawei Technologies Co., Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#include "backend/kernel_compiler/gpu/cuda_impl/util.cuh" +#include "backend/kernel_compiler/gpu/cuda_impl/scatter_functor_impl.cuh" + +template +__global__ void ScatterUpdateKernel(const size_t inner_size, const size_t updates_size, const S *indices, + const T *updates, T *input) { + for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < updates_size; pos += blockDim.x * gridDim.x) { + const size_t index = pos / inner_size; + const size_t offset = pos % inner_size; + const size_t current_pos = indices[index] * inner_size + offset; + input[current_pos] = updates[pos]; + } +} + +template +__global__ void ScatterAddKernel(const size_t inner_size, const size_t updates_size, const S *indices, const T *updates, + T *input) { + for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < updates_size; pos += blockDim.x * gridDim.x) { + const size_t index = pos / inner_size; + const size_t offset = pos % inner_size; + const size_t current_pos = indices[index] * inner_size + offset; + MsAtomicAdd(&input[current_pos], updates[pos]); + } +} + +template +__global__ void ScatterSubKernel(const size_t inner_size, const size_t updates_size, const S *indices, const T *updates, + T *input) { + for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < updates_size; pos += blockDim.x * gridDim.x) { + const size_t index = pos / inner_size; + const size_t offset = pos % inner_size; + const size_t current_pos = indices[index] * inner_size + offset; + MsAtomicAdd(&input[current_pos], -updates[pos]); + } +} + +template +void ScatterFunc(enum ScatterFunctorType func_type, const size_t &inner_size, const size_t &indices_size, + const S *indices, const T *updates, T *input, cudaStream_t cuda_stream) { + const size_t updates_size = inner_size * indices_size; + switch (func_type) { + case SCATTER_FUNC_UPDATE: + return ScatterUpdateKernel<<>>(inner_size, updates_size, + indices, updates, input); + case SCATTER_FUNC_ADD: + return ScatterAddKernel<<>>(inner_size, updates_size, + indices, updates, input); + case SCATTER_FUNC_SUB: + return ScatterSubKernel<<>>(inner_size, updates_size, + indices, updates, input); + default: + break; + } +} + +template void ScatterFunc(enum ScatterFunctorType func_type, const size_t &inner_size, + const size_t &indices_size, const int *indices, const float *updates, + float *input, cudaStream_t cuda_stream); +template void ScatterFunc(enum ScatterFunctorType func_type, const size_t &inner_size, + const size_t &indices_size, const int64_t *indices, const float *updates, + float *input, cudaStream_t cuda_stream); +template void ScatterFunc(enum ScatterFunctorType func_type, const size_t &inner_size, + const size_t &indices_size, const int *indices, const half *updates, half *input, + cudaStream_t cuda_stream); +template void ScatterFunc(enum ScatterFunctorType func_type, const size_t &inner_size, + const size_t &indices_size, const int64_t *indices, const half *updates, + half *input, cudaStream_t cuda_stream); +template void ScatterFunc(enum ScatterFunctorType func_type, const size_t &inner_size, + const size_t &indices_size, const int *indices, const int *updates, int *input, + cudaStream_t cuda_stream); +template void ScatterFunc(enum ScatterFunctorType func_type, const size_t &inner_size, + const size_t &indices_size, const int64_t *indices, const int *updates, + int *input, cudaStream_t cuda_stream); +template void ScatterFunc(enum ScatterFunctorType func_type, const size_t &inner_size, + const size_t &indices_size, const int *indices, + const unsigned char 
+template void ScatterFunc<unsigned char, int64_t>(enum ScatterFunctorType func_type, const size_t &inner_size,
+                                                  const size_t &indices_size, const int64_t *indices,
+                                                  const unsigned char *updates, unsigned char *input,
+                                                  cudaStream_t cuda_stream);
+template void ScatterFunc<int8_t, int>(enum ScatterFunctorType func_type, const size_t &inner_size,
+                                       const size_t &indices_size, const int *indices, const int8_t *updates,
+                                       int8_t *input, cudaStream_t cuda_stream);
+template void ScatterFunc<int8_t, int64_t>(enum ScatterFunctorType func_type, const size_t &inner_size,
+                                           const size_t &indices_size, const int64_t *indices, const int8_t *updates,
+                                           int8_t *input, cudaStream_t cuda_stream);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_add_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_functor_impl.cuh
similarity index 62%
rename from mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_add_impl.cuh
rename to mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_functor_impl.cuh
index 1c54816563a..f33da6f7da4 100644
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_add_impl.cuh
+++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_functor_impl.cuh
@@ -1,5 +1,5 @@
 /**
- * Copyright 2020 Huawei Technologies Co., Ltd
+ * Copyright 2020-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -14,13 +14,20 @@
 * limitations under the License.
 */
 
-#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_ADD_IMPL_CUH_
-#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_ADD_IMPL_CUH_
+#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_FUNCTOR_IMPL_CUH_
+#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_FUNCTOR_IMPL_CUH_
 
 #include "runtime/device/gpu/cuda_common.h"
 
-template <typename T>
-void CalScatterAdd(const size_t &inner_size, const size_t &indices_size, const int *indices, const T *updates, T *input,
-                   cudaStream_t cuda_stream);
+enum ScatterFunctorType {
+  SCATTER_FUNC_UPDATE = 0,
+  SCATTER_FUNC_ADD,
+  SCATTER_FUNC_SUB,
+  SCATTER_FUNC_INVALID_TYPE = 255
+};
 
-#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_ADD_IMPL_CUH_
+template <typename T, typename S>
+void ScatterFunc(enum ScatterFunctorType func_type, const size_t &inner_size, const size_t &indices_size,
+                 const S *indices, const T *updates, T *input, cudaStream_t cuda_stream);
+
+#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_FUNCTOR_IMPL_CUH_
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_update_impl.cu b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_update_impl.cu
deleted file mode 100644
index b93c4a68b4c..00000000000
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_update_impl.cu
+++ /dev/null
@@ -1,48 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "backend/kernel_compiler/gpu/cuda_impl/scatter_update_impl.cuh"
-
-template <typename T>
-__global__ void ScatterUpdate(const size_t inner_size, const size_t updates_size, const int *indices, const T *updates,
-                              T *input) {
-  for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < updates_size; pos += blockDim.x * gridDim.x) {
-    const size_t index = pos / inner_size;
-    const size_t offset = pos % inner_size;
-    const size_t current_pos = indices[index] * inner_size + offset;
-    input[current_pos] = updates[pos];
-  }
-}
-
-template <typename T>
-void CalScatterUpdate(const size_t &inner_size, const size_t &indices_size, const int *indices, const T *updates,
-                      T *input, cudaStream_t cuda_stream) {
-  const size_t updates_size = inner_size * indices_size;
-  ScatterUpdate<<<GET_BLOCKS(updates_size), GET_THREADS, 0, cuda_stream>>>(inner_size, updates_size, indices, updates,
-                                                                           input);
-}
-
-template void CalScatterUpdate<float>(const size_t &inner_size, const size_t &indices_size, const int *indices,
-                                      const float *updates, float *input, cudaStream_t cuda_stream);
-template void CalScatterUpdate<half>(const size_t &inner_size, const size_t &indices_size, const int *indices,
-                                     const half *updates, half *input, cudaStream_t cuda_stream);
-template void CalScatterUpdate<int>(const size_t &inner_size, const size_t &indices_size, const int *indices,
-                                    const int *updates, int *input, cudaStream_t cuda_stream);
-template void CalScatterUpdate<unsigned char>(const size_t &inner_size, const size_t &indices_size, const int *indices,
-                                              const unsigned char *updates, unsigned char *input,
-                                              cudaStream_t cuda_stream);
-template void CalScatterUpdate<int8_t>(const size_t &inner_size, const size_t &indices_size, const int *indices,
-                                       const int8_t *updates, int8_t *input, cudaStream_t cuda_stream);
diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_update_impl.cuh b/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_update_impl.cuh
deleted file mode 100644
index 94e1b31d478..00000000000
--- a/mindspore/ccsrc/backend/kernel_compiler/gpu/cuda_impl/scatter_update_impl.cuh
+++ /dev/null
@@ -1,26 +0,0 @@
-/**
- * Copyright 2020 Huawei Technologies Co., Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_UPDATE_IMPL_CUH_
-#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_UPDATE_IMPL_CUH_
-
-#include "runtime/device/gpu/cuda_common.h"
-
-template <typename T>
-void CalScatterUpdate(const size_t &inner_size, const size_t &indices_size, const int *indices, const T *updates,
-                      T *input, cudaStream_t cuda_stream);
-
-#endif  // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_UPDATE_IMPL_CUH_
diff --git a/mindspore/core/abstract/infer_functions.h b/mindspore/core/abstract/infer_functions.h
index 5f06d31b8bf..d0f65b2e232 100644
--- a/mindspore/core/abstract/infer_functions.h
+++ b/mindspore/core/abstract/infer_functions.h
@@ -185,6 +185,8 @@ AbstractBasePtr InferImplDynamicStitch(const AnalysisEnginePtr &, const Primitiv
                                        const AbstractBasePtrList &args_spec_list);
 AbstractBasePtr InferImplScatterAdd(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
                                     const AbstractBasePtrList &args_spec_list);
+AbstractBasePtr InferImplScatterSub(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
+                                    const AbstractBasePtrList &args_spec_list);
 AbstractBasePtr InferImplScatterUpdate(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
                                        const AbstractBasePtrList &args_spec_list);
 AbstractBasePtr InferImplDiv(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
diff --git a/mindspore/core/abstract/prim_arrays.cc b/mindspore/core/abstract/prim_arrays.cc
index aeeafabbbad..3f7ec79a1a0 100644
--- a/mindspore/core/abstract/prim_arrays.cc
+++ b/mindspore/core/abstract/prim_arrays.cc
@@ -391,6 +391,21 @@ AbstractBasePtr InferImplScatterAdd(const AnalysisEnginePtr &, const PrimitivePt
   return std::make_shared<AbstractTensor>(x->element(), std::make_shared<Shape>(shape, min_shape, max_shape));
 }
 
+AbstractBasePtr InferImplScatterSub(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
+                                    const AbstractBasePtrList &args_spec_list) {
+  constexpr auto kScatterSubInputNum = 3;
+  const std::string op_name = primitive->name();
+  CheckRequiredArgsSize(op_name, args_spec_list, kScatterSubInputNum);
+  auto x = CheckArg<AbstractTensor>(op_name, args_spec_list, 0);
+  MS_EXCEPTION_IF_NULL(x);
+  MS_EXCEPTION_IF_NULL(x->shape());
+  ShapeVector shape = x->shape()->shape();
+  ShapeVector min_shape = x->shape()->min_shape();
+  ShapeVector max_shape = x->shape()->max_shape();
+  CheckMinMaxShape(shape, &min_shape, &max_shape);
+  return std::make_shared<AbstractTensor>(x->element(), std::make_shared<Shape>(shape, min_shape, max_shape));
+}
+
 AbstractBasePtr InferImplScatterUpdate(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
                                        const AbstractBasePtrList &args_spec_list) {
   const std::string op_name = primitive->name();
diff --git a/mindspore/core/abstract/primitive_infer_map.cc b/mindspore/core/abstract/primitive_infer_map.cc
index dc3525276ee..ce46a71137f 100644
--- a/mindspore/core/abstract/primitive_infer_map.cc
+++ b/mindspore/core/abstract/primitive_infer_map.cc
@@ -92,6 +92,7 @@ PrimitiveEvalImplMap &GetPrimitiveToEvalImplMap() {
     {prim::kPrimUnsortedSegmentMax, {InferImplUnsortedSegmentMax, nullptr, true}},
     {prim::kPrimUnsortedSegmentMin, {InferImplUnsortedSegmentMin, nullptr, true}},
     {prim::kPrimScatterAdd, {InferImplScatterAdd, nullptr, true}},
+    {prim::kPrimScatterSub, {InferImplScatterSub, nullptr, true}},
     {prim::kPrimSubAndFilter, {InferImplSubAndFilter, nullptr, true}},
     {prim::kPrimScatterUpdate, {InferImplScatterUpdate, nullptr, true}},
     {prim::kPrimMapCacheIdx, {InferImplMapCacheIdx, nullptr, true}},
diff --git a/mindspore/core/base/core_ops.h b/mindspore/core/base/core_ops.h
index aec1bba1f2d..c8b3252cacc 100644
--- a/mindspore/core/base/core_ops.h
+++ b/mindspore/core/base/core_ops.h
@@ -214,6 +214,7 @@ inline const PrimitivePtr kPrimDynamicRNNGrad = std::make_shared<Primitive>("Dyn
 inline const PrimitivePtr kPrimDynamicGRUV2 = std::make_shared<Primitive>("DynamicGRUV2");
 inline const PrimitivePtr kPrimDynamicGRUV2Grad = std::make_shared<Primitive>("DynamicGRUV2Grad");
 inline const PrimitivePtr kPrimScatterAdd = std::make_shared<Primitive>("ScatterAdd");
+inline const PrimitivePtr kPrimScatterSub = std::make_shared<Primitive>("ScatterSub");
 inline const PrimitivePtr kPrimScatterUpdate = std::make_shared<Primitive>("ScatterUpdate");
 inline const PrimitivePtr kPrimTensorCopySlices = std::make_shared<Primitive>("TensorCopySlices");
 inline const PrimitivePtr kPrimMapUniform = std::make_shared<Primitive>("MapUniform");
diff --git a/mindspore/ops/operations/array_ops.py b/mindspore/ops/operations/array_ops.py
index 6ec5981485e..ee1e4e3aba5 100755
--- a/mindspore/ops/operations/array_ops.py
+++ b/mindspore/ops/operations/array_ops.py
@@ -4129,7 +4129,7 @@ class ScatterAdd(_ScatterOpDynamic):
         self.add_prim_attr('side_effect_mem', True)
 
 
-class ScatterSub(_ScatterOp):
+class ScatterSub(_ScatterOpDynamic):
     r"""
     Updates the value of the input tensor through the subtraction operation.
 
@@ -4230,6 +4230,13 @@ class ScatterSub(_ScatterOp):
         [-12. -12. -12.]]
     """
 
+    @prim_attr_register
+    def __init__(self, use_locking=False):
+        """Initialize ScatterSub"""
+        validator.check_value_type('use_locking', use_locking, [bool], self.name)
+        self.init_prim_io_names(inputs=['x', 'indices', 'updates'], outputs=['y'])
+        self.add_prim_attr('side_effect_mem', True)
+
 
 class ScatterMul(_ScatterOp):
     r"""
diff --git a/tests/st/ops/gpu/test_scatter_add_op.py b/tests/st/ops/gpu/test_scatter_add_op.py
deleted file mode 100644
index 5041a633f3a..00000000000
--- a/tests/st/ops/gpu/test_scatter_add_op.py
+++ /dev/null
@@ -1,338 +0,0 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================ - -import numpy as np -import pytest -import mindspore.context as context -import mindspore.nn as nn -from mindspore import Tensor, Parameter -from mindspore.ops import operations as P -from mindspore.ops.operations import _inner_ops as inner - -context.set_context(mode=context.GRAPH_MODE, device_target="GPU") -# all cases tested against dchip - -class TestScatterAddNet(nn.Cell): - def __init__(self, lock, inputx, indices, updates): - super(TestScatterAddNet, self).__init__() - self.scatter_add = P.ScatterAdd(use_locking=lock) - self.inputx = Parameter(inputx, name="inputx") - self.indices = Parameter(indices, name="indices") - self.updates = Parameter(updates, name="updates") - - def construct(self): - out = self.scatter_add(self.inputx, self.indices, self.updates) - return out - -def scatter_add_net(inputx, indices, updates): - lock = True - net = TestScatterAddNet(lock, inputx, indices, updates) - return net() - -def scatter_add_use_locking_false_net(inputx, indices, updates): - lock = False - net = TestScatterAddNet(lock, inputx, indices, updates) - return net() - -class TestScatterAddDynamicNet(nn.Cell): - def __init__(self, inputx, indices, updates): - super(TestScatterAddDynamicNet, self).__init__() - self.scatter_add = P.ScatterAdd() - self.test_dynamic = inner.GpuConvertToDynamicShape() - self.inputx = Parameter(inputx, name="inputx") - self.indices = Parameter(indices, name="indices") - self.updates = Parameter(updates, name="updates") - - def construct(self): - indices = self.test_dynamic(self.indices) - updates = self.test_dynamic(self.updates) - out = self.scatter_add(self.inputx, indices, updates) - return out - -def scatter_add_d_net(inputx, indices, updates): - context.set_context(mode=context.GRAPH_MODE, device_target="GPU") - net = TestScatterAddDynamicNet(inputx, indices, updates) - return net() - -class TestScatterAddDynamicNet2(nn.Cell): - def __init__(self, inputx): - super(TestScatterAddDynamicNet2, self).__init__() - self.scatter_add = P.ScatterAdd() - self.test_dynamic = inner.GpuConvertToDynamicShape() - self.inputx = Parameter(inputx, name="inputx") - - def construct(self, indices, updates): - indices = self.test_dynamic(indices) - updates = self.test_dynamic(updates) - out = self.scatter_add(self.inputx, indices, updates) - return out - -def scatter_add_d2_net(inputx, indices_1, updates_1, - indices_2, updates_2): - context.set_context(mode=context.GRAPH_MODE, device_target="GPU") - net = TestScatterAddDynamicNet2(inputx) - out1 = net(indices_1, updates_1) - out2 = net(indices_2, updates_2) - return (out1, out2) - -@pytest.mark.level0 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.env_onecard -def test_scatter_add_small_float32(): - inputx = Tensor(np.zeros((2, 3)).astype(np.float32)) - indices = Tensor(np.array([[0, 1], [0, 1]]).astype(np.int32)) - updates = Tensor(np.arange(12).reshape((2, 2, 3)).astype(np.float32)) - output = scatter_add_net(inputx, indices, updates) - expected = np.array([[6., 8., 10.], - [12., 14., 16.]]) - np.testing.assert_array_almost_equal(output.asnumpy(), expected) - -@pytest.mark.level0 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.env_onecard -def test_scatter_add_input_updated(): - inputx = Tensor(np.zeros((2, 3)).astype(np.float32)) - indices = Tensor(np.array([[0, 1], [0, 1]]).astype(np.int32)) - updates = Tensor(np.arange(12).reshape((2, 2, 3)).astype(np.float32)) - lock = True - net = TestScatterAddNet(lock, inputx, indices, updates) 
- net() - expected = np.array([[6., 8., 10.], - [12., 14., 16.]]) - np.testing.assert_array_almost_equal(net.inputx.asnumpy(), expected) - -@pytest.mark.level0 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.env_onecard -def test_scatter_add_large_shape_float32(): - inputx = Tensor(np.ones((4, 2, 3, 4)).astype(np.float32)) - indices = Tensor(np.array([[0, 2], [3, 1]]).astype(np.int32)) - updates = Tensor(np.arange(96).reshape((2, 2, 2, 3, 4)).astype(np.float32)) - output = scatter_add_net(inputx, indices, updates) - expected = np.array([[[[1., 2., 3., 4.], - [5., 6., 7., 8.], - [9., 10., 11., 12.]], - [[13., 14., 15., 16.], - [17., 18., 19., 20.], - [21., 22., 23., 24.]]], - [[[73., 74., 75., 76.], - [77., 78., 79., 80.], - [81., 82., 83., 84.]], - [[85., 86., 87., 88.], - [89., 90., 91., 92.], - [93., 94., 95., 96.]]], - [[[25., 26., 27., 28.], - [29., 30., 31., 32.], - [33., 34., 35., 36.]], - [[37., 38., 39., 40.], - [41., 42., 43., 44.], - [45., 46., 47., 48.]]], - [[[49., 50., 51., 52.], - [53., 54., 55., 56.], - [57., 58., 59., 60.]], - [[61., 62., 63., 64.], - [65., 66., 67., 68.], - [69., 70., 71., 72.]]]]) - np.testing.assert_array_almost_equal(output.asnumpy(), expected) - -@pytest.mark.level0 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.env_onecard -def test_scatter_add_small_float32_use_locking_false(): - inputx = Tensor(np.zeros((2, 3)).astype(np.float32)) - indices = Tensor(np.array([1, 0]).astype(np.int32)) - updates = Tensor(np.arange(6).reshape((2, 3)).astype(np.float32)) - output = scatter_add_use_locking_false_net(inputx, indices, updates) - expected = np.array([[3., 4., 5.], - [0., 1., 2.]]) - np.testing.assert_array_almost_equal(output.asnumpy(), expected) - -@pytest.mark.level0 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.env_onecard -def test_scatter_add_input_less_than_1_float32(): - inputx = Tensor(np.array([[0.214141, 0.415151, 0.51516], - [0.876542, 0.451611, 0.55112], - [0.111244, 0.633333, 0.34444]]).astype(np.float32)) - indices = Tensor(np.array([[[1, 0, 2], - [2, 2, 0]], - [[1, 0, 1], - [2, 1, 2]]]).astype(np.int32)) - updates = Tensor(np.arange(34, 70).reshape((2, 2, 3, 3)).astype(np.float32)) - output = scatter_add_net(inputx, indices, updates) - expected = np.array([[141.21414, 144.41515, 147.51517], - [208.87654, 212.45161, 216.55112], - [257.11124, 262.63333, 267.34442]], dtype=np.float32) - np.testing.assert_array_almost_equal(output.asnumpy(), expected) - -@pytest.mark.level0 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.env_onecard -def test_scatter_add_float16(): - inputx = Tensor(np.zeros((2, 3)).astype(np.float16)) - indices = Tensor(np.array([[0, 1], [0, 1]]).astype(np.int32)) - updates = Tensor(np.arange(12).reshape((2, 2, 3)).astype(np.float16)) - output = scatter_add_net(inputx, indices, updates) - expected = np.array([[6., 8., 10.], - [12., 14., 16.]]) - np.testing.assert_array_almost_equal(output.asnumpy(), expected) - -@pytest.mark.level0 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.env_onecard -def test_scatter_add_large_float16(): - inputx = Tensor(np.zeros((2, 3, 4)).astype(np.float16)) - indices = Tensor(np.array([[0, 0], [1, 1]]).astype(np.int32)) - updates = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.float16)) - output = scatter_add_net(inputx, indices, updates) - expected = np.array([[[138., 140., 142., 144.], - [146., 148., 150., 152.], - [154., 156., 158., 160.]], - [[186., 188., 190., 192.], - [194., 196., 198., 200.], - [202., 204., 206., 208.]]]) - 
np.testing.assert_array_almost_equal(output.asnumpy(), expected) - -@pytest.mark.level0 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.env_onecard -def test_scatter_add_disordered_float16(): - inputx = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.float16))) - indices = Tensor(np.array([[[0, 1, 2], - [2, 1, 0]], - [[0, 0, 0], - [2, 2, 2]]]).astype(np.int32)) - updates = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.float16)) - output = scatter_add_net(inputx, indices, updates) - expected = np.array([[464., 468., 472., 476.], - [187., 188., 189., 190.], - [492., 496., 500., 504.]]) - np.testing.assert_array_almost_equal(output.asnumpy(), expected) - -@pytest.mark.level0 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.env_onecard -def test_scatter_add_large_int32(): - inputx = Tensor(np.zeros((2, 3, 4)).astype(np.int32)) - indices = Tensor(np.array([[0, 0], [1, 1]]).astype(np.int32)) - updates = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.int32)) - output = scatter_add_net(inputx, indices, updates) - expected = np.array([[[138., 140., 142., 144.], - [146., 148., 150., 152.], - [154., 156., 158., 160.]], - [[186., 188., 190., 192.], - [194., 196., 198., 200.], - [202., 204., 206., 208.]]]).astype(np.int32) - np.testing.assert_array_almost_equal(output.asnumpy(), expected) - -@pytest.mark.level0 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.env_onecard -def test_scatter_add_disordered_int32(): - inputx = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.int32))) - indices = Tensor(np.array([[[0, 1, 2], - [2, 1, 0]], - [[0, 0, 0], - [2, 2, 2]]]).astype(np.int32)) - updates = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.int32)) - output = scatter_add_net(inputx, indices, updates) - expected = np.array([[464., 468., 472., 476.], - [187., 188., 189., 190.], - [492., 496., 500., 504.]]).astype(np.int32) - np.testing.assert_array_almost_equal(output.asnumpy(), expected) - -@pytest.mark.level0 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.env_onecard -def test_scatter_add_disordered_dynamic_int32(): - inputx = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.int32))) - indices = Tensor(np.array([[[0, 1, 2], - [2, 1, 0]], - [[0, 0, 0], - [2, 2, 2]]]).astype(np.int32)) - updates = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.int32)) - output = scatter_add_d_net(inputx, indices, updates) - expected = np.array([[464., 468., 472., 476.], - [187., 188., 189., 190.], - [492., 496., 500., 504.]]).astype(np.int32) - np.testing.assert_array_almost_equal(output.asnumpy(), expected) - -@pytest.mark.level0 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.env_onecard -def test_scatter_add_disordered_dynamic_int8(): - inputx = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.int8))) - indices = Tensor(np.array([[[0, 1, 2], - [2, 1, 0]], - [[0, 0, 0], - [2, 2, 2]]]).astype(np.int32)) - updates = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.int8)) - output = scatter_add_d_net(inputx, indices, updates) - expected = np.array([[464., 468., 472., 476.], - [187., 188., 189., 190.], - [492., 496., 500., 504.]]).astype(np.int8) - np.testing.assert_array_almost_equal(output.asnumpy(), expected) - -@pytest.mark.level0 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.env_onecard -def test_scatter_add_disordered_dynamic_uint8(): - inputx = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.uint8))) - indices = Tensor(np.array([[[0, 1, 2], - [2, 1, 0]], - [[0, 0, 0], - [2, 2, 
2]]]).astype(np.int32)) - updates = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.uint8)) - output = scatter_add_d_net(inputx, indices, updates) - expected = np.array([[464., 468., 472., 476.], - [187., 188., 189., 190.], - [492., 496., 500., 504.]]).astype(np.uint8) - np.testing.assert_array_almost_equal(output.asnumpy(), expected) - -@pytest.mark.level0 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.env_onecard -def test_scatter_add_input_less_than_1_dynamic_float32(): - inputx = Tensor(np.array([[0.214141, 0.415151, 0.51516], - [0.876542, 0.451611, 0.55112], - [0.111244, 0.633333, 0.34444]]).astype(np.float32)) - indices = Tensor(np.array([[[1, 0, 2], - [2, 2, 0]], - [[1, 0, 1], - [2, 1, 2]]]).astype(np.int32)) - updates = Tensor(np.arange(34, 70).reshape((2, 2, 3, 3)).astype(np.float32)) - output = scatter_add_d_net(inputx, indices, updates) - expected = np.array([[141.21414, 144.41515, 147.51517], - [208.87654, 212.45161, 216.55112], - [257.11124, 262.63333, 267.34442]], dtype=np.float32) - np.testing.assert_array_almost_equal(output.asnumpy(), expected) - -@pytest.mark.level0 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.env_onecard -def test_scatter_add_dynamic_two_inputs(): - inputx = Tensor(np.zeros((2, 3)).astype(np.float32)) - indices_1 = Tensor(np.array([[0, 1], [0, 1]]).astype(np.int32)) - updates_1 = Tensor(np.arange(12).reshape((2, 2, 3)).astype(np.float32)) - indices_2 = Tensor(np.array([[0, 0], [1, 1], [1, 0]]).astype(np.int32)) - updates_2 = Tensor(np.flip(np.arange(18).reshape((3, 2, 3)).astype(np.float32))) - output_1, output_2 = scatter_add_d2_net(inputx, indices_1, updates_1, - indices_2, updates_2) - expected_1 = np.array([[6., 8., 10.], - [12., 14., 16.]]) - expected_2 = np.array([[39., 38., 37.], - [36., 35., 34.]]) - np.testing.assert_array_almost_equal(output_1.asnumpy(), expected_1) - np.testing.assert_array_almost_equal(output_2.asnumpy(), expected_2) diff --git a/tests/st/ops/gpu/test_scatter_func_op.py b/tests/st/ops/gpu/test_scatter_func_op.py new file mode 100644 index 00000000000..7cdddbfedf9 --- /dev/null +++ b/tests/st/ops/gpu/test_scatter_func_op.py @@ -0,0 +1,720 @@ +# Copyright 2020-2021 Huawei Technologies Co., Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================ + +import numpy as np +import pytest +import mindspore.context as context +import mindspore.nn as nn +from mindspore import Tensor, Parameter +from mindspore.ops import operations as P +from mindspore.ops.operations import _inner_ops as inner + +context.set_context(mode=context.GRAPH_MODE, device_target="GPU") +# all cases tested against dchip + +func_map = { + "update": P.ScatterUpdate, + "add": P.ScatterAdd, + "sub": P.ScatterSub, +} + + +class TestScatterFuncNet(nn.Cell): + def __init__(self, func, lock, inputx, indices, updates): + super(TestScatterFuncNet, self).__init__() + + self.scatter_func = func_map[func](use_locking=lock) + self.inputx = Parameter(inputx, name="inputx") + self.indices = Parameter(indices, name="indices") + self.updates = Parameter(updates, name="updates") + + def construct(self): + out = self.scatter_func(self.inputx, self.indices, self.updates) + return out + + +def scatter_func_net(func, inputx, indices, updates): + lock = True + net = TestScatterFuncNet(func, lock, inputx, indices, updates) + return net() + + +def scatter_func_use_locking_false_net(func, inputx, indices, updates): + lock = False + net = TestScatterFuncNet(func, lock, inputx, indices, updates) + return net() + + +class TestScatterFuncDynamicNet(nn.Cell): + def __init__(self, func, inputx, indices, updates): + super(TestScatterFuncDynamicNet, self).__init__() + self.scatter_func = func_map[func]() + self.test_dynamic = inner.GpuConvertToDynamicShape() + self.inputx = Parameter(inputx, name="inputx") + self.indices = Parameter(indices, name="indices") + self.updates = Parameter(updates, name="updates") + + def construct(self): + indices = self.test_dynamic(self.indices) + updates = self.test_dynamic(self.updates) + out = self.scatter_func(self.inputx, indices, updates) + return out + + +def scatter_func_d_net(func, inputx, indices, updates): + context.set_context(mode=context.GRAPH_MODE, device_target="GPU") + net = TestScatterFuncDynamicNet(func, inputx, indices, updates) + return net() + + +class TestScatterFuncDynamicNet2(nn.Cell): + def __init__(self, func, inputx): + super(TestScatterFuncDynamicNet2, self).__init__() + self.scatter_func = func_map[func]() + self.test_dynamic = inner.GpuConvertToDynamicShape() + self.inputx = Parameter(inputx, name="inputx") + + def construct(self, indices, updates): + indices = self.test_dynamic(indices) + updates = self.test_dynamic(updates) + out = self.scatter_func(self.inputx, indices, updates) + return out + + +def scatter_func_d2_net(func, inputx, indices_1, updates_1, indices_2, updates_2): + context.set_context(mode=context.GRAPH_MODE, device_target="GPU") + net = TestScatterFuncDynamicNet2(func, inputx) + out1 = net(indices_1, updates_1) + out2 = net(indices_2, updates_2) + return (out1, out2) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_scatter_func_small_float32(): + inputx = Tensor(np.zeros((2, 3)).astype(np.float32)) + indices = Tensor(np.array([[0, 1], [0, 1]]).astype(np.int32)) + updates = Tensor(np.arange(12).reshape((2, 2, 3)).astype(np.float32)) + + # update + output = scatter_func_net("update", inputx, indices, updates) + expected = np.array([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]) + np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + # add + output = scatter_func_net("add", inputx, indices, updates) + expected = np.array([[6.0, 8.0, 10.0], [12.0, 14.0, 16.0]]) + 
np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + # sub + output = scatter_func_net("sub", inputx, indices, updates) + expected = np.array([[-6.0, -8.0, -10.0], [-12.0, -14.0, -16.0]]) + np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_scatter_func_input_updated(): + inputx = Tensor(np.zeros((2, 3)).astype(np.float32)) + indices = Tensor(np.array([[0, 1], [0, 1]]).astype(np.int32)) + updates = Tensor(np.arange(12).reshape((2, 2, 3)).astype(np.float32)) + lock = True + + # update + net = TestScatterFuncNet("update", lock, inputx, indices, updates) + net() + expected = np.array([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]) + np.testing.assert_array_almost_equal(net.inputx.asnumpy(), expected) + + # add + net = TestScatterFuncNet("add", lock, inputx, indices, updates) + net() + expected = np.array([[6.0, 8.0, 10.0], [12.0, 14.0, 16.0]]) + np.testing.assert_array_almost_equal(net.inputx.asnumpy(), expected) + + # sub + net = TestScatterFuncNet("sub", lock, inputx, indices, updates) + net() + expected = np.array([[-6.0, -8.0, -10.0], [-12.0, -14.0, -16.0]]) + np.testing.assert_array_almost_equal(net.inputx.asnumpy(), expected) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_scatter_func_large_shape_float32(): + inputx = Tensor(np.ones((4, 2, 3, 4)).astype(np.float32)) + indices = Tensor(np.array([[0, 2], [3, 1]]).astype(np.int32)) + updates = Tensor(np.arange(96).reshape((2, 2, 2, 3, 4)).astype(np.float32)) + + # update + output = scatter_func_net("update", inputx, indices, updates) + expected = np.array( + [ + [ + [[0.0, 1.0, 2.0, 3.0], [4.0, 5.0, 6.0, 7.0], [8.0, 9.0, 10.0, 11.0]], + [[12.0, 13.0, 14.0, 15.0], [16.0, 17.0, 18.0, 19.0], [20.0, 21.0, 22.0, 23.0]], + ], + [ + [[72.0, 73.0, 74.0, 75.0], [76.0, 77.0, 78.0, 79.0], [80.0, 81.0, 82.0, 83.0]], + [[84.0, 85.0, 86.0, 87.0], [88.0, 89.0, 90.0, 91.0], [92.0, 93.0, 94.0, 95.0]], + ], + [ + [[24.0, 25.0, 26.0, 27.0], [28.0, 29.0, 30.0, 31.0], [32.0, 33.0, 34.0, 35.0]], + [[36.0, 37.0, 38.0, 39.0], [40.0, 41.0, 42.0, 43.0], [44.0, 45.0, 46.0, 47.0]], + ], + [ + [[48.0, 49.0, 50.0, 51.0], [52.0, 53.0, 54.0, 55.0], [56.0, 57.0, 58.0, 59.0]], + [[60.0, 61.0, 62.0, 63.0], [64.0, 65.0, 66.0, 67.0], [68.0, 69.0, 70.0, 71.0]], + ], + ] + ) + np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + # add + output = scatter_func_net("add", inputx, indices, updates) + expected = np.array( + [ + [ + [[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0], [9.0, 10.0, 11.0, 12.0]], + [[13.0, 14.0, 15.0, 16.0], [17.0, 18.0, 19.0, 20.0], [21.0, 22.0, 23.0, 24.0]], + ], + [ + [[73.0, 74.0, 75.0, 76.0], [77.0, 78.0, 79.0, 80.0], [81.0, 82.0, 83.0, 84.0]], + [[85.0, 86.0, 87.0, 88.0], [89.0, 90.0, 91.0, 92.0], [93.0, 94.0, 95.0, 96.0]], + ], + [ + [[25.0, 26.0, 27.0, 28.0], [29.0, 30.0, 31.0, 32.0], [33.0, 34.0, 35.0, 36.0]], + [[37.0, 38.0, 39.0, 40.0], [41.0, 42.0, 43.0, 44.0], [45.0, 46.0, 47.0, 48.0]], + ], + [ + [[49.0, 50.0, 51.0, 52.0], [53.0, 54.0, 55.0, 56.0], [57.0, 58.0, 59.0, 60.0]], + [[61.0, 62.0, 63.0, 64.0], [65.0, 66.0, 67.0, 68.0], [69.0, 70.0, 71.0, 72.0]], + ], + ] + ) + np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + # sub + output = scatter_func_net("sub", inputx, indices, updates) + expected = np.array( + [ + [ + [[1.0, 0.0, -1.0, -2.0], [-3.0, -4.0, -5.0, -6.0], [-7.0, -8.0, -9.0, -10.0]], + [ + [-11.0, -12.0, -13.0, -14.0], + [-15.0, 
-16.0, -17.0, -18.0], + [-19.0, -20.0, -21.0, -22.0], + ], + ], + [ + [ + [-71.0, -72.0, -73.0, -74.0], + [-75.0, -76.0, -77.0, -78.0], + [-79.0, -80.0, -81.0, -82.0], + ], + [ + [-83.0, -84.0, -85.0, -86.0], + [-87.0, -88.0, -89.0, -90.0], + [-91.0, -92.0, -93.0, -94.0], + ], + ], + [ + [ + [-23.0, -24.0, -25.0, -26.0], + [-27.0, -28.0, -29.0, -30.0], + [-31.0, -32.0, -33.0, -34.0], + ], + [ + [-35.0, -36.0, -37.0, -38.0], + [-39.0, -40.0, -41.0, -42.0], + [-43.0, -44.0, -45.0, -46.0], + ], + ], + [ + [ + [-47.0, -48.0, -49.0, -50.0], + [-51.0, -52.0, -53.0, -54.0], + [-55.0, -56.0, -57.0, -58.0], + ], + [ + [-59.0, -60.0, -61.0, -62.0], + [-63.0, -64.0, -65.0, -66.0], + [-67.0, -68.0, -69.0, -70.0], + ], + ], + ] + ) + np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_scatter_func_small_float32_use_locking_false(): + inputx = Tensor(np.zeros((2, 3)).astype(np.float32)) + indices = Tensor(np.array([1, 0]).astype(np.int32)) + updates = Tensor(np.arange(6).reshape((2, 3)).astype(np.float32)) + + # update + output = scatter_func_use_locking_false_net("update", inputx, indices, updates) + expected = np.array([[3.0, 4.0, 5.0], [0.0, 1.0, 2.0]]) + np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + # add + output = scatter_func_use_locking_false_net("add", inputx, indices, updates) + expected = np.array([[3.0, 4.0, 5.0], [0.0, 1.0, 2.0]]) + np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + # sub + output = scatter_func_use_locking_false_net("sub", inputx, indices, updates) + expected = np.array([[-3.0, -4.0, -5.0], [0.0, -1.0, -2.0]]) + np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_scatter_func_input_less_than_1_float32(): + inputx = Tensor( + np.array( + [ + [0.214141, 0.415151, 0.51516], + [0.876542, 0.451611, 0.55112], + [0.111244, 0.633333, 0.34444], + ] + ).astype(np.float32) + ) + indices = Tensor(np.array([[[1, 0, 2], [2, 2, 0]], [[1, 0, 1], [2, 1, 2]]]).astype(np.int32)) + updates = Tensor(np.arange(34, 70).reshape((2, 2, 3, 3)).astype(np.float32)) + + # update + output = scatter_func_net("update", inputx, indices, updates) + expected = np.array( + [[37.0, 38.0, 39.0], [34.0, 35.0, 66.0], [67.0, 68.0, 69.0],], dtype=np.float32, + ) + np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + # add + output = scatter_func_net("add", inputx, indices, updates) + expected = np.array( + [ + [141.21414, 144.41515, 147.51517], + [208.87654, 212.45161, 216.55112], + [257.11124, 262.63333, 267.34442], + ], + dtype=np.float32, + ) + np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + # sub + output = scatter_func_net("sub", inputx, indices, updates) + expected = np.array( + [ + [-140.78586, -143.58485, -146.48483], + [-207.12346, -211.54839, -215.44888], + [-256.88876, -261.36667, -266.65558], + ], + dtype=np.float32, + ) + np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_scatter_func_float16(): + inputx = Tensor(np.zeros((2, 3)).astype(np.float16)) + indices = Tensor(np.array([[0, 1], [0, 1]]).astype(np.int32)) + updates = Tensor(np.arange(12).reshape((2, 2, 3)).astype(np.float16)) + + # update + output = scatter_func_net("update", inputx, indices, updates) + expected = np.array([[0.0, 
1.0, 2.0], [3.0, 4.0, 5.0]]) + np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + # add + output = scatter_func_net("add", inputx, indices, updates) + expected = np.array([[6.0, 8.0, 10.0], [12.0, 14.0, 16.0]]) + np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + # sub + output = scatter_func_net("sub", inputx, indices, updates) + expected = np.array([[-6.0, -8.0, -10.0], [-12.0, -14.0, -16.0]]) + np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_scatter_func_large_float16(): + inputx = Tensor(np.zeros((2, 3, 4)).astype(np.float16)) + indices = Tensor(np.array([[0, 0], [1, 1]]).astype(np.int32)) + updates = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.float16)) + + # update + output = scatter_func_net("update", inputx, indices, updates) + expected = np.array( + [ + [[63.0, 64.0, 65.0, 66.0], [67.0, 68.0, 69.0, 70.0], [71.0, 72.0, 73.0, 74.0],], + [[99.0, 100.0, 101.0, 102.0], [103.0, 104.0, 105.0, 106.0], [95.0, 96.0, 97.0, 98.0],], + ] + ) + np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + # add + output = scatter_func_net("add", inputx, indices, updates) + expected = np.array( + [ + [ + [138.0, 140.0, 142.0, 144.0], + [146.0, 148.0, 150.0, 152.0], + [154.0, 156.0, 158.0, 160.0], + ], + [ + [186.0, 188.0, 190.0, 192.0], + [194.0, 196.0, 198.0, 200.0], + [202.0, 204.0, 206.0, 208.0], + ], + ] + ) + np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + # sub + output = scatter_func_net("sub", inputx, indices, updates) + expected = np.array( + [ + [ + [-138.0, -140.0, -142.0, -144.0], + [-146.0, -148.0, -150.0, -152.0], + [-154.0, -156.0, -158.0, -160.0], + ], + [ + [-186.0, -188.0, -190.0, -192.0], + [-194.0, -196.0, -198.0, -200.0], + [-202.0, -204.0, -206.0, -208.0], + ], + ] + ) + np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_scatter_func_disordered_float16(): + inputx = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.float16))) + indices = Tensor(np.array([[[0, 1, 2], [2, 1, 0]], [[0, 0, 0], [2, 2, 2]]]).astype(np.int32)) + updates = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.float16)) + + # update + output = scatter_func_net("update", inputx, indices, updates) + expected = np.array( + [[95.0, 96.0, 97.0, 98.0], [67.0, 68.0, 69.0, 70.0], [99.0, 100.0, 101.0, 102.0]] + ) + np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + # add + output = scatter_func_net("add", inputx, indices, updates) + expected = np.array( + [[464.0, 468.0, 472.0, 476.0], [187.0, 188.0, 189.0, 190.0], [492.0, 496.0, 500.0, 504.0]] + ) + np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + # sub + output = scatter_func_net("sub", inputx, indices, updates) + expected = np.array( + [ + [-374.0, -380.0, -386.0, -392.0], + [-105.0, -108.0, -111.0, -114.0], + [-418.0, -424.0, -430.0, -436.0], + ] + ) + np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_scatter_func_large_int32(): + inputx = Tensor(np.zeros((2, 3, 4)).astype(np.int32)) + indices = Tensor(np.array([[0, 0], [1, 1]]).astype(np.int32)) + updates = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.int32)) + + # update + output = scatter_func_net("update", inputx, 
indices, updates) + expected = np.array( + [ + [[63.0, 64.0, 65.0, 66.0], [67.0, 68.0, 69.0, 70.0], [71.0, 72.0, 73.0, 74.0],], + [[99.0, 100.0, 101.0, 102.0], [103.0, 104.0, 105.0, 106.0], [95.0, 96.0, 97.0, 98.0],], + ] + ).astype(np.int32) + np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + # add + output = scatter_func_net("add", inputx, indices, updates) + expected = np.array( + [ + [ + [138.0, 140.0, 142.0, 144.0], + [146.0, 148.0, 150.0, 152.0], + [154.0, 156.0, 158.0, 160.0], + ], + [ + [186.0, 188.0, 190.0, 192.0], + [194.0, 196.0, 198.0, 200.0], + [202.0, 204.0, 206.0, 208.0], + ], + ] + ).astype(np.int32) + np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + # sub + output = scatter_func_net("sub", inputx, indices, updates) + expected = np.array( + [ + [ + [-138.0, -140.0, -142.0, -144.0], + [-146.0, -148.0, -150.0, -152.0], + [-154.0, -156.0, -158.0, -160.0], + ], + [ + [-186.0, -188.0, -190.0, -192.0], + [-194.0, -196.0, -198.0, -200.0], + [-202.0, -204.0, -206.0, -208.0], + ], + ] + ).astype(np.int32) + np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_scatter_func_disordered_int32(): + inputx = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.int32))) + indices = Tensor(np.array([[[0, 1, 2], [2, 1, 0]], [[0, 0, 0], [2, 2, 2]]]).astype(np.int32)) + updates = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.int32)) + + # update + output = scatter_func_net("update", inputx, indices, updates) + expected = np.array( + [[95.0, 96.0, 97.0, 98.0], [67.0, 68.0, 69.0, 70.0], [99.0, 100.0, 101.0, 102.0]] + ).astype(np.int32) + np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + # add + output = scatter_func_net("add", inputx, indices, updates) + expected = np.array( + [[464.0, 468.0, 472.0, 476.0], [187.0, 188.0, 189.0, 190.0], [492.0, 496.0, 500.0, 504.0]] + ).astype(np.int32) + np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + # sub + output = scatter_func_net("sub", inputx, indices, updates) + expected = np.array( + [ + [-374.0, -380.0, -386.0, -392.0], + [-105.0, -108.0, -111.0, -114.0], + [-418.0, -424.0, -430.0, -436.0], + ] + ).astype(np.int32) + np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_scatter_func_disordered_dynamic_int32(): + inputx = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.int32))) + indices = Tensor(np.array([[[0, 1, 2], [2, 1, 0]], [[0, 0, 0], [2, 2, 2]]]).astype(np.int32)) + updates = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.int32)) + + # update + output = scatter_func_d_net("update", inputx, indices, updates) + expected = np.array( + [[95.0, 96.0, 97.0, 98.0], [67.0, 68.0, 69.0, 70.0], [99.0, 100.0, 101.0, 102.0]] + ).astype(np.int32) + np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + # add + output = scatter_func_d_net("add", inputx, indices, updates) + expected = np.array( + [[464.0, 468.0, 472.0, 476.0], [187.0, 188.0, 189.0, 190.0], [492.0, 496.0, 500.0, 504.0]] + ).astype(np.int32) + np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + # sub + output = scatter_func_d_net("sub", inputx, indices, updates) + expected = np.array( + [ + [-374.0, -380.0, -386.0, -392.0], + [-105.0, -108.0, -111.0, -114.0], + [-418.0, -424.0, -430.0, -436.0], + ] + ).astype(np.int32) + 
np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_scatter_func_disordered_dynamic_int8(): + inputx = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.int8))) + indices = Tensor(np.array([[[0, 1, 2], [2, 1, 0]], [[0, 0, 0], [2, 2, 2]]]).astype(np.int32)) + updates = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.int8)) + + # update + output = scatter_func_d_net("update", inputx, indices, updates) + expected = np.array( + [[95.0, 96.0, 97.0, 98.0], [67.0, 68.0, 69.0, 70.0], [99.0, 100.0, 101.0, 102.0]] + ).astype(np.int8) + np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + # add + output = scatter_func_d_net("add", inputx, indices, updates) + expected = np.array( + [[464.0, 468.0, 472.0, 476.0], [187.0, 188.0, 189.0, 190.0], [492.0, 496.0, 500.0, 504.0]] + ).astype(np.int8) + np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + # sub + output = scatter_func_d_net("sub", inputx, indices, updates) + expected = np.array( + [ + [-118.0, -124.0, 126.0, 120.0], + [-105.0, -108.0, -111.0, -114.0], + [94.0, 88.0, 82.0, 76.0], + ] + ).astype(np.int8) + np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_scatter_func_disordered_dynamic_uint8(): + inputx = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.uint8))) + indices = Tensor(np.array([[[0, 1, 2], [2, 1, 0]], [[0, 0, 0], [2, 2, 2]]]).astype(np.int32)) + updates = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.uint8)) + + # update + output = scatter_func_d_net("update", inputx, indices, updates) + expected = np.array( + [[95.0, 96.0, 97.0, 98.0], [67.0, 68.0, 69.0, 70.0], [99.0, 100.0, 101.0, 102.0]] + ).astype(np.uint8) + np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + # add + output = scatter_func_d_net("add", inputx, indices, updates) + expected = np.array( + [[464.0, 468.0, 472.0, 476.0], [187.0, 188.0, 189.0, 190.0], [492.0, 496.0, 500.0, 504.0]] + ).astype(np.uint8) + np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + # sub + output = scatter_func_d_net("sub", inputx, indices, updates) + expected = np.array( + [[138.0, 132.0, 126.0, 120.0], [151.0, 148.0, 145.0, 142.0], [94.0, 88.0, 82.0, 76.0]] + ).astype(np.uint8) + np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_scatter_func_input_less_than_1_dynamic_float32(): + inputx = Tensor( + np.array( + [ + [0.214141, 0.415151, 0.51516], + [0.876542, 0.451611, 0.55112], + [0.111244, 0.633333, 0.34444], + ] + ).astype(np.float32) + ) + indices = Tensor(np.array([[[1, 0, 2], [2, 2, 0]], [[1, 0, 1], [2, 1, 2]]]).astype(np.int32)) + updates = Tensor(np.arange(34, 70).reshape((2, 2, 3, 3)).astype(np.float32)) + + # update + output = scatter_func_d_net("update", inputx, indices, updates) + expected = np.array( + [[37.0, 38.0, 39.0], [34.0, 35.0, 66.0], [67.0, 68.0, 69.0],], dtype=np.float32, + ) + np.testing.assert_array_almost_equal(output.asnumpy(), expected) + + # add + output = scatter_func_d_net("add", inputx, indices, updates) + expected = np.array( + [ + [141.21414, 144.41515, 147.51517], + [208.87654, 212.45161, 216.55112], + [257.11124, 262.63333, 267.34442], + ], + dtype=np.float32, + ) + np.testing.assert_array_almost_equal(output.asnumpy(), 
expected)
+
+    # sub
+    output = scatter_func_d_net("sub", inputx, indices, updates)
+    expected = np.array(
+        [
+            [-140.78586, -143.58485, -146.48483],
+            [-207.12346, -211.54839, -215.44888],
+            [-256.88876, -261.36667, -266.65558],
+        ],
+        dtype=np.float32,
+    )
+    np.testing.assert_array_almost_equal(output.asnumpy(), expected)
+
+
+@pytest.mark.level0
+@pytest.mark.platform_x86_gpu_training
+@pytest.mark.env_onecard
+def test_scatter_func_dynamic_two_inputs():
+    inputx = Tensor(np.zeros((2, 3)).astype(np.float32))
+    indices_1 = Tensor(np.array([[0, 1], [0, 1]]).astype(np.int32))
+    updates_1 = Tensor(np.arange(12).reshape((2, 2, 3)).astype(np.float32))
+    indices_2 = Tensor(np.array([[0, 0], [1, 1], [1, 0]]).astype(np.int32))
+    updates_2 = Tensor(np.flip(np.arange(18).reshape((3, 2, 3)).astype(np.float32)))
+
+    # update
+    output_1, output_2 = scatter_func_d2_net(
+        "update", inputx, indices_1, updates_1, indices_2, updates_2
+    )
+    expected_1 = np.array([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]])
+    expected_2 = np.array([[17.0, 16.0, 15.0], [11.0, 10.0, 9.0]])
+    np.testing.assert_array_almost_equal(output_1.asnumpy(), expected_1)
+    np.testing.assert_array_almost_equal(output_2.asnumpy(), expected_2)
+
+    # add
+    output_1, output_2 = scatter_func_d2_net(
+        "add", inputx, indices_1, updates_1, indices_2, updates_2
+    )
+    expected_1 = np.array([[6.0, 8.0, 10.0], [12.0, 14.0, 16.0]])
+    expected_2 = np.array([[39.0, 38.0, 37.0], [36.0, 35.0, 34.0]])
+    np.testing.assert_array_almost_equal(output_1.asnumpy(), expected_1)
+    np.testing.assert_array_almost_equal(output_2.asnumpy(), expected_2)
+
+    # sub
+    output_1, output_2 = scatter_func_d2_net(
+        "sub", inputx, indices_1, updates_1, indices_2, updates_2
+    )
+    expected_1 = np.array([[-6.0, -8.0, -10.0], [-12.0, -14.0, -16.0]])
+    expected_2 = np.array([[-39.0, -38.0, -37.0], [-36.0, -35.0, -34.0]])
+    np.testing.assert_array_almost_equal(output_1.asnumpy(), expected_1)
+    np.testing.assert_array_almost_equal(output_2.asnumpy(), expected_2)
diff --git a/tests/st/ops/gpu/test_scatter_update_op.py b/tests/st/ops/gpu/test_scatter_update_op.py
deleted file mode 100644
index 3c04bb815f1..00000000000
--- a/tests/st/ops/gpu/test_scatter_update_op.py
+++ /dev/null
@@ -1,359 +0,0 @@
-# Copyright 2020 Huawei Technologies Co., Ltd
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ============================================================================
-
-import numpy as np
-import pytest
-import mindspore.context as context
-import mindspore.nn as nn
-from mindspore import Tensor, Parameter
-from mindspore.ops import operations as P
-from mindspore.ops.operations import _inner_ops as inner
-
-context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
-# all cases tested against dchip
-
-class TestScatterUpdateNet(nn.Cell):
-    def __init__(self, inputx, indices, updates):
-        super(TestScatterUpdateNet, self).__init__()
-        self.scatter_update = P.ScatterUpdate()
-        self.inputx = Parameter(inputx, name="inputx")
-        self.indices = Parameter(indices, name="indices")
-        self.updates = Parameter(updates, name="updates")
-
-    def construct(self):
-        out = self.scatter_update(self.inputx, self.indices, self.updates)
-        return out
-
-def scatter_update_net(inputx, indices, updates):
-    net = TestScatterUpdateNet(inputx, indices, updates)
-    return net()
-
-class TestScatterUpdateDynamicNet(nn.Cell):
-    def __init__(self, inputx, indices, updates):
-        super(TestScatterUpdateDynamicNet, self).__init__()
-        self.scatter_update = P.ScatterUpdate()
-        self.test_dynamic = inner.GpuConvertToDynamicShape()
-        self.inputx = Parameter(inputx, name="inputx")
-        self.indices = Parameter(indices, name="indices")
-        self.updates = Parameter(updates, name="updates")
-
-    def construct(self):
-        indices = self.test_dynamic(self.indices)
-        updates = self.test_dynamic(self.updates)
-        out = self.scatter_update(self.inputx, indices, updates)
-        return out
-
-def scatter_update_d_net(inputx, indices, updates):
-    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
-    net = TestScatterUpdateDynamicNet(inputx, indices, updates)
-    return net()
-
-class TestScatterUpdateDynamicNet2(nn.Cell):
-    def __init__(self, inputx):
-        super(TestScatterUpdateDynamicNet2, self).__init__()
-        self.scatter_update = P.ScatterUpdate()
-        self.test_dynamic = inner.GpuConvertToDynamicShape()
-        self.inputx = Parameter(inputx, name="inputx")
-
-    def construct(self, indices, updates):
-        indices = self.test_dynamic(indices)
-        updates = self.test_dynamic(updates)
-        out = self.scatter_update(self.inputx, indices, updates)
-        return out
-
-def scatter_update_d2_net(inputx, indices_1, updates_1,
-                          indices_2, updates_2):
-    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
-    net = TestScatterUpdateDynamicNet2(inputx)
-    out1 = net(indices_1, updates_1)
-    out2 = net(indices_2, updates_2)
-    return (out1, out2)
-
-@pytest.mark.level0
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.env_onecard
-def test_scatter_update_small_float32():
-    inputx = Tensor(np.zeros((2, 3)).astype(np.float32))
-    indices = Tensor(np.array([0, 1]).astype(np.int32))
-    updates = Tensor(np.arange(6).reshape((2, 3)).astype(np.float32))
-    output = scatter_update_net(inputx, indices, updates)
-    expected = np.array([[0., 1., 2.],
-                         [3., 4., 5.]])
-    np.testing.assert_array_almost_equal(output.asnumpy(), expected)
-
-@pytest.mark.level0
-@pytest.mark.platform_x86_gpu_training
-@pytest.mark.env_onecard
-def test_scatter_update_input_updated():
-    inputx = Tensor(np.zeros((2, 3)).astype(np.float32))
-    indices = Tensor(np.array([0, 1]).astype(np.int32))
-    updates = Tensor(np.arange(6).reshape((2, 3)).astype(np.float32))
-    net = TestScatterUpdateNet(inputx, indices, updates)
-    net()
-    expected = np.array([[0., 1., 2.],
-                         [3., 4., 5.]])
-    np.testing.assert_array_almost_equal(net.inputx.asnumpy(), expected)
-
-@pytest.mark.level0
-@pytest.mark.platform_x86_gpu_training -@pytest.mark.env_onecard -def test_scatter_update_input_less_than_1_float32(): - inputx = Tensor(np.array([[0.214141, 0.415151, 0.51516], - [0.876542, 0.451611, 0.55112], - [0.111244, 0.633333, 0.34444]]).astype(np.float32)) - indices = Tensor(np.array([1, 0, 2]).astype(np.int32)) - updates = Tensor(np.arange(34, 43).reshape((3, 3)).astype(np.float32)) - output = scatter_update_net(inputx, indices, updates) - expected = np.array([[37., 38., 39.], - [34., 35., 36.], - [40., 41., 42.]], dtype=np.float32) - np.testing.assert_array_almost_equal(output.asnumpy(), expected) - -@pytest.mark.level0 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.env_onecard -def test_scatter_update_float16(): - inputx = Tensor(np.zeros((2, 3)).astype(np.float16)) - indices = Tensor(np.array([0, 1]).astype(np.int32)) - updates = Tensor(np.arange(6).reshape((2, 3)).astype(np.float16)) - output = scatter_update_net(inputx, indices, updates) - expected = np.array([[0., 1., 2.], - [3., 4., 5.]]).astype(np.float16) - np.testing.assert_array_almost_equal(output.asnumpy(), expected) - -@pytest.mark.level0 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.env_onecard -def test_scatter_update_int32(): - inputx = Tensor(np.zeros((2, 3)).astype(np.int32)) - indices = Tensor(np.array([0, 1]).astype(np.int32)) - updates = Tensor(np.arange(6).reshape((2, 3)).astype(np.int32)) - output = scatter_update_net(inputx, indices, updates) - expected = np.array([[0., 1., 2.], - [3., 4., 5.]]).astype(np.int32) - np.testing.assert_array_almost_equal(output.asnumpy(), expected) - -@pytest.mark.level0 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.env_onecard -def test_scatter_update_large_float16(): - inputx = Tensor(np.zeros((4, 3)).astype(np.float16)) - indices = Tensor(np.array([[2, 1], [0, 3]]).astype(np.int32)) - updates = Tensor(np.arange(63, 75).reshape((2, 2, 3)).astype(np.float16)) - output = scatter_update_net(inputx, indices, updates) - expected = np.array([[69., 70., 71.], - [66., 67., 68.], - [63., 64., 65.], - [72., 73., 74.]]).astype(np.float16) - np.testing.assert_array_almost_equal(output.asnumpy(), expected) - -@pytest.mark.level0 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.env_onecard -def test_scatter_update_disordered_float16(): - inputx = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.float16))) - indices = Tensor(np.array([1, 2]).astype(np.int32)) - updates = Tensor(np.arange(63, 71).reshape((2, 4)).astype(np.float16)) - output = scatter_update_net(inputx, indices, updates) - expected = np.array([[45., 44., 43., 42.], - [63., 64., 65., 66.], - [67., 68., 69., 70.]]).astype(np.float16) - np.testing.assert_array_almost_equal(output.asnumpy(), expected) - -@pytest.mark.level0 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.env_onecard -def test_scatter_update_disordered_int32(): - inputx = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.int32))) - indices = Tensor(np.array([1, 2]).astype(np.int32)) - updates = Tensor(np.arange(63, 71).reshape((2, 4)).astype(np.int32)) - output = scatter_update_net(inputx, indices, updates) - expected = np.array([[45., 44., 43., 42.], - [63., 64., 65., 66.], - [67., 68., 69., 70.]]).astype(np.int32) - np.testing.assert_array_almost_equal(output.asnumpy(), expected) - -@pytest.mark.level0 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.env_onecard -def test_scatter_update_large_shape_float16(): - inputx = Tensor(np.arange(96).reshape((4, 2, 3, 4)).astype(np.float16)) - indices = 
Tensor(np.array([1, 0]).astype(np.int32)) - updates = Tensor(np.flip(np.arange(48).reshape((2, 2, 3, 4)).astype(np.float16))) - output = scatter_update_net(inputx, indices, updates) - expected = np.array([[[[23., 22., 21., 20.], - [19., 18., 17., 16.], - [15., 14., 13., 12.]], - [[11., 10., 9., 8.], - [7., 6., 5., 4.], - [3., 2., 1., 0.]]], - [[[47., 46., 45., 44.], - [43., 42., 41., 40.], - [39., 38., 37., 36.]], - [[35., 34., 33., 32.], - [31., 30., 29., 28.], - [27., 26., 25., 24.]]], - [[[48., 49., 50., 51.], - [52., 53., 54., 55.], - [56., 57., 58., 59.]], - [[60., 61., 62., 63.], - [64., 65., 66., 67.], - [68., 69., 70., 71.]]], - [[[72., 73., 74., 75.], - [76., 77., 78., 79.], - [80., 81., 82., 83.]], - [[84., 85., 86., 87.], - [88., 89., 90., 91.], - [92., 93., 94., 95.]]]]).astype(np.float16) - np.testing.assert_array_almost_equal(output.asnumpy(), expected) - -@pytest.mark.level0 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.env_onecard -def test_scatter_update_disordered_int8(): - inputx = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.int8))) - indices = Tensor(np.array([1, 2]).astype(np.int32)) - updates = Tensor(np.arange(63, 71).reshape((2, 4)).astype(np.int8)) - output = scatter_update_net(inputx, indices, updates) - expected = np.array([[45., 44., 43., 42.], - [63., 64., 65., 66.], - [67., 68., 69., 70.]]).astype(np.int8) - np.testing.assert_array_almost_equal(output.asnumpy(), expected) - -@pytest.mark.level0 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.env_onecard -def test_scatter_update_large_shape_int8(): - inputx = Tensor(np.arange(96).reshape((4, 2, 3, 4)).astype(np.int8)) - indices = Tensor(np.array([1, 0]).astype(np.int32)) - updates = Tensor(np.flip(np.arange(48).reshape((2, 2, 3, 4)).astype(np.int8))) - output = scatter_update_net(inputx, indices, updates) - expected = np.array([[[[23., 22., 21., 20.], - [19., 18., 17., 16.], - [15., 14., 13., 12.]], - [[11., 10., 9., 8.], - [7., 6., 5., 4.], - [3., 2., 1., 0.]]], - [[[47., 46., 45., 44.], - [43., 42., 41., 40.], - [39., 38., 37., 36.]], - [[35., 34., 33., 32.], - [31., 30., 29., 28.], - [27., 26., 25., 24.]]], - [[[48., 49., 50., 51.], - [52., 53., 54., 55.], - [56., 57., 58., 59.]], - [[60., 61., 62., 63.], - [64., 65., 66., 67.], - [68., 69., 70., 71.]]], - [[[72., 73., 74., 75.], - [76., 77., 78., 79.], - [80., 81., 82., 83.]], - [[84., 85., 86., 87.], - [88., 89., 90., 91.], - [92., 93., 94., 95.]]]]).astype(np.int8) - np.testing.assert_array_almost_equal(output.asnumpy(), expected) - -@pytest.mark.level0 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.env_onecard -def test_scatter_update_large_uint8(): - inputx = Tensor(np.zeros((4, 3)).astype(np.uint8)) - indices = Tensor(np.array([[2, 1], [0, 3]]).astype(np.int32)) - updates = Tensor(np.arange(63, 75).reshape((2, 2, 3)).astype(np.uint8)) - output = scatter_update_net(inputx, indices, updates) - expected = np.array([[69., 70., 71.], - [66., 67., 68.], - [63., 64., 65.], - [72., 73., 74.]]).astype(np.uint8) - np.testing.assert_array_almost_equal(output.asnumpy(), expected) - -@pytest.mark.level0 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.env_onecard -def test_scatter_update_disordered_uint8(): - inputx = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.uint8))) - indices = Tensor(np.array([1, 2]).astype(np.int32)) - updates = Tensor(np.arange(63, 71).reshape((2, 4)).astype(np.uint8)) - output = scatter_update_net(inputx, indices, updates) - expected = np.array([[45., 44., 43., 42.], - [63., 64., 65., 
66.], - [67., 68., 69., 70.]]).astype(np.uint8) - np.testing.assert_array_almost_equal(output.asnumpy(), expected) - -@pytest.mark.level0 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.env_onecard -def test_scatter_update_large_shape_dynamic_int8(): - inputx = Tensor(np.arange(96).reshape((4, 2, 3, 4)).astype(np.int8)) - indices = Tensor(np.array([1, 0]).astype(np.int32)) - updates = Tensor(np.flip(np.arange(48).reshape((2, 2, 3, 4)).astype(np.int8))) - output = scatter_update_d_net(inputx, indices, updates) - expected = np.array([[[[23., 22., 21., 20.], - [19., 18., 17., 16.], - [15., 14., 13., 12.]], - [[11., 10., 9., 8.], - [7., 6., 5., 4.], - [3., 2., 1., 0.]]], - [[[47., 46., 45., 44.], - [43., 42., 41., 40.], - [39., 38., 37., 36.]], - [[35., 34., 33., 32.], - [31., 30., 29., 28.], - [27., 26., 25., 24.]]], - [[[48., 49., 50., 51.], - [52., 53., 54., 55.], - [56., 57., 58., 59.]], - [[60., 61., 62., 63.], - [64., 65., 66., 67.], - [68., 69., 70., 71.]]], - [[[72., 73., 74., 75.], - [76., 77., 78., 79.], - [80., 81., 82., 83.]], - [[84., 85., 86., 87.], - [88., 89., 90., 91.], - [92., 93., 94., 95.]]]]).astype(np.int8) - np.testing.assert_array_almost_equal(output.asnumpy(), expected) - -@pytest.mark.level0 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.env_onecard -def test_scatter_update_disordered_dynamic_int32(): - inputx = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.int32))) - indices = Tensor(np.array([1, 2]).astype(np.int32)) - updates = Tensor(np.arange(63, 71).reshape((2, 4)).astype(np.int32)) - output = scatter_update_d_net(inputx, indices, updates) - expected = np.array([[45., 44., 43., 42.], - [63., 64., 65., 66.], - [67., 68., 69., 70.]]).astype(np.int32) - np.testing.assert_array_almost_equal(output.asnumpy(), expected) - -@pytest.mark.level0 -@pytest.mark.platform_x86_gpu_training -@pytest.mark.env_onecard -def test_scatter_update_two_inputs(): - inputx = Tensor(np.zeros((2, 3)).astype(np.float32)) - indices_1 = Tensor(np.array([0, 1]).astype(np.int32)) - updates_1 = Tensor(np.arange(6).reshape((2, 3)).astype(np.float32)) - indices_2 = Tensor(np.array([1]).astype(np.int32)) - updates_2 = Tensor(np.arange(34, 37).reshape((1, 3)).astype(np.float32)) - output_1, output_2 = scatter_update_d2_net(inputx, indices_1, updates_1, - indices_2, updates_2) - expected_1 = np.array([[0., 1., 2.], - [3., 4., 5.]], dtype=np.float32) - expected_2 = np.array([[0., 1., 2.], - [34., 35., 36.]], dtype=np.float32) - np.testing.assert_array_almost_equal(output_1.asnumpy(), expected_1) - np.testing.assert_array_almost_equal(output_2.asnumpy(), expected_2)
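
Note on the expected values asserted above: ScatterUpdate writes each update slice over inputx[index], while ScatterAdd and ScatterSub accumulate each updates slice into inputx[indices[i]] with += and -= respectively, so duplicate indices sum their contributions (order-independent for add/sub, order-dependent for update). The following standalone NumPy sketch is not part of the patch; the helper name scatter_func_reference is ours, and it only models the semantics the tests check, not the GPU kernel itself.

import numpy as np

def scatter_func_reference(func, inputx, indices, updates):
    # Apply update/add/sub along axis 0, mirroring the semantics asserted
    # for P.ScatterUpdate / P.ScatterAdd / P.ScatterSub in the tests above.
    # With duplicate indices, "update" is order-dependent (here the last
    # write wins), so a GPU kernel may legitimately differ for that case;
    # "add" and "sub" are order-independent.
    out = inputx.copy()
    flat_indices = indices.reshape(-1)
    flat_updates = updates.reshape((-1,) + inputx.shape[1:])
    for idx, upd in zip(flat_indices, flat_updates):
        if func == "update":
            out[idx] = upd
        elif func == "add":
            out[idx] += upd
        elif func == "sub":
            out[idx] -= upd
    return out

inputx = np.zeros((2, 3), dtype=np.float32)
indices = np.array([[0, 1], [0, 1]], dtype=np.int32)
updates = np.arange(12, dtype=np.float32).reshape((2, 2, 3))
print(scatter_func_reference("add", inputx, indices, updates))
# [[ 6.  8. 10.]
#  [12. 14. 16.]]

The same loop also reproduces the wrapped int8/uint8 "sub" expectations, since NumPy 8-bit integer arithmetic wraps modulo 256: for the disordered case, row 0 accumulates 45 - 419 = -374, which wraps to -118 as int8 and 138 as uint8, matching the arrays in the dynamic int8/uint8 tests.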