forked from mindspore-Ecosystem/mindspore
Add scattersub op for gpu
This commit is contained in:
parent
00dee4e3bd
commit
201f85f636
|
@ -1,57 +0,0 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "backend/kernel_compiler/gpu/arrays/scatter_add_gpu_kernel.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
MS_REG_GPU_KERNEL_ONE(ScatterAdd,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddOutputAttr(kNumberTypeFloat32),
|
||||
ScatterAddKernel, float)
|
||||
MS_REG_GPU_KERNEL_ONE(ScatterAdd,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat16)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeFloat16)
|
||||
.AddOutputAttr(kNumberTypeFloat16),
|
||||
ScatterAddKernel, half)
|
||||
MS_REG_GPU_KERNEL_ONE(ScatterAdd,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddOutputAttr(kNumberTypeInt32),
|
||||
ScatterAddKernel, int)
|
||||
MS_REG_GPU_KERNEL_ONE(ScatterAdd,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt8)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt8)
|
||||
.AddOutputAttr(kNumberTypeInt8),
|
||||
ScatterAddKernel, int8_t)
|
||||
MS_REG_GPU_KERNEL_ONE(ScatterAdd,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeUInt8)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeUInt8)
|
||||
.AddOutputAttr(kNumberTypeUInt8),
|
||||
ScatterAddKernel, uint8_t)
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,237 @@
|
|||
/**
|
||||
* Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "backend/kernel_compiler/gpu/arrays/scatter_functor_gpu_kernel.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
// ScatterUpdate
|
||||
MS_REG_GPU_KERNEL_TWO(ScatterUpdate,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddOutputAttr(kNumberTypeFloat32),
|
||||
ScatterFunctorKernel, float, int)
|
||||
MS_REG_GPU_KERNEL_TWO(ScatterUpdate,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddOutputAttr(kNumberTypeFloat32),
|
||||
ScatterFunctorKernel, float, int64_t)
|
||||
MS_REG_GPU_KERNEL_TWO(ScatterUpdate,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat16)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeFloat16)
|
||||
.AddOutputAttr(kNumberTypeFloat16),
|
||||
ScatterFunctorKernel, half, int)
|
||||
MS_REG_GPU_KERNEL_TWO(ScatterUpdate,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat16)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeFloat16)
|
||||
.AddOutputAttr(kNumberTypeFloat16),
|
||||
ScatterFunctorKernel, half, int64_t)
|
||||
MS_REG_GPU_KERNEL_TWO(ScatterUpdate,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddOutputAttr(kNumberTypeInt32),
|
||||
ScatterFunctorKernel, int, int)
|
||||
MS_REG_GPU_KERNEL_TWO(ScatterUpdate,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddOutputAttr(kNumberTypeInt32),
|
||||
ScatterFunctorKernel, int, int64_t)
|
||||
MS_REG_GPU_KERNEL_TWO(ScatterUpdate,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt8)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt8)
|
||||
.AddOutputAttr(kNumberTypeInt8),
|
||||
ScatterFunctorKernel, int8_t, int)
|
||||
MS_REG_GPU_KERNEL_TWO(ScatterUpdate,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt8)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt8)
|
||||
.AddOutputAttr(kNumberTypeInt8),
|
||||
ScatterFunctorKernel, int8_t, int64_t)
|
||||
MS_REG_GPU_KERNEL_TWO(ScatterUpdate,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeUInt8)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeUInt8)
|
||||
.AddOutputAttr(kNumberTypeUInt8),
|
||||
ScatterFunctorKernel, uint8_t, int)
|
||||
MS_REG_GPU_KERNEL_TWO(ScatterUpdate,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeUInt8)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeUInt8)
|
||||
.AddOutputAttr(kNumberTypeUInt8),
|
||||
ScatterFunctorKernel, uint8_t, int64_t)
|
||||
|
||||
// ScatterAdd
|
||||
MS_REG_GPU_KERNEL_TWO(ScatterAdd,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddOutputAttr(kNumberTypeFloat32),
|
||||
ScatterFunctorKernel, float, int)
|
||||
MS_REG_GPU_KERNEL_TWO(ScatterAdd,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddOutputAttr(kNumberTypeFloat32),
|
||||
ScatterFunctorKernel, float, int64_t)
|
||||
MS_REG_GPU_KERNEL_TWO(ScatterAdd,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat16)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeFloat16)
|
||||
.AddOutputAttr(kNumberTypeFloat16),
|
||||
ScatterFunctorKernel, half, int)
|
||||
MS_REG_GPU_KERNEL_TWO(ScatterAdd,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat16)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeFloat16)
|
||||
.AddOutputAttr(kNumberTypeFloat16),
|
||||
ScatterFunctorKernel, half, int64_t)
|
||||
MS_REG_GPU_KERNEL_TWO(ScatterAdd,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddOutputAttr(kNumberTypeInt32),
|
||||
ScatterFunctorKernel, int, int)
|
||||
MS_REG_GPU_KERNEL_TWO(ScatterAdd,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddOutputAttr(kNumberTypeInt32),
|
||||
ScatterFunctorKernel, int, int64_t)
|
||||
MS_REG_GPU_KERNEL_TWO(ScatterAdd,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt8)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt8)
|
||||
.AddOutputAttr(kNumberTypeInt8),
|
||||
ScatterFunctorKernel, int8_t, int)
|
||||
MS_REG_GPU_KERNEL_TWO(ScatterAdd,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt8)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt8)
|
||||
.AddOutputAttr(kNumberTypeInt8),
|
||||
ScatterFunctorKernel, int8_t, int64_t)
|
||||
MS_REG_GPU_KERNEL_TWO(ScatterAdd,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeUInt8)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeUInt8)
|
||||
.AddOutputAttr(kNumberTypeUInt8),
|
||||
ScatterFunctorKernel, uint8_t, int)
|
||||
MS_REG_GPU_KERNEL_TWO(ScatterAdd,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeUInt8)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeUInt8)
|
||||
.AddOutputAttr(kNumberTypeUInt8),
|
||||
ScatterFunctorKernel, uint8_t, int64_t)
|
||||
|
||||
// ScatterSub
|
||||
MS_REG_GPU_KERNEL_TWO(ScatterSub,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddOutputAttr(kNumberTypeFloat32),
|
||||
ScatterFunctorKernel, float, int)
|
||||
MS_REG_GPU_KERNEL_TWO(ScatterSub,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddOutputAttr(kNumberTypeFloat32),
|
||||
ScatterFunctorKernel, float, int64_t)
|
||||
MS_REG_GPU_KERNEL_TWO(ScatterSub,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat16)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeFloat16)
|
||||
.AddOutputAttr(kNumberTypeFloat16),
|
||||
ScatterFunctorKernel, half, int)
|
||||
MS_REG_GPU_KERNEL_TWO(ScatterSub,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat16)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeFloat16)
|
||||
.AddOutputAttr(kNumberTypeFloat16),
|
||||
ScatterFunctorKernel, half, int64_t)
|
||||
MS_REG_GPU_KERNEL_TWO(ScatterSub,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddOutputAttr(kNumberTypeInt32),
|
||||
ScatterFunctorKernel, int, int)
|
||||
MS_REG_GPU_KERNEL_TWO(ScatterSub,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddOutputAttr(kNumberTypeInt32),
|
||||
ScatterFunctorKernel, int, int64_t)
|
||||
MS_REG_GPU_KERNEL_TWO(ScatterSub,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt8)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt8)
|
||||
.AddOutputAttr(kNumberTypeInt8),
|
||||
ScatterFunctorKernel, int8_t, int)
|
||||
MS_REG_GPU_KERNEL_TWO(ScatterSub,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt8)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeInt8)
|
||||
.AddOutputAttr(kNumberTypeInt8),
|
||||
ScatterFunctorKernel, int8_t, int64_t)
|
||||
MS_REG_GPU_KERNEL_TWO(ScatterSub,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeUInt8)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeUInt8)
|
||||
.AddOutputAttr(kNumberTypeUInt8),
|
||||
ScatterFunctorKernel, uint8_t, int)
|
||||
MS_REG_GPU_KERNEL_TWO(ScatterSub,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeUInt8)
|
||||
.AddInputAttr(kNumberTypeInt64)
|
||||
.AddInputAttr(kNumberTypeUInt8)
|
||||
.AddOutputAttr(kNumberTypeUInt8),
|
||||
ScatterFunctorKernel, uint8_t, int64_t)
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
|
@ -1,5 +1,5 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
* Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
|
@ -14,21 +14,30 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_SCATTER_ADD_GPU_KERNEL_H_
|
||||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_SCATTER_ADD_GPU_KERNEL_H_
|
||||
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_SCATTER_FUNCTOR_GPU_KERNEL_H_
|
||||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_SCATTER_FUNCTOR_GPU_KERNEL_H_
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
|
||||
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
|
||||
#include "backend/kernel_compiler/gpu/cuda_impl/scatter_add_impl.cuh"
|
||||
#include "backend/kernel_compiler/gpu/cuda_impl/scatter_functor_impl.cuh"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
template <typename T>
|
||||
class ScatterAddKernel : public GpuKernel {
|
||||
|
||||
static const std::map<std::string, ScatterFunctorType> kScatterFunctorTypeMap = {
|
||||
{"ScatterUpdate", SCATTER_FUNC_UPDATE},
|
||||
{"ScatterAdd", SCATTER_FUNC_ADD},
|
||||
{"ScatterSub", SCATTER_FUNC_SUB},
|
||||
};
|
||||
|
||||
template <typename T, typename S>
|
||||
class ScatterFunctorKernel : public GpuKernel {
|
||||
public:
|
||||
ScatterAddKernel() { ResetResource(); }
|
||||
~ScatterAddKernel() override = default;
|
||||
ScatterFunctorKernel() { ResetResource(); }
|
||||
~ScatterFunctorKernel() override = default;
|
||||
|
||||
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
|
||||
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
|
||||
|
@ -37,10 +46,12 @@ class ScatterAddKernel : public GpuKernel {
|
|||
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
|
||||
T *input = GetDeviceAddress<T>(inputs, 0);
|
||||
int *indices = GetDeviceAddress<int>(inputs, 1);
|
||||
S *indices = GetDeviceAddress<S>(inputs, 1);
|
||||
T *updates = GetDeviceAddress<T>(inputs, 2);
|
||||
T *output = GetDeviceAddress<T>(outputs, 0);
|
||||
CalScatterAdd(inner_size_, indices_size_, indices, updates, input, reinterpret_cast<cudaStream_t>(stream_ptr));
|
||||
|
||||
ScatterFunc(scatter_functor_type_, inner_size_, indices_size_, indices, updates, input,
|
||||
reinterpret_cast<cudaStream_t>(stream_ptr));
|
||||
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
|
||||
cudaMemcpyAsync(&output[0], &input[0], input_size_ * sizeof(T), cudaMemcpyDeviceToDevice,
|
||||
reinterpret_cast<cudaStream_t>(stream_ptr)),
|
||||
|
@ -49,15 +60,22 @@ class ScatterAddKernel : public GpuKernel {
|
|||
}
|
||||
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
|
||||
auto iter = kScatterFunctorTypeMap.find(kernel_name);
|
||||
if (iter == kScatterFunctorTypeMap.end()) {
|
||||
MS_LOG(EXCEPTION) << "Scatter functor " << kernel_name << " is not supported.";
|
||||
} else {
|
||||
scatter_functor_type_ = iter->second;
|
||||
}
|
||||
kernel_node_ = kernel_node;
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num != 3) {
|
||||
MS_LOG(ERROR) << "Input number is " << input_num << ", but ScatterAdd needs 3 inputs.";
|
||||
MS_LOG(ERROR) << "Input number is " << input_num << ", but " << kernel_name << " needs 3 inputs.";
|
||||
return false;
|
||||
}
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_num != 1) {
|
||||
MS_LOG(ERROR) << "Output number is " << output_num << ", but ScatterAdd has 1 output.";
|
||||
MS_LOG(ERROR) << "Output number is " << output_num << ", but " << kernel_name << " has 1 output.";
|
||||
return false;
|
||||
}
|
||||
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
|
@ -90,12 +108,13 @@ class ScatterAddKernel : public GpuKernel {
|
|||
protected:
|
||||
void InitSizeLists() override {
|
||||
input_size_list_.push_back(input_size_ * sizeof(T));
|
||||
input_size_list_.push_back(indices_size_ * sizeof(int));
|
||||
input_size_list_.push_back(indices_size_ * sizeof(S));
|
||||
input_size_list_.push_back(updates_size_ * sizeof(T));
|
||||
output_size_list_.push_back(input_size_ * sizeof(T));
|
||||
}
|
||||
|
||||
private:
|
||||
ScatterFunctorType scatter_functor_type_;
|
||||
size_t input_size_;
|
||||
size_t inner_size_;
|
||||
size_t indices_size_;
|
||||
|
@ -106,4 +125,4 @@ class ScatterAddKernel : public GpuKernel {
|
|||
};
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_SCATTER_ADD_GPU_KERNEL_H_
|
||||
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_SCATTER_FUNCTOR_GPU_KERNEL_H_
|
|
@ -1,57 +0,0 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "backend/kernel_compiler/gpu/arrays/scatter_update_gpu_kernel.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
MS_REG_GPU_KERNEL_ONE(ScatterUpdate,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeFloat32)
|
||||
.AddOutputAttr(kNumberTypeFloat32),
|
||||
ScatterUpdateKernel, float)
|
||||
MS_REG_GPU_KERNEL_ONE(ScatterUpdate,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeFloat16)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeFloat16)
|
||||
.AddOutputAttr(kNumberTypeFloat16),
|
||||
ScatterUpdateKernel, half)
|
||||
MS_REG_GPU_KERNEL_ONE(ScatterUpdate,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddOutputAttr(kNumberTypeInt32),
|
||||
ScatterUpdateKernel, int)
|
||||
MS_REG_GPU_KERNEL_ONE(ScatterUpdate,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeInt8)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeInt8)
|
||||
.AddOutputAttr(kNumberTypeInt8),
|
||||
ScatterUpdateKernel, int8_t)
|
||||
MS_REG_GPU_KERNEL_ONE(ScatterUpdate,
|
||||
KernelAttr()
|
||||
.AddInputAttr(kNumberTypeUInt8)
|
||||
.AddInputAttr(kNumberTypeInt32)
|
||||
.AddInputAttr(kNumberTypeUInt8)
|
||||
.AddOutputAttr(kNumberTypeUInt8),
|
||||
ScatterUpdateKernel, uint8_t)
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
|
@ -1,109 +0,0 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_SCATTER_UPDATE_GPU_KERNEL_H_
|
||||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_SCATTER_UPDATE_GPU_KERNEL_H_
|
||||
|
||||
#include <vector>
|
||||
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
|
||||
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
|
||||
#include "backend/kernel_compiler/gpu/cuda_impl/scatter_update_impl.cuh"
|
||||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
template <typename T>
|
||||
class ScatterUpdateKernel : public GpuKernel {
|
||||
public:
|
||||
ScatterUpdateKernel() { ResetResource(); }
|
||||
~ScatterUpdateKernel() override = default;
|
||||
|
||||
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
|
||||
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
|
||||
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
|
||||
|
||||
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
|
||||
T *input = GetDeviceAddress<T>(inputs, 0);
|
||||
int *indices = GetDeviceAddress<int>(inputs, 1);
|
||||
T *updates = GetDeviceAddress<T>(inputs, 2);
|
||||
T *output = GetDeviceAddress<T>(outputs, 0);
|
||||
CalScatterUpdate(inner_size_, indices_size_, indices, updates, input, reinterpret_cast<cudaStream_t>(stream_ptr));
|
||||
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
|
||||
cudaMemcpyAsync(&output[0], &input[0], input_size_ * sizeof(T), cudaMemcpyDeviceToDevice,
|
||||
reinterpret_cast<cudaStream_t>(stream_ptr)),
|
||||
"cudaMemcpyAsync output failed");
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
kernel_node_ = kernel_node;
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num != 3) {
|
||||
MS_LOG(ERROR) << "Input number is " << input_num << ", but ScatterUpdate needs 3 inputs.";
|
||||
return false;
|
||||
}
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_num != 1) {
|
||||
MS_LOG(ERROR) << "Output number is " << output_num << ", but ScatterUpdate has 1 output.";
|
||||
return false;
|
||||
}
|
||||
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
input_size_ = 1;
|
||||
inner_size_ = 1;
|
||||
for (size_t i = 1; i < input_shape.size(); i++) {
|
||||
inner_size_ *= input_shape[i];
|
||||
}
|
||||
input_size_ = input_shape[0] * inner_size_;
|
||||
auto indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
|
||||
indices_size_ = 1;
|
||||
for (size_t i = 0; i < indices_shape.size(); i++) {
|
||||
indices_size_ *= indices_shape[i];
|
||||
}
|
||||
updates_size_ = indices_size_ * inner_size_;
|
||||
InitSizeLists();
|
||||
return true;
|
||||
}
|
||||
|
||||
void ResetResource() noexcept override {
|
||||
input_size_ = 0;
|
||||
inner_size_ = 0;
|
||||
indices_size_ = 0;
|
||||
updates_size_ = 0;
|
||||
input_size_list_.clear();
|
||||
output_size_list_.clear();
|
||||
workspace_size_list_.clear();
|
||||
}
|
||||
|
||||
protected:
|
||||
void InitSizeLists() override {
|
||||
input_size_list_.push_back(input_size_ * sizeof(T));
|
||||
input_size_list_.push_back(indices_size_ * sizeof(int));
|
||||
input_size_list_.push_back(updates_size_ * sizeof(T));
|
||||
output_size_list_.push_back(input_size_ * sizeof(T));
|
||||
}
|
||||
|
||||
private:
|
||||
size_t input_size_;
|
||||
size_t inner_size_;
|
||||
size_t indices_size_;
|
||||
size_t updates_size_;
|
||||
std::vector<size_t> input_size_list_;
|
||||
std::vector<size_t> output_size_list_;
|
||||
std::vector<size_t> workspace_size_list_;
|
||||
};
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_SCATTER_UPDATE_GPU_KERNEL_H_
|
|
@ -1,49 +0,0 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "backend/kernel_compiler/gpu/cuda_impl/util.cuh"
|
||||
#include "backend/kernel_compiler/gpu/cuda_impl/scatter_add_impl.cuh"
|
||||
|
||||
template <typename T>
|
||||
__global__ void ScatterAdd(const size_t inner_size, const size_t updates_size, const int *indices, const T *updates,
|
||||
T *input) {
|
||||
for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < updates_size; pos += blockDim.x * gridDim.x) {
|
||||
const size_t index = pos / inner_size;
|
||||
const size_t offset = pos % inner_size;
|
||||
const size_t current_pos = indices[index] * inner_size + offset;
|
||||
MsAtomicAdd(&input[current_pos], updates[pos]);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void CalScatterAdd(const size_t &inner_size, const size_t &indices_size, const int *indices, const T *updates, T *input,
|
||||
cudaStream_t cuda_stream) {
|
||||
const size_t updates_size = inner_size * indices_size;
|
||||
ScatterAdd<<<GET_BLOCKS(updates_size), GET_THREADS, 0, cuda_stream>>>(inner_size, updates_size, indices, updates,
|
||||
input);
|
||||
}
|
||||
|
||||
template void CalScatterAdd<float>(const size_t &inner_size, const size_t &indices_size, const int *indices,
|
||||
const float *updates, float *input, cudaStream_t cuda_stream);
|
||||
template void CalScatterAdd<half>(const size_t &inner_size, const size_t &indices_size, const int *indices,
|
||||
const half *updates, half *input, cudaStream_t cuda_stream);
|
||||
template void CalScatterAdd<int>(const size_t &inner_size, const size_t &indices_size, const int *indices,
|
||||
const int *updates, int *input, cudaStream_t cuda_stream);
|
||||
template void CalScatterAdd<unsigned char>(const size_t &inner_size, const size_t &indices_size, const int *indices,
|
||||
const unsigned char *updates, unsigned char *input,
|
||||
cudaStream_t cuda_stream);
|
||||
template void CalScatterAdd<int8_t>(const size_t &inner_size, const size_t &indices_size, const int *indices,
|
||||
const int8_t *updates, int8_t *input, cudaStream_t cuda_stream);
|
|
@ -0,0 +1,103 @@
|
|||
/**
|
||||
* Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "backend/kernel_compiler/gpu/cuda_impl/util.cuh"
|
||||
#include "backend/kernel_compiler/gpu/cuda_impl/scatter_functor_impl.cuh"
|
||||
|
||||
template <typename T, typename S>
|
||||
__global__ void ScatterUpdateKernel(const size_t inner_size, const size_t updates_size, const S *indices,
|
||||
const T *updates, T *input) {
|
||||
for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < updates_size; pos += blockDim.x * gridDim.x) {
|
||||
const size_t index = pos / inner_size;
|
||||
const size_t offset = pos % inner_size;
|
||||
const size_t current_pos = indices[index] * inner_size + offset;
|
||||
input[current_pos] = updates[pos];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T, typename S>
|
||||
__global__ void ScatterAddKernel(const size_t inner_size, const size_t updates_size, const S *indices, const T *updates,
|
||||
T *input) {
|
||||
for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < updates_size; pos += blockDim.x * gridDim.x) {
|
||||
const size_t index = pos / inner_size;
|
||||
const size_t offset = pos % inner_size;
|
||||
const size_t current_pos = indices[index] * inner_size + offset;
|
||||
MsAtomicAdd(&input[current_pos], updates[pos]);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T, typename S>
|
||||
__global__ void ScatterSubKernel(const size_t inner_size, const size_t updates_size, const S *indices, const T *updates,
|
||||
T *input) {
|
||||
for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < updates_size; pos += blockDim.x * gridDim.x) {
|
||||
const size_t index = pos / inner_size;
|
||||
const size_t offset = pos % inner_size;
|
||||
const size_t current_pos = indices[index] * inner_size + offset;
|
||||
MsAtomicAdd(&input[current_pos], -updates[pos]);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T, typename S>
|
||||
void ScatterFunc(enum ScatterFunctorType func_type, const size_t &inner_size, const size_t &indices_size,
|
||||
const S *indices, const T *updates, T *input, cudaStream_t cuda_stream) {
|
||||
const size_t updates_size = inner_size * indices_size;
|
||||
switch (func_type) {
|
||||
case SCATTER_FUNC_UPDATE:
|
||||
return ScatterUpdateKernel<<<GET_BLOCKS(updates_size), GET_THREADS, 0, cuda_stream>>>(inner_size, updates_size,
|
||||
indices, updates, input);
|
||||
case SCATTER_FUNC_ADD:
|
||||
return ScatterAddKernel<<<GET_BLOCKS(updates_size), GET_THREADS, 0, cuda_stream>>>(inner_size, updates_size,
|
||||
indices, updates, input);
|
||||
case SCATTER_FUNC_SUB:
|
||||
return ScatterSubKernel<<<GET_BLOCKS(updates_size), GET_THREADS, 0, cuda_stream>>>(inner_size, updates_size,
|
||||
indices, updates, input);
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
template void ScatterFunc<float, int>(enum ScatterFunctorType func_type, const size_t &inner_size,
|
||||
const size_t &indices_size, const int *indices, const float *updates,
|
||||
float *input, cudaStream_t cuda_stream);
|
||||
template void ScatterFunc<float, int64_t>(enum ScatterFunctorType func_type, const size_t &inner_size,
|
||||
const size_t &indices_size, const int64_t *indices, const float *updates,
|
||||
float *input, cudaStream_t cuda_stream);
|
||||
template void ScatterFunc<half, int>(enum ScatterFunctorType func_type, const size_t &inner_size,
|
||||
const size_t &indices_size, const int *indices, const half *updates, half *input,
|
||||
cudaStream_t cuda_stream);
|
||||
template void ScatterFunc<half, int64_t>(enum ScatterFunctorType func_type, const size_t &inner_size,
|
||||
const size_t &indices_size, const int64_t *indices, const half *updates,
|
||||
half *input, cudaStream_t cuda_stream);
|
||||
template void ScatterFunc<int, int>(enum ScatterFunctorType func_type, const size_t &inner_size,
|
||||
const size_t &indices_size, const int *indices, const int *updates, int *input,
|
||||
cudaStream_t cuda_stream);
|
||||
template void ScatterFunc<int, int64_t>(enum ScatterFunctorType func_type, const size_t &inner_size,
|
||||
const size_t &indices_size, const int64_t *indices, const int *updates,
|
||||
int *input, cudaStream_t cuda_stream);
|
||||
template void ScatterFunc<unsigned char, int>(enum ScatterFunctorType func_type, const size_t &inner_size,
|
||||
const size_t &indices_size, const int *indices,
|
||||
const unsigned char *updates, unsigned char *input,
|
||||
cudaStream_t cuda_stream);
|
||||
template void ScatterFunc<unsigned char, int64_t>(enum ScatterFunctorType func_type, const size_t &inner_size,
|
||||
const size_t &indices_size, const int64_t *indices,
|
||||
const unsigned char *updates, unsigned char *input,
|
||||
cudaStream_t cuda_stream);
|
||||
template void ScatterFunc<int8_t, int>(enum ScatterFunctorType func_type, const size_t &inner_size,
|
||||
const size_t &indices_size, const int *indices, const int8_t *updates,
|
||||
int8_t *input, cudaStream_t cuda_stream);
|
||||
template void ScatterFunc<int8_t, int64_t>(enum ScatterFunctorType func_type, const size_t &inner_size,
|
||||
const size_t &indices_size, const int64_t *indices, const int8_t *updates,
|
||||
int8_t *input, cudaStream_t cuda_stream);
|
|
@ -1,5 +1,5 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
* Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
|
@ -14,13 +14,20 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_ADD_IMPL_CUH_
|
||||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_ADD_IMPL_CUH_
|
||||
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_FUNCTOR_IMPL_CUH_
|
||||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_FUNCTOR_IMPL_CUH_
|
||||
|
||||
#include "runtime/device/gpu/cuda_common.h"
|
||||
|
||||
template <typename T>
|
||||
void CalScatterAdd(const size_t &inner_size, const size_t &indices_size, const int *indices, const T *updates, T *input,
|
||||
cudaStream_t cuda_stream);
|
||||
enum ScatterFunctorType {
|
||||
SCATTER_FUNC_UPDATE = 0,
|
||||
SCATTER_FUNC_ADD,
|
||||
SCATTER_FUNC_SUB,
|
||||
SCATTER_FUNC_INVALID_TYPE = 255
|
||||
};
|
||||
|
||||
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_ADD_IMPL_CUH_
|
||||
template <typename T, typename S>
|
||||
void ScatterFunc(enum ScatterFunctorType func_type, const size_t &inner_size, const size_t &indices_size,
|
||||
const S *indices, const T *updates, T *input, cudaStream_t cuda_stream);
|
||||
|
||||
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_FUNCTOR_IMPL_CUH_
|
|
@ -1,48 +0,0 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "backend/kernel_compiler/gpu/cuda_impl/scatter_update_impl.cuh"
|
||||
|
||||
template <typename T>
|
||||
__global__ void ScatterUpdate(const size_t inner_size, const size_t updates_size, const int *indices, const T *updates,
|
||||
T *input) {
|
||||
for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < updates_size; pos += blockDim.x * gridDim.x) {
|
||||
const size_t index = pos / inner_size;
|
||||
const size_t offset = pos % inner_size;
|
||||
const size_t current_pos = indices[index] * inner_size + offset;
|
||||
input[current_pos] = updates[pos];
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void CalScatterUpdate(const size_t &inner_size, const size_t &indices_size, const int *indices, const T *updates,
|
||||
T *input, cudaStream_t cuda_stream) {
|
||||
const size_t updates_size = inner_size * indices_size;
|
||||
ScatterUpdate<<<GET_BLOCKS(updates_size), GET_THREADS, 0, cuda_stream>>>(inner_size, updates_size, indices, updates,
|
||||
input);
|
||||
}
|
||||
|
||||
template void CalScatterUpdate<float>(const size_t &inner_size, const size_t &indices_size, const int *indices,
|
||||
const float *updates, float *input, cudaStream_t cuda_stream);
|
||||
template void CalScatterUpdate<half>(const size_t &inner_size, const size_t &indices_size, const int *indices,
|
||||
const half *updates, half *input, cudaStream_t cuda_stream);
|
||||
template void CalScatterUpdate<int>(const size_t &inner_size, const size_t &indices_size, const int *indices,
|
||||
const int *updates, int *input, cudaStream_t cuda_stream);
|
||||
template void CalScatterUpdate<unsigned char>(const size_t &inner_size, const size_t &indices_size, const int *indices,
|
||||
const unsigned char *updates, unsigned char *input,
|
||||
cudaStream_t cuda_stream);
|
||||
template void CalScatterUpdate<int8_t>(const size_t &inner_size, const size_t &indices_size, const int *indices,
|
||||
const int8_t *updates, int8_t *input, cudaStream_t cuda_stream);
|
|
@ -1,26 +0,0 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_UPDATE_IMPL_CUH_
|
||||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_UPDATE_IMPL_CUH_
|
||||
|
||||
#include "runtime/device/gpu/cuda_common.h"
|
||||
|
||||
template <typename T>
|
||||
void CalScatterUpdate(const size_t &inner_size, const size_t &indices_size, const int *indices, const T *updates,
|
||||
T *input, cudaStream_t cuda_stream);
|
||||
|
||||
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_UPDATE_IMPL_CUH_
|
|
@ -185,6 +185,8 @@ AbstractBasePtr InferImplDynamicStitch(const AnalysisEnginePtr &, const Primitiv
|
|||
const AbstractBasePtrList &args_spec_list);
|
||||
AbstractBasePtr InferImplScatterAdd(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
|
||||
const AbstractBasePtrList &args_spec_list);
|
||||
AbstractBasePtr InferImplScatterSub(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
|
||||
const AbstractBasePtrList &args_spec_list);
|
||||
AbstractBasePtr InferImplScatterUpdate(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
|
||||
const AbstractBasePtrList &args_spec_list);
|
||||
AbstractBasePtr InferImplDiv(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
|
||||
|
|
|
@ -391,6 +391,21 @@ AbstractBasePtr InferImplScatterAdd(const AnalysisEnginePtr &, const PrimitivePt
|
|||
return std::make_shared<AbstractTensor>(x->element(), std::make_shared<Shape>(shape, min_shape, max_shape));
|
||||
}
|
||||
|
||||
AbstractBasePtr InferImplScatterSub(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
|
||||
const AbstractBasePtrList &args_spec_list) {
|
||||
constexpr auto kScatterSubInputNum = 3;
|
||||
const std::string op_name = primitive->name();
|
||||
CheckRequiredArgsSize(op_name, args_spec_list, kScatterSubInputNum);
|
||||
auto x = CheckArg<AbstractTensor>(op_name, args_spec_list, 0);
|
||||
MS_EXCEPTION_IF_NULL(x);
|
||||
MS_EXCEPTION_IF_NULL(x->shape());
|
||||
ShapeVector shape = x->shape()->shape();
|
||||
ShapeVector min_shape = x->shape()->min_shape();
|
||||
ShapeVector max_shape = x->shape()->max_shape();
|
||||
CheckMinMaxShape(shape, &min_shape, &max_shape);
|
||||
return std::make_shared<AbstractTensor>(x->element(), std::make_shared<Shape>(shape, min_shape, max_shape));
|
||||
}
|
||||
|
||||
AbstractBasePtr InferImplScatterUpdate(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
|
||||
const AbstractBasePtrList &args_spec_list) {
|
||||
const std::string op_name = primitive->name();
|
||||
|
|
|
@ -92,6 +92,7 @@ PrimitiveEvalImplMap &GetPrimitiveToEvalImplMap() {
|
|||
{prim::kPrimUnsortedSegmentMax, {InferImplUnsortedSegmentMax, nullptr, true}},
|
||||
{prim::kPrimUnsortedSegmentMin, {InferImplUnsortedSegmentMin, nullptr, true}},
|
||||
{prim::kPrimScatterAdd, {InferImplScatterAdd, nullptr, true}},
|
||||
{prim::kPrimScatterSub, {InferImplScatterSub, nullptr, true}},
|
||||
{prim::kPrimSubAndFilter, {InferImplSubAndFilter, nullptr, true}},
|
||||
{prim::kPrimScatterUpdate, {InferImplScatterUpdate, nullptr, true}},
|
||||
{prim::kPrimMapCacheIdx, {InferImplMapCacheIdx, nullptr, true}},
|
||||
|
|
|
@ -214,6 +214,7 @@ inline const PrimitivePtr kPrimDynamicRNNGrad = std::make_shared<Primitive>("Dyn
|
|||
inline const PrimitivePtr kPrimDynamicGRUV2 = std::make_shared<Primitive>("DynamicGRUV2");
|
||||
inline const PrimitivePtr kPrimDynamicGRUV2Grad = std::make_shared<Primitive>("DynamicGRUV2Grad");
|
||||
inline const PrimitivePtr kPrimScatterAdd = std::make_shared<Primitive>("ScatterAdd");
|
||||
inline const PrimitivePtr kPrimScatterSub = std::make_shared<Primitive>("ScatterSub");
|
||||
inline const PrimitivePtr kPrimScatterUpdate = std::make_shared<Primitive>("ScatterUpdate");
|
||||
inline const PrimitivePtr kPrimTensorCopySlices = std::make_shared<Primitive>("TensorCopySlices");
|
||||
inline const PrimitivePtr kPrimMapUniform = std::make_shared<Primitive>("MapUniform");
|
||||
|
|
|
@ -4129,7 +4129,7 @@ class ScatterAdd(_ScatterOpDynamic):
|
|||
self.add_prim_attr('side_effect_mem', True)
|
||||
|
||||
|
||||
class ScatterSub(_ScatterOp):
|
||||
class ScatterSub(_ScatterOpDynamic):
|
||||
r"""
|
||||
Updates the value of the input tensor through the subtraction operation.
|
||||
|
||||
|
@ -4230,6 +4230,13 @@ class ScatterSub(_ScatterOp):
|
|||
[-12. -12. -12.]]
|
||||
"""
|
||||
|
||||
@prim_attr_register
|
||||
def __init__(self, use_locking=False):
|
||||
"""Initialize ScatterSub"""
|
||||
validator.check_value_type('use_locking', use_locking, [bool], self.name)
|
||||
self.init_prim_io_names(inputs=['x', 'indices', 'updates'], outputs=['y'])
|
||||
self.add_prim_attr('side_effect_mem', True)
|
||||
|
||||
|
||||
class ScatterMul(_ScatterOp):
|
||||
r"""
|
||||
|
|
|
@ -1,338 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
import mindspore.context as context
|
||||
import mindspore.nn as nn
|
||||
from mindspore import Tensor, Parameter
|
||||
from mindspore.ops import operations as P
|
||||
from mindspore.ops.operations import _inner_ops as inner
|
||||
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
|
||||
# all cases tested against dchip
|
||||
|
||||
class TestScatterAddNet(nn.Cell):
|
||||
def __init__(self, lock, inputx, indices, updates):
|
||||
super(TestScatterAddNet, self).__init__()
|
||||
self.scatter_add = P.ScatterAdd(use_locking=lock)
|
||||
self.inputx = Parameter(inputx, name="inputx")
|
||||
self.indices = Parameter(indices, name="indices")
|
||||
self.updates = Parameter(updates, name="updates")
|
||||
|
||||
def construct(self):
|
||||
out = self.scatter_add(self.inputx, self.indices, self.updates)
|
||||
return out
|
||||
|
||||
def scatter_add_net(inputx, indices, updates):
|
||||
lock = True
|
||||
net = TestScatterAddNet(lock, inputx, indices, updates)
|
||||
return net()
|
||||
|
||||
def scatter_add_use_locking_false_net(inputx, indices, updates):
|
||||
lock = False
|
||||
net = TestScatterAddNet(lock, inputx, indices, updates)
|
||||
return net()
|
||||
|
||||
class TestScatterAddDynamicNet(nn.Cell):
|
||||
def __init__(self, inputx, indices, updates):
|
||||
super(TestScatterAddDynamicNet, self).__init__()
|
||||
self.scatter_add = P.ScatterAdd()
|
||||
self.test_dynamic = inner.GpuConvertToDynamicShape()
|
||||
self.inputx = Parameter(inputx, name="inputx")
|
||||
self.indices = Parameter(indices, name="indices")
|
||||
self.updates = Parameter(updates, name="updates")
|
||||
|
||||
def construct(self):
|
||||
indices = self.test_dynamic(self.indices)
|
||||
updates = self.test_dynamic(self.updates)
|
||||
out = self.scatter_add(self.inputx, indices, updates)
|
||||
return out
|
||||
|
||||
def scatter_add_d_net(inputx, indices, updates):
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
|
||||
net = TestScatterAddDynamicNet(inputx, indices, updates)
|
||||
return net()
|
||||
|
||||
class TestScatterAddDynamicNet2(nn.Cell):
|
||||
def __init__(self, inputx):
|
||||
super(TestScatterAddDynamicNet2, self).__init__()
|
||||
self.scatter_add = P.ScatterAdd()
|
||||
self.test_dynamic = inner.GpuConvertToDynamicShape()
|
||||
self.inputx = Parameter(inputx, name="inputx")
|
||||
|
||||
def construct(self, indices, updates):
|
||||
indices = self.test_dynamic(indices)
|
||||
updates = self.test_dynamic(updates)
|
||||
out = self.scatter_add(self.inputx, indices, updates)
|
||||
return out
|
||||
|
||||
def scatter_add_d2_net(inputx, indices_1, updates_1,
|
||||
indices_2, updates_2):
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
|
||||
net = TestScatterAddDynamicNet2(inputx)
|
||||
out1 = net(indices_1, updates_1)
|
||||
out2 = net(indices_2, updates_2)
|
||||
return (out1, out2)
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_gpu_training
|
||||
@pytest.mark.env_onecard
|
||||
def test_scatter_add_small_float32():
|
||||
inputx = Tensor(np.zeros((2, 3)).astype(np.float32))
|
||||
indices = Tensor(np.array([[0, 1], [0, 1]]).astype(np.int32))
|
||||
updates = Tensor(np.arange(12).reshape((2, 2, 3)).astype(np.float32))
|
||||
output = scatter_add_net(inputx, indices, updates)
|
||||
expected = np.array([[6., 8., 10.],
|
||||
[12., 14., 16.]])
|
||||
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_gpu_training
|
||||
@pytest.mark.env_onecard
|
||||
def test_scatter_add_input_updated():
|
||||
inputx = Tensor(np.zeros((2, 3)).astype(np.float32))
|
||||
indices = Tensor(np.array([[0, 1], [0, 1]]).astype(np.int32))
|
||||
updates = Tensor(np.arange(12).reshape((2, 2, 3)).astype(np.float32))
|
||||
lock = True
|
||||
net = TestScatterAddNet(lock, inputx, indices, updates)
|
||||
net()
|
||||
expected = np.array([[6., 8., 10.],
|
||||
[12., 14., 16.]])
|
||||
np.testing.assert_array_almost_equal(net.inputx.asnumpy(), expected)
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_gpu_training
|
||||
@pytest.mark.env_onecard
|
||||
def test_scatter_add_large_shape_float32():
|
||||
inputx = Tensor(np.ones((4, 2, 3, 4)).astype(np.float32))
|
||||
indices = Tensor(np.array([[0, 2], [3, 1]]).astype(np.int32))
|
||||
updates = Tensor(np.arange(96).reshape((2, 2, 2, 3, 4)).astype(np.float32))
|
||||
output = scatter_add_net(inputx, indices, updates)
|
||||
expected = np.array([[[[1., 2., 3., 4.],
|
||||
[5., 6., 7., 8.],
|
||||
[9., 10., 11., 12.]],
|
||||
[[13., 14., 15., 16.],
|
||||
[17., 18., 19., 20.],
|
||||
[21., 22., 23., 24.]]],
|
||||
[[[73., 74., 75., 76.],
|
||||
[77., 78., 79., 80.],
|
||||
[81., 82., 83., 84.]],
|
||||
[[85., 86., 87., 88.],
|
||||
[89., 90., 91., 92.],
|
||||
[93., 94., 95., 96.]]],
|
||||
[[[25., 26., 27., 28.],
|
||||
[29., 30., 31., 32.],
|
||||
[33., 34., 35., 36.]],
|
||||
[[37., 38., 39., 40.],
|
||||
[41., 42., 43., 44.],
|
||||
[45., 46., 47., 48.]]],
|
||||
[[[49., 50., 51., 52.],
|
||||
[53., 54., 55., 56.],
|
||||
[57., 58., 59., 60.]],
|
||||
[[61., 62., 63., 64.],
|
||||
[65., 66., 67., 68.],
|
||||
[69., 70., 71., 72.]]]])
|
||||
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_gpu_training
|
||||
@pytest.mark.env_onecard
|
||||
def test_scatter_add_small_float32_use_locking_false():
|
||||
inputx = Tensor(np.zeros((2, 3)).astype(np.float32))
|
||||
indices = Tensor(np.array([1, 0]).astype(np.int32))
|
||||
updates = Tensor(np.arange(6).reshape((2, 3)).astype(np.float32))
|
||||
output = scatter_add_use_locking_false_net(inputx, indices, updates)
|
||||
expected = np.array([[3., 4., 5.],
|
||||
[0., 1., 2.]])
|
||||
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_gpu_training
|
||||
@pytest.mark.env_onecard
|
||||
def test_scatter_add_input_less_than_1_float32():
|
||||
inputx = Tensor(np.array([[0.214141, 0.415151, 0.51516],
|
||||
[0.876542, 0.451611, 0.55112],
|
||||
[0.111244, 0.633333, 0.34444]]).astype(np.float32))
|
||||
indices = Tensor(np.array([[[1, 0, 2],
|
||||
[2, 2, 0]],
|
||||
[[1, 0, 1],
|
||||
[2, 1, 2]]]).astype(np.int32))
|
||||
updates = Tensor(np.arange(34, 70).reshape((2, 2, 3, 3)).astype(np.float32))
|
||||
output = scatter_add_net(inputx, indices, updates)
|
||||
expected = np.array([[141.21414, 144.41515, 147.51517],
|
||||
[208.87654, 212.45161, 216.55112],
|
||||
[257.11124, 262.63333, 267.34442]], dtype=np.float32)
|
||||
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_gpu_training
|
||||
@pytest.mark.env_onecard
|
||||
def test_scatter_add_float16():
|
||||
inputx = Tensor(np.zeros((2, 3)).astype(np.float16))
|
||||
indices = Tensor(np.array([[0, 1], [0, 1]]).astype(np.int32))
|
||||
updates = Tensor(np.arange(12).reshape((2, 2, 3)).astype(np.float16))
|
||||
output = scatter_add_net(inputx, indices, updates)
|
||||
expected = np.array([[6., 8., 10.],
|
||||
[12., 14., 16.]])
|
||||
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_gpu_training
|
||||
@pytest.mark.env_onecard
|
||||
def test_scatter_add_large_float16():
|
||||
inputx = Tensor(np.zeros((2, 3, 4)).astype(np.float16))
|
||||
indices = Tensor(np.array([[0, 0], [1, 1]]).astype(np.int32))
|
||||
updates = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.float16))
|
||||
output = scatter_add_net(inputx, indices, updates)
|
||||
expected = np.array([[[138., 140., 142., 144.],
|
||||
[146., 148., 150., 152.],
|
||||
[154., 156., 158., 160.]],
|
||||
[[186., 188., 190., 192.],
|
||||
[194., 196., 198., 200.],
|
||||
[202., 204., 206., 208.]]])
|
||||
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_gpu_training
|
||||
@pytest.mark.env_onecard
|
||||
def test_scatter_add_disordered_float16():
|
||||
inputx = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.float16)))
|
||||
indices = Tensor(np.array([[[0, 1, 2],
|
||||
[2, 1, 0]],
|
||||
[[0, 0, 0],
|
||||
[2, 2, 2]]]).astype(np.int32))
|
||||
updates = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.float16))
|
||||
output = scatter_add_net(inputx, indices, updates)
|
||||
expected = np.array([[464., 468., 472., 476.],
|
||||
[187., 188., 189., 190.],
|
||||
[492., 496., 500., 504.]])
|
||||
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_gpu_training
|
||||
@pytest.mark.env_onecard
|
||||
def test_scatter_add_large_int32():
|
||||
inputx = Tensor(np.zeros((2, 3, 4)).astype(np.int32))
|
||||
indices = Tensor(np.array([[0, 0], [1, 1]]).astype(np.int32))
|
||||
updates = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.int32))
|
||||
output = scatter_add_net(inputx, indices, updates)
|
||||
expected = np.array([[[138., 140., 142., 144.],
|
||||
[146., 148., 150., 152.],
|
||||
[154., 156., 158., 160.]],
|
||||
[[186., 188., 190., 192.],
|
||||
[194., 196., 198., 200.],
|
||||
[202., 204., 206., 208.]]]).astype(np.int32)
|
||||
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_gpu_training
|
||||
@pytest.mark.env_onecard
|
||||
def test_scatter_add_disordered_int32():
|
||||
inputx = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.int32)))
|
||||
indices = Tensor(np.array([[[0, 1, 2],
|
||||
[2, 1, 0]],
|
||||
[[0, 0, 0],
|
||||
[2, 2, 2]]]).astype(np.int32))
|
||||
updates = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.int32))
|
||||
output = scatter_add_net(inputx, indices, updates)
|
||||
expected = np.array([[464., 468., 472., 476.],
|
||||
[187., 188., 189., 190.],
|
||||
[492., 496., 500., 504.]]).astype(np.int32)
|
||||
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_gpu_training
|
||||
@pytest.mark.env_onecard
|
||||
def test_scatter_add_disordered_dynamic_int32():
|
||||
inputx = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.int32)))
|
||||
indices = Tensor(np.array([[[0, 1, 2],
|
||||
[2, 1, 0]],
|
||||
[[0, 0, 0],
|
||||
[2, 2, 2]]]).astype(np.int32))
|
||||
updates = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.int32))
|
||||
output = scatter_add_d_net(inputx, indices, updates)
|
||||
expected = np.array([[464., 468., 472., 476.],
|
||||
[187., 188., 189., 190.],
|
||||
[492., 496., 500., 504.]]).astype(np.int32)
|
||||
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_gpu_training
|
||||
@pytest.mark.env_onecard
|
||||
def test_scatter_add_disordered_dynamic_int8():
|
||||
inputx = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.int8)))
|
||||
indices = Tensor(np.array([[[0, 1, 2],
|
||||
[2, 1, 0]],
|
||||
[[0, 0, 0],
|
||||
[2, 2, 2]]]).astype(np.int32))
|
||||
updates = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.int8))
|
||||
output = scatter_add_d_net(inputx, indices, updates)
|
||||
expected = np.array([[464., 468., 472., 476.],
|
||||
[187., 188., 189., 190.],
|
||||
[492., 496., 500., 504.]]).astype(np.int8)
|
||||
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_gpu_training
|
||||
@pytest.mark.env_onecard
|
||||
def test_scatter_add_disordered_dynamic_uint8():
|
||||
inputx = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.uint8)))
|
||||
indices = Tensor(np.array([[[0, 1, 2],
|
||||
[2, 1, 0]],
|
||||
[[0, 0, 0],
|
||||
[2, 2, 2]]]).astype(np.int32))
|
||||
updates = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.uint8))
|
||||
output = scatter_add_d_net(inputx, indices, updates)
|
||||
expected = np.array([[464., 468., 472., 476.],
|
||||
[187., 188., 189., 190.],
|
||||
[492., 496., 500., 504.]]).astype(np.uint8)
|
||||
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_gpu_training
|
||||
@pytest.mark.env_onecard
|
||||
def test_scatter_add_input_less_than_1_dynamic_float32():
|
||||
inputx = Tensor(np.array([[0.214141, 0.415151, 0.51516],
|
||||
[0.876542, 0.451611, 0.55112],
|
||||
[0.111244, 0.633333, 0.34444]]).astype(np.float32))
|
||||
indices = Tensor(np.array([[[1, 0, 2],
|
||||
[2, 2, 0]],
|
||||
[[1, 0, 1],
|
||||
[2, 1, 2]]]).astype(np.int32))
|
||||
updates = Tensor(np.arange(34, 70).reshape((2, 2, 3, 3)).astype(np.float32))
|
||||
output = scatter_add_d_net(inputx, indices, updates)
|
||||
expected = np.array([[141.21414, 144.41515, 147.51517],
|
||||
[208.87654, 212.45161, 216.55112],
|
||||
[257.11124, 262.63333, 267.34442]], dtype=np.float32)
|
||||
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_gpu_training
|
||||
@pytest.mark.env_onecard
|
||||
def test_scatter_add_dynamic_two_inputs():
|
||||
inputx = Tensor(np.zeros((2, 3)).astype(np.float32))
|
||||
indices_1 = Tensor(np.array([[0, 1], [0, 1]]).astype(np.int32))
|
||||
updates_1 = Tensor(np.arange(12).reshape((2, 2, 3)).astype(np.float32))
|
||||
indices_2 = Tensor(np.array([[0, 0], [1, 1], [1, 0]]).astype(np.int32))
|
||||
updates_2 = Tensor(np.flip(np.arange(18).reshape((3, 2, 3)).astype(np.float32)))
|
||||
output_1, output_2 = scatter_add_d2_net(inputx, indices_1, updates_1,
|
||||
indices_2, updates_2)
|
||||
expected_1 = np.array([[6., 8., 10.],
|
||||
[12., 14., 16.]])
|
||||
expected_2 = np.array([[39., 38., 37.],
|
||||
[36., 35., 34.]])
|
||||
np.testing.assert_array_almost_equal(output_1.asnumpy(), expected_1)
|
||||
np.testing.assert_array_almost_equal(output_2.asnumpy(), expected_2)
|
|
@ -0,0 +1,720 @@
|
|||
# Copyright 2020-2021 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
import mindspore.context as context
|
||||
import mindspore.nn as nn
|
||||
from mindspore import Tensor, Parameter
|
||||
from mindspore.ops import operations as P
|
||||
from mindspore.ops.operations import _inner_ops as inner
|
||||
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
|
||||
# all cases tested against dchip
|
||||
|
||||
func_map = {
|
||||
"update": P.ScatterUpdate,
|
||||
"add": P.ScatterAdd,
|
||||
"sub": P.ScatterSub,
|
||||
}
|
||||
|
||||
|
||||
class TestScatterFuncNet(nn.Cell):
|
||||
def __init__(self, func, lock, inputx, indices, updates):
|
||||
super(TestScatterFuncNet, self).__init__()
|
||||
|
||||
self.scatter_func = func_map[func](use_locking=lock)
|
||||
self.inputx = Parameter(inputx, name="inputx")
|
||||
self.indices = Parameter(indices, name="indices")
|
||||
self.updates = Parameter(updates, name="updates")
|
||||
|
||||
def construct(self):
|
||||
out = self.scatter_func(self.inputx, self.indices, self.updates)
|
||||
return out
|
||||
|
||||
|
||||
def scatter_func_net(func, inputx, indices, updates):
|
||||
lock = True
|
||||
net = TestScatterFuncNet(func, lock, inputx, indices, updates)
|
||||
return net()
|
||||
|
||||
|
||||
def scatter_func_use_locking_false_net(func, inputx, indices, updates):
|
||||
lock = False
|
||||
net = TestScatterFuncNet(func, lock, inputx, indices, updates)
|
||||
return net()
|
||||
|
||||
|
||||
class TestScatterFuncDynamicNet(nn.Cell):
|
||||
def __init__(self, func, inputx, indices, updates):
|
||||
super(TestScatterFuncDynamicNet, self).__init__()
|
||||
self.scatter_func = func_map[func]()
|
||||
self.test_dynamic = inner.GpuConvertToDynamicShape()
|
||||
self.inputx = Parameter(inputx, name="inputx")
|
||||
self.indices = Parameter(indices, name="indices")
|
||||
self.updates = Parameter(updates, name="updates")
|
||||
|
||||
def construct(self):
|
||||
indices = self.test_dynamic(self.indices)
|
||||
updates = self.test_dynamic(self.updates)
|
||||
out = self.scatter_func(self.inputx, indices, updates)
|
||||
return out
|
||||
|
||||
|
||||
def scatter_func_d_net(func, inputx, indices, updates):
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
|
||||
net = TestScatterFuncDynamicNet(func, inputx, indices, updates)
|
||||
return net()
|
||||
|
||||
|
||||
class TestScatterFuncDynamicNet2(nn.Cell):
|
||||
def __init__(self, func, inputx):
|
||||
super(TestScatterFuncDynamicNet2, self).__init__()
|
||||
self.scatter_func = func_map[func]()
|
||||
self.test_dynamic = inner.GpuConvertToDynamicShape()
|
||||
self.inputx = Parameter(inputx, name="inputx")
|
||||
|
||||
def construct(self, indices, updates):
|
||||
indices = self.test_dynamic(indices)
|
||||
updates = self.test_dynamic(updates)
|
||||
out = self.scatter_func(self.inputx, indices, updates)
|
||||
return out
|
||||
|
||||
|
||||
def scatter_func_d2_net(func, inputx, indices_1, updates_1, indices_2, updates_2):
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
|
||||
net = TestScatterFuncDynamicNet2(func, inputx)
|
||||
out1 = net(indices_1, updates_1)
|
||||
out2 = net(indices_2, updates_2)
|
||||
return (out1, out2)
|
||||
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_gpu_training
|
||||
@pytest.mark.env_onecard
|
||||
def test_scatter_func_small_float32():
|
||||
inputx = Tensor(np.zeros((2, 3)).astype(np.float32))
|
||||
indices = Tensor(np.array([[0, 1], [0, 1]]).astype(np.int32))
|
||||
updates = Tensor(np.arange(12).reshape((2, 2, 3)).astype(np.float32))
|
||||
|
||||
# update
|
||||
output = scatter_func_net("update", inputx, indices, updates)
|
||||
expected = np.array([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]])
|
||||
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
|
||||
|
||||
# add
|
||||
output = scatter_func_net("add", inputx, indices, updates)
|
||||
expected = np.array([[6.0, 8.0, 10.0], [12.0, 14.0, 16.0]])
|
||||
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
|
||||
|
||||
# sub
|
||||
output = scatter_func_net("sub", inputx, indices, updates)
|
||||
expected = np.array([[-6.0, -8.0, -10.0], [-12.0, -14.0, -16.0]])
|
||||
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
|
||||
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_gpu_training
|
||||
@pytest.mark.env_onecard
|
||||
def test_scatter_func_input_updated():
|
||||
inputx = Tensor(np.zeros((2, 3)).astype(np.float32))
|
||||
indices = Tensor(np.array([[0, 1], [0, 1]]).astype(np.int32))
|
||||
updates = Tensor(np.arange(12).reshape((2, 2, 3)).astype(np.float32))
|
||||
lock = True
|
||||
|
||||
# update
|
||||
net = TestScatterFuncNet("update", lock, inputx, indices, updates)
|
||||
net()
|
||||
expected = np.array([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]])
|
||||
np.testing.assert_array_almost_equal(net.inputx.asnumpy(), expected)
|
||||
|
||||
# add
|
||||
net = TestScatterFuncNet("add", lock, inputx, indices, updates)
|
||||
net()
|
||||
expected = np.array([[6.0, 8.0, 10.0], [12.0, 14.0, 16.0]])
|
||||
np.testing.assert_array_almost_equal(net.inputx.asnumpy(), expected)
|
||||
|
||||
# sub
|
||||
net = TestScatterFuncNet("sub", lock, inputx, indices, updates)
|
||||
net()
|
||||
expected = np.array([[-6.0, -8.0, -10.0], [-12.0, -14.0, -16.0]])
|
||||
np.testing.assert_array_almost_equal(net.inputx.asnumpy(), expected)
|
||||
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_func_large_shape_float32():
    """4-D float32 input with 2-D indices; each destination row is written once."""
    idx_np = np.array([[0, 2], [3, 1]]).astype(np.int32)
    upd_np = np.arange(96).reshape((2, 2, 2, 3, 4)).astype(np.float32)
    x = Tensor(np.ones((4, 2, 3, 4)).astype(np.float32))
    idx = Tensor(idx_np)
    upd = Tensor(upd_np)

    # Every row index appears exactly once, so the scattered updates are
    # order independent and the add/sub expectations follow arithmetically
    # from the all-ones input.
    scattered = np.zeros((4, 2, 3, 4), dtype=np.float32)
    for i in range(idx_np.shape[0]):
        for j in range(idx_np.shape[1]):
            scattered[idx_np[i, j]] = upd_np[i, j]

    # update: rows replaced outright
    out = scatter_func_net("update", x, idx, upd)
    np.testing.assert_array_almost_equal(out.asnumpy(), scattered)

    # add: ones + updates
    out = scatter_func_net("add", x, idx, upd)
    np.testing.assert_array_almost_equal(out.asnumpy(), 1.0 + scattered)

    # sub: ones - updates
    out = scatter_func_net("sub", x, idx, upd)
    np.testing.assert_array_almost_equal(out.asnumpy(), 1.0 - scattered)
|
||||
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_func_small_float32_use_locking_false():
    """Scatter ops built with use_locking=False still give the same results."""
    x = Tensor(np.zeros((2, 3)).astype(np.float32))
    idx = Tensor(np.array([1, 0]).astype(np.int32))
    upd = Tensor(np.arange(6).reshape((2, 3)).astype(np.float32))

    for func, want in (
        ("update", np.array([[3.0, 4.0, 5.0], [0.0, 1.0, 2.0]])),
        # input is all zeros, so add matches update here
        ("add", np.array([[3.0, 4.0, 5.0], [0.0, 1.0, 2.0]])),
        ("sub", np.array([[-3.0, -4.0, -5.0], [0.0, -1.0, -2.0]])),
    ):
        out = scatter_func_use_locking_false_net(func, x, idx, upd)
        np.testing.assert_array_almost_equal(out.asnumpy(), want)
|
||||
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_func_input_less_than_1_float32():
    """Fractional input values with repeated indices in a 3-D index tensor."""
    x = Tensor(
        np.array(
            [
                [0.214141, 0.415151, 0.51516],
                [0.876542, 0.451611, 0.55112],
                [0.111244, 0.633333, 0.34444],
            ]
        ).astype(np.float32)
    )
    idx = Tensor(np.array([[[1, 0, 2], [2, 2, 0]], [[1, 0, 1], [2, 1, 2]]]).astype(np.int32))
    upd = Tensor(np.arange(34, 70).reshape((2, 2, 3, 3)).astype(np.float32))

    # update: repeated indices race, expected values pinned to the dchip reference
    out = scatter_func_net("update", x, idx, upd)
    want = np.array(
        [[37.0, 38.0, 39.0], [34.0, 35.0, 66.0], [67.0, 68.0, 69.0]], dtype=np.float32
    )
    np.testing.assert_array_almost_equal(out.asnumpy(), want)

    # add: all updates accumulate on top of the fractional base
    out = scatter_func_net("add", x, idx, upd)
    want = np.array(
        [
            [141.21414, 144.41515, 147.51517],
            [208.87654, 212.45161, 216.55112],
            [257.11124, 262.63333, 267.34442],
        ],
        dtype=np.float32,
    )
    np.testing.assert_array_almost_equal(out.asnumpy(), want)

    # sub: mirror of the add case
    out = scatter_func_net("sub", x, idx, upd)
    want = np.array(
        [
            [-140.78586, -143.58485, -146.48483],
            [-207.12346, -211.54839, -215.44888],
            [-256.88876, -261.36667, -266.65558],
        ],
        dtype=np.float32,
    )
    np.testing.assert_array_almost_equal(out.asnumpy(), want)
|
||||
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_func_float16():
    """Basic float16 coverage for update/add/sub with duplicate row indices."""
    x = Tensor(np.zeros((2, 3)).astype(np.float16))
    idx = Tensor(np.array([[0, 1], [0, 1]]).astype(np.int32))
    upd = Tensor(np.arange(12).reshape((2, 2, 3)).astype(np.float16))

    for func, want in (
        ("update", np.array([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]])),
        ("add", np.array([[6.0, 8.0, 10.0], [12.0, 14.0, 16.0]])),
        ("sub", np.array([[-6.0, -8.0, -10.0], [-12.0, -14.0, -16.0]])),
    ):
        out = scatter_func_net(func, x, idx, upd)
        np.testing.assert_array_almost_equal(out.asnumpy(), want)
|
||||
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_func_large_float16():
    """3-D float16 input where both update slices target the same rows."""
    x = Tensor(np.zeros((2, 3, 4)).astype(np.float16))
    idx = Tensor(np.array([[0, 0], [1, 1]]).astype(np.int32))
    upd_np = np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.float16)
    upd = Tensor(upd_np)

    # update: duplicate indices race, result pinned to the dchip reference
    out = scatter_func_net("update", x, idx, upd)
    want = np.array(
        [
            [[63.0, 64.0, 65.0, 66.0], [67.0, 68.0, 69.0, 70.0], [71.0, 72.0, 73.0, 74.0]],
            [[99.0, 100.0, 101.0, 102.0], [103.0, 104.0, 105.0, 106.0], [95.0, 96.0, 97.0, 98.0]],
        ]
    )
    np.testing.assert_array_almost_equal(out.asnumpy(), want)

    # add: both updates for a row accumulate, so the sums are order independent
    accumulated = upd_np[:, 0] + upd_np[:, 1]
    out = scatter_func_net("add", x, idx, upd)
    np.testing.assert_array_almost_equal(out.asnumpy(), accumulated)

    # sub: exact negation of the add case (input is all zeros)
    out = scatter_func_net("sub", x, idx, upd)
    np.testing.assert_array_almost_equal(out.asnumpy(), -accumulated)
|
||||
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_func_disordered_float16():
    """Flipped float16 input combined with heavily repeated indices."""
    x = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.float16)))
    idx = Tensor(np.array([[[0, 1, 2], [2, 1, 0]], [[0, 0, 0], [2, 2, 2]]]).astype(np.int32))
    upd = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.float16))

    for func, want in (
        # update values pinned to the dchip reference for the index races
        ("update", [[95.0, 96.0, 97.0, 98.0], [67.0, 68.0, 69.0, 70.0], [99.0, 100.0, 101.0, 102.0]]),
        ("add", [[464.0, 468.0, 472.0, 476.0], [187.0, 188.0, 189.0, 190.0], [492.0, 496.0, 500.0, 504.0]]),
        ("sub", [[-374.0, -380.0, -386.0, -392.0], [-105.0, -108.0, -111.0, -114.0], [-418.0, -424.0, -430.0, -436.0]]),
    ):
        out = scatter_func_net(func, x, idx, upd)
        np.testing.assert_array_almost_equal(out.asnumpy(), np.array(want))
|
||||
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_func_large_int32():
    """Same layout as the large float16 case, exercised with int32 data."""
    x = Tensor(np.zeros((2, 3, 4)).astype(np.int32))
    idx = Tensor(np.array([[0, 0], [1, 1]]).astype(np.int32))
    upd_np = np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.int32)
    upd = Tensor(upd_np)

    # update: duplicate indices race, result pinned to the dchip reference
    out = scatter_func_net("update", x, idx, upd)
    want = np.array(
        [
            [[63, 64, 65, 66], [67, 68, 69, 70], [71, 72, 73, 74]],
            [[99, 100, 101, 102], [103, 104, 105, 106], [95, 96, 97, 98]],
        ]
    ).astype(np.int32)
    np.testing.assert_array_almost_equal(out.asnumpy(), want)

    # add: per-row accumulation is order independent
    accumulated = upd_np[:, 0] + upd_np[:, 1]
    out = scatter_func_net("add", x, idx, upd)
    np.testing.assert_array_almost_equal(out.asnumpy(), accumulated)

    # sub: negation of the add case (input is all zeros)
    out = scatter_func_net("sub", x, idx, upd)
    np.testing.assert_array_almost_equal(out.asnumpy(), -accumulated)
|
||||
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_func_disordered_int32():
    """int32 variant of the disordered / repeated-index case."""
    x = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.int32)))
    idx = Tensor(np.array([[[0, 1, 2], [2, 1, 0]], [[0, 0, 0], [2, 2, 2]]]).astype(np.int32))
    upd = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.int32))

    for func, want in (
        ("update", [[95, 96, 97, 98], [67, 68, 69, 70], [99, 100, 101, 102]]),
        ("add", [[464, 468, 472, 476], [187, 188, 189, 190], [492, 496, 500, 504]]),
        ("sub", [[-374, -380, -386, -392], [-105, -108, -111, -114], [-418, -424, -430, -436]]),
    ):
        out = scatter_func_net(func, x, idx, upd)
        np.testing.assert_array_almost_equal(out.asnumpy(), np.array(want).astype(np.int32))
|
||||
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_func_disordered_dynamic_int32():
    """Disordered int32 case routed through the dynamic-shape net."""
    x = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.int32)))
    idx = Tensor(np.array([[[0, 1, 2], [2, 1, 0]], [[0, 0, 0], [2, 2, 2]]]).astype(np.int32))
    upd = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.int32))

    for func, want in (
        ("update", [[95, 96, 97, 98], [67, 68, 69, 70], [99, 100, 101, 102]]),
        ("add", [[464, 468, 472, 476], [187, 188, 189, 190], [492, 496, 500, 504]]),
        ("sub", [[-374, -380, -386, -392], [-105, -108, -111, -114], [-418, -424, -430, -436]]),
    ):
        out = scatter_func_d_net(func, x, idx, upd)
        np.testing.assert_array_almost_equal(out.asnumpy(), np.array(want).astype(np.int32))
|
||||
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_func_disordered_dynamic_int8():
    """int8 dynamic-shape case; add/sub expectations include int8 wraparound."""
    x = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.int8)))
    idx = Tensor(np.array([[[0, 1, 2], [2, 1, 0]], [[0, 0, 0], [2, 2, 2]]]).astype(np.int32))
    upd = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.int8))

    for func, want in (
        ("update", [[95.0, 96.0, 97.0, 98.0], [67.0, 68.0, 69.0, 70.0], [99.0, 100.0, 101.0, 102.0]]),
        # values above 127 wrap when cast to int8, matching the device output
        ("add", [[464.0, 468.0, 472.0, 476.0], [187.0, 188.0, 189.0, 190.0], [492.0, 496.0, 500.0, 504.0]]),
        ("sub", [[-118.0, -124.0, 126.0, 120.0], [-105.0, -108.0, -111.0, -114.0], [94.0, 88.0, 82.0, 76.0]]),
    ):
        out = scatter_func_d_net(func, x, idx, upd)
        np.testing.assert_array_almost_equal(out.asnumpy(), np.array(want).astype(np.int8))
|
||||
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_func_disordered_dynamic_uint8():
    """uint8 dynamic-shape case; expectations include unsigned wraparound."""
    x = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.uint8)))
    idx = Tensor(np.array([[[0, 1, 2], [2, 1, 0]], [[0, 0, 0], [2, 2, 2]]]).astype(np.int32))
    upd = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.uint8))

    for func, want in (
        ("update", [[95.0, 96.0, 97.0, 98.0], [67.0, 68.0, 69.0, 70.0], [99.0, 100.0, 101.0, 102.0]]),
        # sums above 255 wrap when cast to uint8, matching the device output
        ("add", [[464.0, 468.0, 472.0, 476.0], [187.0, 188.0, 189.0, 190.0], [492.0, 496.0, 500.0, 504.0]]),
        ("sub", [[138.0, 132.0, 126.0, 120.0], [151.0, 148.0, 145.0, 142.0], [94.0, 88.0, 82.0, 76.0]]),
    ):
        out = scatter_func_d_net(func, x, idx, upd)
        np.testing.assert_array_almost_equal(out.asnumpy(), np.array(want).astype(np.uint8))
|
||||
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_func_input_less_than_1_dynamic_float32():
    """Fractional-input case routed through the dynamic-shape net."""
    x = Tensor(
        np.array(
            [
                [0.214141, 0.415151, 0.51516],
                [0.876542, 0.451611, 0.55112],
                [0.111244, 0.633333, 0.34444],
            ]
        ).astype(np.float32)
    )
    idx = Tensor(np.array([[[1, 0, 2], [2, 2, 0]], [[1, 0, 1], [2, 1, 2]]]).astype(np.int32))
    upd = Tensor(np.arange(34, 70).reshape((2, 2, 3, 3)).astype(np.float32))

    # update: repeated indices race, expected values pinned to the dchip reference
    out = scatter_func_d_net("update", x, idx, upd)
    want = np.array(
        [[37.0, 38.0, 39.0], [34.0, 35.0, 66.0], [67.0, 68.0, 69.0]], dtype=np.float32
    )
    np.testing.assert_array_almost_equal(out.asnumpy(), want)

    # add: all updates accumulate on the fractional base
    out = scatter_func_d_net("add", x, idx, upd)
    want = np.array(
        [
            [141.21414, 144.41515, 147.51517],
            [208.87654, 212.45161, 216.55112],
            [257.11124, 262.63333, 267.34442],
        ],
        dtype=np.float32,
    )
    np.testing.assert_array_almost_equal(out.asnumpy(), want)

    # sub: mirror of the add case
    out = scatter_func_d_net("sub", x, idx, upd)
    want = np.array(
        [
            [-140.78586, -143.58485, -146.48483],
            [-207.12346, -211.54839, -215.44888],
            [-256.88876, -261.36667, -266.65558],
        ],
        dtype=np.float32,
    )
    np.testing.assert_array_almost_equal(out.asnumpy(), want)
|
||||
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_func_dynamic_two_inputs():
    """Two successive dynamic-shape calls on one net must both be correct."""
    x = Tensor(np.zeros((2, 3)).astype(np.float32))
    idx_a = Tensor(np.array([[0, 1], [0, 1]]).astype(np.int32))
    upd_a = Tensor(np.arange(12).reshape((2, 2, 3)).astype(np.float32))
    idx_b = Tensor(np.array([[0, 0], [1, 1], [1, 0]]).astype(np.int32))
    upd_b = Tensor(np.flip(np.arange(18).reshape((3, 2, 3)).astype(np.float32)))

    cases = (
        (
            "update",
            np.array([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]]),
            np.array([[17.0, 16.0, 15.0], [11.0, 10.0, 9.0]]),
        ),
        (
            "add",
            np.array([[6.0, 8.0, 10.0], [12.0, 14.0, 16.0]]),
            np.array([[39.0, 38.0, 37.0], [36.0, 35.0, 34.0]]),
        ),
        (
            "sub",
            np.array([[-6.0, -8.0, -10.0], [-12.0, -14.0, -16.0]]),
            np.array([[-39.0, -38.0, -37.0], [-36.0, -35.0, -34.0]]),
        ),
    )
    for func, want_a, want_b in cases:
        out_a, out_b = scatter_func_d2_net(func, x, idx_a, upd_a, idx_b, upd_b)
        np.testing.assert_array_almost_equal(out_a.asnumpy(), want_a)
        np.testing.assert_array_almost_equal(out_b.asnumpy(), want_b)
|
|
@ -1,359 +0,0 @@
|
|||
# Copyright 2020 Huawei Technologies Co., Ltd
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
import mindspore.context as context
|
||||
import mindspore.nn as nn
|
||||
from mindspore import Tensor, Parameter
|
||||
from mindspore.ops import operations as P
|
||||
from mindspore.ops.operations import _inner_ops as inner
|
||||
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
|
||||
# all cases tested against dchip
|
||||
|
||||
class TestScatterUpdateNet(nn.Cell):
    """Static-shape ScatterUpdate wrapper with all operands held as Parameters.

    Holding inputx as a Parameter lets tests verify that the op mutates it
    in place (see test_scatter_update_input_updated).
    """

    def __init__(self, inputx, indices, updates):
        super(TestScatterUpdateNet, self).__init__()
        self.scatter_update = P.ScatterUpdate()
        self.inputx = Parameter(inputx, name="inputx")
        self.indices = Parameter(indices, name="indices")
        self.updates = Parameter(updates, name="updates")

    def construct(self):
        # Returns the scattered tensor; inputx is also updated in place.
        return self.scatter_update(self.inputx, self.indices, self.updates)
|
||||
|
||||
def scatter_update_net(inputx, indices, updates):
    """Build a static-shape ScatterUpdate net and run it once."""
    return TestScatterUpdateNet(inputx, indices, updates)()
|
||||
|
||||
class TestScatterUpdateDynamicNet(nn.Cell):
    """ScatterUpdate whose indices/updates pass through GpuConvertToDynamicShape."""

    def __init__(self, inputx, indices, updates):
        super(TestScatterUpdateDynamicNet, self).__init__()
        self.scatter_update = P.ScatterUpdate()
        self.test_dynamic = inner.GpuConvertToDynamicShape()
        self.inputx = Parameter(inputx, name="inputx")
        self.indices = Parameter(indices, name="indices")
        self.updates = Parameter(updates, name="updates")

    def construct(self):
        # Force the dynamic-shape kernel path for indices and updates.
        dyn_indices = self.test_dynamic(self.indices)
        dyn_updates = self.test_dynamic(self.updates)
        return self.scatter_update(self.inputx, dyn_indices, dyn_updates)
|
||||
|
||||
def scatter_update_d_net(inputx, indices, updates):
    """Run ScatterUpdate once with dynamically shaped indices/updates."""
    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
    return TestScatterUpdateDynamicNet(inputx, indices, updates)()
|
||||
|
||||
class TestScatterUpdateDynamicNet2(nn.Cell):
    """Dynamic-shape ScatterUpdate whose indices/updates arrive per call.

    Only inputx is a Parameter; each call can supply differently shaped
    indices/updates, exercising shape changes between executions.
    """

    def __init__(self, inputx):
        super(TestScatterUpdateDynamicNet2, self).__init__()
        self.scatter_update = P.ScatterUpdate()
        self.test_dynamic = inner.GpuConvertToDynamicShape()
        self.inputx = Parameter(inputx, name="inputx")

    def construct(self, indices, updates):
        dyn_indices = self.test_dynamic(indices)
        dyn_updates = self.test_dynamic(updates)
        return self.scatter_update(self.inputx, dyn_indices, dyn_updates)
|
||||
|
||||
def scatter_update_d2_net(inputx, indices_1, updates_1,
                          indices_2, updates_2):
    """Run the same dynamic net twice with two different index/update pairs."""
    context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
    net = TestScatterUpdateDynamicNet2(inputx)
    first = net(indices_1, updates_1)
    second = net(indices_2, updates_2)
    return (first, second)
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_update_small_float32():
    """Smallest float32 case: both rows fully replaced by the updates."""
    x = Tensor(np.zeros((2, 3)).astype(np.float32))
    idx = Tensor(np.array([0, 1]).astype(np.int32))
    upd = Tensor(np.arange(6).reshape((2, 3)).astype(np.float32))
    out = scatter_update_net(x, idx, upd)
    # identity indices -> output equals the updates themselves
    np.testing.assert_array_almost_equal(
        out.asnumpy(), np.arange(6).reshape((2, 3)).astype(np.float32)
    )
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_update_input_updated():
    """The inputx Parameter itself must be mutated in place by ScatterUpdate."""
    x = Tensor(np.zeros((2, 3)).astype(np.float32))
    idx = Tensor(np.array([0, 1]).astype(np.int32))
    upd = Tensor(np.arange(6).reshape((2, 3)).astype(np.float32))
    net = TestScatterUpdateNet(x, idx, upd)
    net()
    # Check the Parameter, not the return value.
    np.testing.assert_array_almost_equal(
        net.inputx.asnumpy(), np.arange(6).reshape((2, 3)).astype(np.float32)
    )
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_update_input_less_than_1_float32():
    """Fractional rows are fully overwritten through permuted indices."""
    x = Tensor(
        np.array(
            [
                [0.214141, 0.415151, 0.51516],
                [0.876542, 0.451611, 0.55112],
                [0.111244, 0.633333, 0.34444],
            ]
        ).astype(np.float32)
    )
    idx = Tensor(np.array([1, 0, 2]).astype(np.int32))
    upd = Tensor(np.arange(34, 43).reshape((3, 3)).astype(np.float32))
    out = scatter_update_net(x, idx, upd)
    want = np.array(
        [[37.0, 38.0, 39.0], [34.0, 35.0, 36.0], [40.0, 41.0, 42.0]], dtype=np.float32
    )
    np.testing.assert_array_almost_equal(out.asnumpy(), want)
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_update_float16():
    """float16 variant of the smallest case."""
    x = Tensor(np.zeros((2, 3)).astype(np.float16))
    idx = Tensor(np.array([0, 1]).astype(np.int32))
    upd = Tensor(np.arange(6).reshape((2, 3)).astype(np.float16))
    out = scatter_update_net(x, idx, upd)
    np.testing.assert_array_almost_equal(
        out.asnumpy(), np.arange(6).reshape((2, 3)).astype(np.float16)
    )
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_update_int32():
    """int32 variant of the smallest case."""
    x = Tensor(np.zeros((2, 3)).astype(np.int32))
    idx = Tensor(np.array([0, 1]).astype(np.int32))
    upd = Tensor(np.arange(6).reshape((2, 3)).astype(np.int32))
    out = scatter_update_net(x, idx, upd)
    np.testing.assert_array_almost_equal(
        out.asnumpy(), np.arange(6).reshape((2, 3)).astype(np.int32)
    )
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_update_large_float16():
    """2-D indices scatter (2, 2, 3) float16 updates into four distinct rows."""
    x = Tensor(np.zeros((4, 3)).astype(np.float16))
    idx = Tensor(np.array([[2, 1], [0, 3]]).astype(np.int32))
    upd = Tensor(np.arange(63, 75).reshape((2, 2, 3)).astype(np.float16))
    out = scatter_update_net(x, idx, upd)
    want = np.array(
        [[69.0, 70.0, 71.0], [66.0, 67.0, 68.0], [63.0, 64.0, 65.0], [72.0, 73.0, 74.0]]
    ).astype(np.float16)
    np.testing.assert_array_almost_equal(out.asnumpy(), want)
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_update_disordered_float16():
    """Flipped float16 input with rows 1 and 2 replaced; row 0 untouched."""
    x = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.float16)))
    idx = Tensor(np.array([1, 2]).astype(np.int32))
    upd = Tensor(np.arange(63, 71).reshape((2, 4)).astype(np.float16))
    out = scatter_update_net(x, idx, upd)
    want = np.array(
        [[45.0, 44.0, 43.0, 42.0], [63.0, 64.0, 65.0, 66.0], [67.0, 68.0, 69.0, 70.0]]
    ).astype(np.float16)
    np.testing.assert_array_almost_equal(out.asnumpy(), want)
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_update_disordered_int32():
    """Flipped int32 input with rows 1 and 2 replaced; row 0 untouched."""
    x = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.int32)))
    idx = Tensor(np.array([1, 2]).astype(np.int32))
    upd = Tensor(np.arange(63, 71).reshape((2, 4)).astype(np.int32))
    out = scatter_update_net(x, idx, upd)
    want = np.array(
        [[45.0, 44.0, 43.0, 42.0], [63.0, 64.0, 65.0, 66.0], [67.0, 68.0, 69.0, 70.0]]
    ).astype(np.int32)
    np.testing.assert_array_almost_equal(out.asnumpy(), want)
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_update_large_shape_float16():
    """4-D float16 scatter: rows 1 and 0 replaced by the flipped update slices."""
    base = np.arange(96).reshape((4, 2, 3, 4)).astype(np.float16)
    upd_np = np.flip(np.arange(48).reshape((2, 2, 3, 4)).astype(np.float16))
    x = Tensor(base.copy())
    idx = Tensor(np.array([1, 0]).astype(np.int32))
    upd = Tensor(upd_np)
    out = scatter_update_net(x, idx, upd)
    # indices [1, 0]: update slice 0 lands in row 1, slice 1 in row 0;
    # rows 2 and 3 keep their original arange values.
    want = base.copy()
    want[1] = upd_np[0]
    want[0] = upd_np[1]
    np.testing.assert_array_almost_equal(out.asnumpy(), want)
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_update_disordered_int8():
    """Flipped int8 input with rows 1 and 2 replaced; row 0 untouched."""
    x = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.int8)))
    idx = Tensor(np.array([1, 2]).astype(np.int32))
    upd = Tensor(np.arange(63, 71).reshape((2, 4)).astype(np.int8))
    out = scatter_update_net(x, idx, upd)
    want = np.array(
        [[45.0, 44.0, 43.0, 42.0], [63.0, 64.0, 65.0, 66.0], [67.0, 68.0, 69.0, 70.0]]
    ).astype(np.int8)
    np.testing.assert_array_almost_equal(out.asnumpy(), want)
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_update_large_shape_int8():
    """4-D int8 scatter: rows 1 and 0 replaced by the flipped update slices."""
    base = np.arange(96).reshape((4, 2, 3, 4)).astype(np.int8)
    upd_np = np.flip(np.arange(48).reshape((2, 2, 3, 4)).astype(np.int8))
    x = Tensor(base.copy())
    idx = Tensor(np.array([1, 0]).astype(np.int32))
    upd = Tensor(upd_np)
    out = scatter_update_net(x, idx, upd)
    # indices [1, 0]: update slice 0 lands in row 1, slice 1 in row 0;
    # rows 2 and 3 keep their original arange values.
    want = base.copy()
    want[1] = upd_np[0]
    want[0] = upd_np[1]
    np.testing.assert_array_almost_equal(out.asnumpy(), want)
|
||||
|
||||
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_update_large_uint8():
    """ScatterUpdate with a 2-D index tensor into a zeroed uint8 matrix:
    every row of the input is written exactly once."""
    idx = np.array([[2, 1], [0, 3]]).astype(np.int32)
    upd = np.arange(63, 75).reshape((2, 2, 3)).astype(np.uint8)
    result = scatter_update_net(Tensor(np.zeros((4, 3)).astype(np.uint8)),
                                Tensor(idx),
                                Tensor(upd))
    # numpy fancy-index assignment reproduces the scatter semantics:
    # row idx[i][j] receives upd[i][j].
    expected = np.zeros((4, 3)).astype(np.uint8)
    expected[idx] = upd
    np.testing.assert_array_almost_equal(result.asnumpy(), expected)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_update_disordered_uint8():
    """ScatterUpdate into a reversed uint8 matrix: rows 1 and 2 are
    overwritten by the updates, row 0 keeps its original (flipped) values."""
    original = np.flip(np.arange(34, 46).reshape(3, 4).astype(np.uint8))
    new_rows = np.arange(63, 71).reshape((2, 4)).astype(np.uint8)
    result = scatter_update_net(Tensor(original.copy()),
                                Tensor(np.array([1, 2]).astype(np.int32)),
                                Tensor(new_rows))
    # Reference: fancy-index assignment writes new_rows into rows 1 and 2.
    expected = original.copy()
    expected[[1, 2]] = new_rows
    np.testing.assert_array_almost_equal(result.asnumpy(), expected)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_update_large_shape_dynamic_int8():
    """ScatterUpdate (dynamic-shape net) on a 4-D int8 tensor.

    Rows 1 and 0 of the input are replaced by the two outer slices of the
    fully reversed update tensor; rows 2 and 3 must come back untouched.
    """
    base = np.arange(96).reshape((4, 2, 3, 4)).astype(np.int8)
    upd = np.flip(np.arange(48).reshape((2, 2, 3, 4)).astype(np.int8))
    inputx = Tensor(base.copy())
    indices = Tensor(np.array([1, 0]).astype(np.int32))
    updates = Tensor(upd.copy())
    result = scatter_update_d_net(inputx, indices, updates)
    # Build the reference result with plain numpy fancy-index assignment,
    # which mirrors the scatter semantics: expected[1] <- upd[0],
    # expected[0] <- upd[1]; remaining rows keep their original values.
    expected = base.copy()
    expected[np.array([1, 0])] = upd
    np.testing.assert_array_almost_equal(result.asnumpy(), expected)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_update_disordered_dynamic_int32():
    """ScatterUpdate (dynamic-shape net) into a reversed int32 matrix:
    rows 1 and 2 are overwritten, row 0 keeps its original values."""
    original = np.flip(np.arange(34, 46).reshape(3, 4).astype(np.int32))
    new_rows = np.arange(63, 71).reshape((2, 4)).astype(np.int32)
    result = scatter_update_d_net(Tensor(original.copy()),
                                  Tensor(np.array([1, 2]).astype(np.int32)),
                                  Tensor(new_rows))
    # Reference: fancy-index assignment writes new_rows into rows 1 and 2.
    expected = original.copy()
    expected[[1, 2]] = new_rows
    np.testing.assert_array_almost_equal(result.asnumpy(), expected)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_update_two_inputs():
    """Two chained ScatterUpdate ops in one network: the first write fills
    both rows of the zero input, the second then overwrites row 1 only."""
    inputx = Tensor(np.zeros((2, 3)).astype(np.float32))
    first_idx = Tensor(np.array([0, 1]).astype(np.int32))
    first_upd = Tensor(np.arange(6).reshape((2, 3)).astype(np.float32))
    second_idx = Tensor(np.array([1]).astype(np.int32))
    second_upd = Tensor(np.arange(34, 37).reshape((1, 3)).astype(np.float32))
    out_first, out_second = scatter_update_d2_net(inputx, first_idx, first_upd,
                                                  second_idx, second_upd)
    # After the first scatter the input equals 0..5; the second scatter
    # then replaces row 1 with 34..36.
    expected_first = np.arange(6).reshape((2, 3)).astype(np.float32)
    expected_second = expected_first.copy()
    expected_second[1] = np.arange(34, 37).astype(np.float32)
    np.testing.assert_array_almost_equal(out_first.asnumpy(), expected_first)
    np.testing.assert_array_almost_equal(out_second.asnumpy(), expected_second)
|
Loading…
Reference in New Issue