Add ScatterSub op for GPU

xcnick 2021-06-06 22:51:13 +08:00
parent 00dee4e3bd
commit 201f85f636
18 changed files with 1134 additions and 1065 deletions
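ScatterSub applies `updates` to the first input in place: each entry of `indices` selects a row of the input, the matching slice of `updates` is subtracted from it, and duplicate indices accumulate. A minimal NumPy sketch of these semantics, mirroring the small-float32 "sub" case in the new test file below (the helper name is illustrative, not part of this commit):

import numpy as np

def scatter_sub_reference(x, indices, updates):
    # Unbuffered subtraction: duplicate indices subtract repeatedly,
    # matching the atomic accumulation done by the GPU kernel.
    out = x.copy()
    np.subtract.at(out, indices, updates)
    return out

x = np.zeros((2, 3), dtype=np.float32)
indices = np.array([[0, 1], [0, 1]], dtype=np.int32)
updates = np.arange(12).reshape((2, 2, 3)).astype(np.float32)
print(scatter_sub_reference(x, indices, updates))
# [[ -6.  -8. -10.]
#  [-12. -14. -16.]]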

View File

@@ -1,57 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/gpu/arrays/scatter_add_gpu_kernel.h"
namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_ONE(ScatterAdd,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
ScatterAddKernel, float)
MS_REG_GPU_KERNEL_ONE(ScatterAdd,
KernelAttr()
.AddInputAttr(kNumberTypeFloat16)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeFloat16)
.AddOutputAttr(kNumberTypeFloat16),
ScatterAddKernel, half)
MS_REG_GPU_KERNEL_ONE(ScatterAdd,
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeInt32),
ScatterAddKernel, int)
MS_REG_GPU_KERNEL_ONE(ScatterAdd,
KernelAttr()
.AddInputAttr(kNumberTypeInt8)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt8)
.AddOutputAttr(kNumberTypeInt8),
ScatterAddKernel, int8_t)
MS_REG_GPU_KERNEL_ONE(ScatterAdd,
KernelAttr()
.AddInputAttr(kNumberTypeUInt8)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeUInt8)
.AddOutputAttr(kNumberTypeUInt8),
ScatterAddKernel, uint8_t)
} // namespace kernel
} // namespace mindspore

View File

@@ -0,0 +1,237 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/gpu/arrays/scatter_functor_gpu_kernel.h"
namespace mindspore {
namespace kernel {
// ScatterUpdate
MS_REG_GPU_KERNEL_TWO(ScatterUpdate,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
ScatterFunctorKernel, float, int)
MS_REG_GPU_KERNEL_TWO(ScatterUpdate,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
ScatterFunctorKernel, float, int64_t)
MS_REG_GPU_KERNEL_TWO(ScatterUpdate,
KernelAttr()
.AddInputAttr(kNumberTypeFloat16)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeFloat16)
.AddOutputAttr(kNumberTypeFloat16),
ScatterFunctorKernel, half, int)
MS_REG_GPU_KERNEL_TWO(ScatterUpdate,
KernelAttr()
.AddInputAttr(kNumberTypeFloat16)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeFloat16)
.AddOutputAttr(kNumberTypeFloat16),
ScatterFunctorKernel, half, int64_t)
MS_REG_GPU_KERNEL_TWO(ScatterUpdate,
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeInt32),
ScatterFunctorKernel, int, int)
MS_REG_GPU_KERNEL_TWO(ScatterUpdate,
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeInt32),
ScatterFunctorKernel, int, int64_t)
MS_REG_GPU_KERNEL_TWO(ScatterUpdate,
KernelAttr()
.AddInputAttr(kNumberTypeInt8)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt8)
.AddOutputAttr(kNumberTypeInt8),
ScatterFunctorKernel, int8_t, int)
MS_REG_GPU_KERNEL_TWO(ScatterUpdate,
KernelAttr()
.AddInputAttr(kNumberTypeInt8)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt8)
.AddOutputAttr(kNumberTypeInt8),
ScatterFunctorKernel, int8_t, int64_t)
MS_REG_GPU_KERNEL_TWO(ScatterUpdate,
KernelAttr()
.AddInputAttr(kNumberTypeUInt8)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeUInt8)
.AddOutputAttr(kNumberTypeUInt8),
ScatterFunctorKernel, uint8_t, int)
MS_REG_GPU_KERNEL_TWO(ScatterUpdate,
KernelAttr()
.AddInputAttr(kNumberTypeUInt8)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeUInt8)
.AddOutputAttr(kNumberTypeUInt8),
ScatterFunctorKernel, uint8_t, int64_t)
// ScatterAdd
MS_REG_GPU_KERNEL_TWO(ScatterAdd,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
ScatterFunctorKernel, float, int)
MS_REG_GPU_KERNEL_TWO(ScatterAdd,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
ScatterFunctorKernel, float, int64_t)
MS_REG_GPU_KERNEL_TWO(ScatterAdd,
KernelAttr()
.AddInputAttr(kNumberTypeFloat16)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeFloat16)
.AddOutputAttr(kNumberTypeFloat16),
ScatterFunctorKernel, half, int)
MS_REG_GPU_KERNEL_TWO(ScatterAdd,
KernelAttr()
.AddInputAttr(kNumberTypeFloat16)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeFloat16)
.AddOutputAttr(kNumberTypeFloat16),
ScatterFunctorKernel, half, int64_t)
MS_REG_GPU_KERNEL_TWO(ScatterAdd,
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeInt32),
ScatterFunctorKernel, int, int)
MS_REG_GPU_KERNEL_TWO(ScatterAdd,
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeInt32),
ScatterFunctorKernel, int, int64_t)
MS_REG_GPU_KERNEL_TWO(ScatterAdd,
KernelAttr()
.AddInputAttr(kNumberTypeInt8)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt8)
.AddOutputAttr(kNumberTypeInt8),
ScatterFunctorKernel, int8_t, int)
MS_REG_GPU_KERNEL_TWO(ScatterAdd,
KernelAttr()
.AddInputAttr(kNumberTypeInt8)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt8)
.AddOutputAttr(kNumberTypeInt8),
ScatterFunctorKernel, int8_t, int64_t)
MS_REG_GPU_KERNEL_TWO(ScatterAdd,
KernelAttr()
.AddInputAttr(kNumberTypeUInt8)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeUInt8)
.AddOutputAttr(kNumberTypeUInt8),
ScatterFunctorKernel, uint8_t, int)
MS_REG_GPU_KERNEL_TWO(ScatterAdd,
KernelAttr()
.AddInputAttr(kNumberTypeUInt8)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeUInt8)
.AddOutputAttr(kNumberTypeUInt8),
ScatterFunctorKernel, uint8_t, int64_t)
// ScatterSub
MS_REG_GPU_KERNEL_TWO(ScatterSub,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
ScatterFunctorKernel, float, int)
MS_REG_GPU_KERNEL_TWO(ScatterSub,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
ScatterFunctorKernel, float, int64_t)
MS_REG_GPU_KERNEL_TWO(ScatterSub,
KernelAttr()
.AddInputAttr(kNumberTypeFloat16)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeFloat16)
.AddOutputAttr(kNumberTypeFloat16),
ScatterFunctorKernel, half, int)
MS_REG_GPU_KERNEL_TWO(ScatterSub,
KernelAttr()
.AddInputAttr(kNumberTypeFloat16)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeFloat16)
.AddOutputAttr(kNumberTypeFloat16),
ScatterFunctorKernel, half, int64_t)
MS_REG_GPU_KERNEL_TWO(ScatterSub,
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeInt32),
ScatterFunctorKernel, int, int)
MS_REG_GPU_KERNEL_TWO(ScatterSub,
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeInt32),
ScatterFunctorKernel, int, int64_t)
MS_REG_GPU_KERNEL_TWO(ScatterSub,
KernelAttr()
.AddInputAttr(kNumberTypeInt8)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt8)
.AddOutputAttr(kNumberTypeInt8),
ScatterFunctorKernel, int8_t, int)
MS_REG_GPU_KERNEL_TWO(ScatterSub,
KernelAttr()
.AddInputAttr(kNumberTypeInt8)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeInt8)
.AddOutputAttr(kNumberTypeInt8),
ScatterFunctorKernel, int8_t, int64_t)
MS_REG_GPU_KERNEL_TWO(ScatterSub,
KernelAttr()
.AddInputAttr(kNumberTypeUInt8)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeUInt8)
.AddOutputAttr(kNumberTypeUInt8),
ScatterFunctorKernel, uint8_t, int)
MS_REG_GPU_KERNEL_TWO(ScatterSub,
KernelAttr()
.AddInputAttr(kNumberTypeUInt8)
.AddInputAttr(kNumberTypeInt64)
.AddInputAttr(kNumberTypeUInt8)
.AddOutputAttr(kNumberTypeUInt8),
ScatterFunctorKernel, uint8_t, int64_t)
} // namespace kernel
} // namespace mindspore

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,21 +14,30 @@
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_SCATTER_ADD_GPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_SCATTER_ADD_GPU_KERNEL_H_
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_SCATTER_FUNCTOR_GPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_SCATTER_FUNCTOR_GPU_KERNEL_H_
#include <vector>
#include <string>
#include <map>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/cuda_impl/scatter_add_impl.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/scatter_functor_impl.cuh"
namespace mindspore {
namespace kernel {
template <typename T>
class ScatterAddKernel : public GpuKernel {
static const std::map<std::string, ScatterFunctorType> kScatterFunctorTypeMap = {
{"ScatterUpdate", SCATTER_FUNC_UPDATE},
{"ScatterAdd", SCATTER_FUNC_ADD},
{"ScatterSub", SCATTER_FUNC_SUB},
};
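// One kernel class now serves ScatterUpdate/ScatterAdd/ScatterSub; T is the data type, S the index type.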
template <typename T, typename S>
class ScatterFunctorKernel : public GpuKernel {
public:
ScatterAddKernel() { ResetResource(); }
~ScatterAddKernel() override = default;
ScatterFunctorKernel() { ResetResource(); }
~ScatterFunctorKernel() override = default;
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
@@ -37,10 +46,12 @@ class ScatterAddKernel : public GpuKernel {
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
T *input = GetDeviceAddress<T>(inputs, 0);
int *indices = GetDeviceAddress<int>(inputs, 1);
S *indices = GetDeviceAddress<S>(inputs, 1);
T *updates = GetDeviceAddress<T>(inputs, 2);
T *output = GetDeviceAddress<T>(outputs, 0);
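// The functor updates `input` in place; the result is then copied to `output` on the same stream.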
CalScatterAdd(inner_size_, indices_size_, indices, updates, input, reinterpret_cast<cudaStream_t>(stream_ptr));
ScatterFunc(scatter_functor_type_, inner_size_, indices_size_, indices, updates, input,
reinterpret_cast<cudaStream_t>(stream_ptr));
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
cudaMemcpyAsync(&output[0], &input[0], input_size_ * sizeof(T), cudaMemcpyDeviceToDevice,
reinterpret_cast<cudaStream_t>(stream_ptr)),
@@ -49,15 +60,22 @@ class ScatterAddKernel : public GpuKernel {
}
bool Init(const CNodePtr &kernel_node) override {
std::string kernel_name = AnfAlgo::GetCNodeName(kernel_node);
auto iter = kScatterFunctorTypeMap.find(kernel_name);
if (iter == kScatterFunctorTypeMap.end()) {
MS_LOG(EXCEPTION) << "Scatter functor " << kernel_name << " is not supported.";
} else {
scatter_functor_type_ = iter->second;
}
kernel_node_ = kernel_node;
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 3) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but ScatterAdd needs 3 inputs.";
MS_LOG(ERROR) << "Input number is " << input_num << ", but " << kernel_name << " needs 3 inputs.";
return false;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but ScatterAdd has 1 output.";
MS_LOG(ERROR) << "Output number is " << output_num << ", but " << kernel_name << " has 1 output.";
return false;
}
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
@@ -90,12 +108,13 @@ class ScatterAddKernel : public GpuKernel {
protected:
void InitSizeLists() override {
input_size_list_.push_back(input_size_ * sizeof(T));
input_size_list_.push_back(indices_size_ * sizeof(int));
input_size_list_.push_back(indices_size_ * sizeof(S));
input_size_list_.push_back(updates_size_ * sizeof(T));
output_size_list_.push_back(input_size_ * sizeof(T));
}
private:
ScatterFunctorType scatter_functor_type_;
size_t input_size_;
size_t inner_size_;
size_t indices_size_;
@@ -106,4 +125,4 @@ class ScatterAddKernel : public GpuKernel {
};
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_SCATTER_ADD_GPU_KERNEL_H_
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_SCATTER_FUNCTOR_GPU_KERNEL_H_

View File

@@ -1,57 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/gpu/arrays/scatter_update_gpu_kernel.h"
namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_ONE(ScatterUpdate,
KernelAttr()
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
ScatterUpdateKernel, float)
MS_REG_GPU_KERNEL_ONE(ScatterUpdate,
KernelAttr()
.AddInputAttr(kNumberTypeFloat16)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeFloat16)
.AddOutputAttr(kNumberTypeFloat16),
ScatterUpdateKernel, half)
MS_REG_GPU_KERNEL_ONE(ScatterUpdate,
KernelAttr()
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddOutputAttr(kNumberTypeInt32),
ScatterUpdateKernel, int)
MS_REG_GPU_KERNEL_ONE(ScatterUpdate,
KernelAttr()
.AddInputAttr(kNumberTypeInt8)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt8)
.AddOutputAttr(kNumberTypeInt8),
ScatterUpdateKernel, int8_t)
MS_REG_GPU_KERNEL_ONE(ScatterUpdate,
KernelAttr()
.AddInputAttr(kNumberTypeUInt8)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeUInt8)
.AddOutputAttr(kNumberTypeUInt8),
ScatterUpdateKernel, uint8_t)
} // namespace kernel
} // namespace mindspore

View File

@@ -1,109 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_SCATTER_UPDATE_GPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_SCATTER_UPDATE_GPU_KERNEL_H_
#include <vector>
#include "backend/kernel_compiler/gpu/gpu_kernel.h"
#include "backend/kernel_compiler/gpu/gpu_kernel_factory.h"
#include "backend/kernel_compiler/gpu/cuda_impl/scatter_update_impl.cuh"
namespace mindspore {
namespace kernel {
template <typename T>
class ScatterUpdateKernel : public GpuKernel {
public:
ScatterUpdateKernel() { ResetResource(); }
~ScatterUpdateKernel() override = default;
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
const std::vector<size_t> &GetOutputSizeList() const override { return output_size_list_; }
const std::vector<size_t> &GetWorkspaceSizeList() const override { return workspace_size_list_; }
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
T *input = GetDeviceAddress<T>(inputs, 0);
int *indices = GetDeviceAddress<int>(inputs, 1);
T *updates = GetDeviceAddress<T>(inputs, 2);
T *output = GetDeviceAddress<T>(outputs, 0);
CalScatterUpdate(inner_size_, indices_size_, indices, updates, input, reinterpret_cast<cudaStream_t>(stream_ptr));
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
cudaMemcpyAsync(&output[0], &input[0], input_size_ * sizeof(T), cudaMemcpyDeviceToDevice,
reinterpret_cast<cudaStream_t>(stream_ptr)),
"cudaMemcpyAsync output failed");
return true;
}
bool Init(const CNodePtr &kernel_node) override {
kernel_node_ = kernel_node;
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 3) {
MS_LOG(ERROR) << "Input number is " << input_num << ", but ScatterUpdate needs 3 inputs.";
return false;
}
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(ERROR) << "Output number is " << output_num << ", but ScatterUpdate has 1 output.";
return false;
}
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
input_size_ = 1;
inner_size_ = 1;
for (size_t i = 1; i < input_shape.size(); i++) {
inner_size_ *= input_shape[i];
}
input_size_ = input_shape[0] * inner_size_;
auto indices_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
indices_size_ = 1;
for (size_t i = 0; i < indices_shape.size(); i++) {
indices_size_ *= indices_shape[i];
}
updates_size_ = indices_size_ * inner_size_;
InitSizeLists();
return true;
}
void ResetResource() noexcept override {
input_size_ = 0;
inner_size_ = 0;
indices_size_ = 0;
updates_size_ = 0;
input_size_list_.clear();
output_size_list_.clear();
workspace_size_list_.clear();
}
protected:
void InitSizeLists() override {
input_size_list_.push_back(input_size_ * sizeof(T));
input_size_list_.push_back(indices_size_ * sizeof(int));
input_size_list_.push_back(updates_size_ * sizeof(T));
output_size_list_.push_back(input_size_ * sizeof(T));
}
private:
size_t input_size_;
size_t inner_size_;
size_t indices_size_;
size_t updates_size_;
std::vector<size_t> input_size_list_;
std::vector<size_t> output_size_list_;
std::vector<size_t> workspace_size_list_;
};
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_SCATTER_UPDATE_GPU_KERNEL_H_

View File

@@ -1,49 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/gpu/cuda_impl/util.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/scatter_add_impl.cuh"
template <typename T>
__global__ void ScatterAdd(const size_t inner_size, const size_t updates_size, const int *indices, const T *updates,
T *input) {
for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < updates_size; pos += blockDim.x * gridDim.x) {
const size_t index = pos / inner_size;
const size_t offset = pos % inner_size;
const size_t current_pos = indices[index] * inner_size + offset;
MsAtomicAdd(&input[current_pos], updates[pos]);
}
}
template <typename T>
void CalScatterAdd(const size_t &inner_size, const size_t &indices_size, const int *indices, const T *updates, T *input,
cudaStream_t cuda_stream) {
const size_t updates_size = inner_size * indices_size;
ScatterAdd<<<GET_BLOCKS(updates_size), GET_THREADS, 0, cuda_stream>>>(inner_size, updates_size, indices, updates,
input);
}
template void CalScatterAdd<float>(const size_t &inner_size, const size_t &indices_size, const int *indices,
const float *updates, float *input, cudaStream_t cuda_stream);
template void CalScatterAdd<half>(const size_t &inner_size, const size_t &indices_size, const int *indices,
const half *updates, half *input, cudaStream_t cuda_stream);
template void CalScatterAdd<int>(const size_t &inner_size, const size_t &indices_size, const int *indices,
const int *updates, int *input, cudaStream_t cuda_stream);
template void CalScatterAdd<unsigned char>(const size_t &inner_size, const size_t &indices_size, const int *indices,
const unsigned char *updates, unsigned char *input,
cudaStream_t cuda_stream);
template void CalScatterAdd<int8_t>(const size_t &inner_size, const size_t &indices_size, const int *indices,
const int8_t *updates, int8_t *input, cudaStream_t cuda_stream);

View File

@@ -0,0 +1,103 @@
/**
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/gpu/cuda_impl/util.cuh"
#include "backend/kernel_compiler/gpu/cuda_impl/scatter_functor_impl.cuh"
template <typename T, typename S>
__global__ void ScatterUpdateKernel(const size_t inner_size, const size_t updates_size, const S *indices,
const T *updates, T *input) {
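// Grid-stride loop over the flattened updates buffer: pos / inner_size picks the index entry,
// pos % inner_size the offset within the target row.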
for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < updates_size; pos += blockDim.x * gridDim.x) {
const size_t index = pos / inner_size;
const size_t offset = pos % inner_size;
const size_t current_pos = indices[index] * inner_size + offset;
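// Plain store: when indices contains duplicates, the last thread to write wins.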
input[current_pos] = updates[pos];
}
}
template <typename T, typename S>
__global__ void ScatterAddKernel(const size_t inner_size, const size_t updates_size, const S *indices, const T *updates,
T *input) {
for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < updates_size; pos += blockDim.x * gridDim.x) {
const size_t index = pos / inner_size;
const size_t offset = pos % inner_size;
const size_t current_pos = indices[index] * inner_size + offset;
MsAtomicAdd(&input[current_pos], updates[pos]);
}
}
template <typename T, typename S>
__global__ void ScatterSubKernel(const size_t inner_size, const size_t updates_size, const S *indices, const T *updates,
T *input) {
for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < updates_size; pos += blockDim.x * gridDim.x) {
const size_t index = pos / inner_size;
const size_t offset = pos % inner_size;
const size_t current_pos = indices[index] * inner_size + offset;
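// Subtraction is an atomic add of the negated update, so duplicate indices accumulate safely.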
MsAtomicAdd(&input[current_pos], -updates[pos]);
}
}
template <typename T, typename S>
void ScatterFunc(enum ScatterFunctorType func_type, const size_t &inner_size, const size_t &indices_size,
const S *indices, const T *updates, T *input, cudaStream_t cuda_stream) {
const size_t updates_size = inner_size * indices_size;
switch (func_type) {
case SCATTER_FUNC_UPDATE:
return ScatterUpdateKernel<<<GET_BLOCKS(updates_size), GET_THREADS, 0, cuda_stream>>>(inner_size, updates_size,
indices, updates, input);
case SCATTER_FUNC_ADD:
return ScatterAddKernel<<<GET_BLOCKS(updates_size), GET_THREADS, 0, cuda_stream>>>(inner_size, updates_size,
indices, updates, input);
case SCATTER_FUNC_SUB:
return ScatterSubKernel<<<GET_BLOCKS(updates_size), GET_THREADS, 0, cuda_stream>>>(inner_size, updates_size,
indices, updates, input);
default:
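// Unreachable for registered kernels: ScatterFunctorKernel::Init rejects unsupported names.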
break;
}
}
template void ScatterFunc<float, int>(enum ScatterFunctorType func_type, const size_t &inner_size,
const size_t &indices_size, const int *indices, const float *updates,
float *input, cudaStream_t cuda_stream);
template void ScatterFunc<float, int64_t>(enum ScatterFunctorType func_type, const size_t &inner_size,
const size_t &indices_size, const int64_t *indices, const float *updates,
float *input, cudaStream_t cuda_stream);
template void ScatterFunc<half, int>(enum ScatterFunctorType func_type, const size_t &inner_size,
const size_t &indices_size, const int *indices, const half *updates, half *input,
cudaStream_t cuda_stream);
template void ScatterFunc<half, int64_t>(enum ScatterFunctorType func_type, const size_t &inner_size,
const size_t &indices_size, const int64_t *indices, const half *updates,
half *input, cudaStream_t cuda_stream);
template void ScatterFunc<int, int>(enum ScatterFunctorType func_type, const size_t &inner_size,
const size_t &indices_size, const int *indices, const int *updates, int *input,
cudaStream_t cuda_stream);
template void ScatterFunc<int, int64_t>(enum ScatterFunctorType func_type, const size_t &inner_size,
const size_t &indices_size, const int64_t *indices, const int *updates,
int *input, cudaStream_t cuda_stream);
template void ScatterFunc<unsigned char, int>(enum ScatterFunctorType func_type, const size_t &inner_size,
const size_t &indices_size, const int *indices,
const unsigned char *updates, unsigned char *input,
cudaStream_t cuda_stream);
template void ScatterFunc<unsigned char, int64_t>(enum ScatterFunctorType func_type, const size_t &inner_size,
const size_t &indices_size, const int64_t *indices,
const unsigned char *updates, unsigned char *input,
cudaStream_t cuda_stream);
template void ScatterFunc<int8_t, int>(enum ScatterFunctorType func_type, const size_t &inner_size,
const size_t &indices_size, const int *indices, const int8_t *updates,
int8_t *input, cudaStream_t cuda_stream);
template void ScatterFunc<int8_t, int64_t>(enum ScatterFunctorType func_type, const size_t &inner_size,
const size_t &indices_size, const int64_t *indices, const int8_t *updates,
int8_t *input, cudaStream_t cuda_stream);

View File

@@ -1,5 +1,5 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
* Copyright 2020-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,13 +14,20 @@
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_ADD_IMPL_CUH_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_ADD_IMPL_CUH_
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_FUNCTOR_IMPL_CUH_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_FUNCTOR_IMPL_CUH_
#include "runtime/device/gpu/cuda_common.h"
template <typename T>
void CalScatterAdd(const size_t &inner_size, const size_t &indices_size, const int *indices, const T *updates, T *input,
cudaStream_t cuda_stream);
enum ScatterFunctorType {
SCATTER_FUNC_UPDATE = 0,
SCATTER_FUNC_ADD,
SCATTER_FUNC_SUB,
SCATTER_FUNC_INVALID_TYPE = 255
};
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_ADD_IMPL_CUH_
template <typename T, typename S>
void ScatterFunc(enum ScatterFunctorType func_type, const size_t &inner_size, const size_t &indices_size,
const S *indices, const T *updates, T *input, cudaStream_t cuda_stream);
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_FUNCTOR_IMPL_CUH_

View File

@@ -1,48 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "backend/kernel_compiler/gpu/cuda_impl/scatter_update_impl.cuh"
template <typename T>
__global__ void ScatterUpdate(const size_t inner_size, const size_t updates_size, const int *indices, const T *updates,
T *input) {
for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < updates_size; pos += blockDim.x * gridDim.x) {
const size_t index = pos / inner_size;
const size_t offset = pos % inner_size;
const size_t current_pos = indices[index] * inner_size + offset;
input[current_pos] = updates[pos];
}
}
template <typename T>
void CalScatterUpdate(const size_t &inner_size, const size_t &indices_size, const int *indices, const T *updates,
T *input, cudaStream_t cuda_stream) {
const size_t updates_size = inner_size * indices_size;
ScatterUpdate<<<GET_BLOCKS(updates_size), GET_THREADS, 0, cuda_stream>>>(inner_size, updates_size, indices, updates,
input);
}
template void CalScatterUpdate<float>(const size_t &inner_size, const size_t &indices_size, const int *indices,
const float *updates, float *input, cudaStream_t cuda_stream);
template void CalScatterUpdate<half>(const size_t &inner_size, const size_t &indices_size, const int *indices,
const half *updates, half *input, cudaStream_t cuda_stream);
template void CalScatterUpdate<int>(const size_t &inner_size, const size_t &indices_size, const int *indices,
const int *updates, int *input, cudaStream_t cuda_stream);
template void CalScatterUpdate<unsigned char>(const size_t &inner_size, const size_t &indices_size, const int *indices,
const unsigned char *updates, unsigned char *input,
cudaStream_t cuda_stream);
template void CalScatterUpdate<int8_t>(const size_t &inner_size, const size_t &indices_size, const int *indices,
const int8_t *updates, int8_t *input, cudaStream_t cuda_stream);

View File

@@ -1,26 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_UPDATE_IMPL_CUH_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_UPDATE_IMPL_CUH_
#include "runtime/device/gpu/cuda_common.h"
template <typename T>
void CalScatterUpdate(const size_t &inner_size, const size_t &indices_size, const int *indices, const T *updates,
T *input, cudaStream_t cuda_stream);
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_SCATTER_UPDATE_IMPL_CUH_

View File

@@ -185,6 +185,8 @@ AbstractBasePtr InferImplDynamicStitch(const AnalysisEnginePtr &, const Primitiv
const AbstractBasePtrList &args_spec_list);
AbstractBasePtr InferImplScatterAdd(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
const AbstractBasePtrList &args_spec_list);
AbstractBasePtr InferImplScatterSub(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
const AbstractBasePtrList &args_spec_list);
AbstractBasePtr InferImplScatterUpdate(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
const AbstractBasePtrList &args_spec_list);
AbstractBasePtr InferImplDiv(const AnalysisEnginePtr &, const PrimitivePtr &primitive,

View File

@@ -391,6 +391,21 @@ AbstractBasePtr InferImplScatterAdd(const AnalysisEnginePtr &, const PrimitivePt
return std::make_shared<AbstractTensor>(x->element(), std::make_shared<Shape>(shape, min_shape, max_shape));
}
AbstractBasePtr InferImplScatterSub(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
const AbstractBasePtrList &args_spec_list) {
constexpr auto kScatterSubInputNum = 3;
const std::string op_name = primitive->name();
CheckRequiredArgsSize(op_name, args_spec_list, kScatterSubInputNum);
auto x = CheckArg<AbstractTensor>(op_name, args_spec_list, 0);
MS_EXCEPTION_IF_NULL(x);
MS_EXCEPTION_IF_NULL(x->shape());
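// ScatterSub is shape-preserving: the output abstract carries the input's element type and shape.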
ShapeVector shape = x->shape()->shape();
ShapeVector min_shape = x->shape()->min_shape();
ShapeVector max_shape = x->shape()->max_shape();
CheckMinMaxShape(shape, &min_shape, &max_shape);
return std::make_shared<AbstractTensor>(x->element(), std::make_shared<Shape>(shape, min_shape, max_shape));
}
AbstractBasePtr InferImplScatterUpdate(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
const AbstractBasePtrList &args_spec_list) {
const std::string op_name = primitive->name();

View File

@@ -92,6 +92,7 @@ PrimitiveEvalImplMap &GetPrimitiveToEvalImplMap() {
{prim::kPrimUnsortedSegmentMax, {InferImplUnsortedSegmentMax, nullptr, true}},
{prim::kPrimUnsortedSegmentMin, {InferImplUnsortedSegmentMin, nullptr, true}},
{prim::kPrimScatterAdd, {InferImplScatterAdd, nullptr, true}},
{prim::kPrimScatterSub, {InferImplScatterSub, nullptr, true}},
{prim::kPrimSubAndFilter, {InferImplSubAndFilter, nullptr, true}},
{prim::kPrimScatterUpdate, {InferImplScatterUpdate, nullptr, true}},
{prim::kPrimMapCacheIdx, {InferImplMapCacheIdx, nullptr, true}},

View File

@@ -214,6 +214,7 @@ inline const PrimitivePtr kPrimDynamicRNNGrad = std::make_shared<Primitive>("Dyn
inline const PrimitivePtr kPrimDynamicGRUV2 = std::make_shared<Primitive>("DynamicGRUV2");
inline const PrimitivePtr kPrimDynamicGRUV2Grad = std::make_shared<Primitive>("DynamicGRUV2Grad");
inline const PrimitivePtr kPrimScatterAdd = std::make_shared<Primitive>("ScatterAdd");
inline const PrimitivePtr kPrimScatterSub = std::make_shared<Primitive>("ScatterSub");
inline const PrimitivePtr kPrimScatterUpdate = std::make_shared<Primitive>("ScatterUpdate");
inline const PrimitivePtr kPrimTensorCopySlices = std::make_shared<Primitive>("TensorCopySlices");
inline const PrimitivePtr kPrimMapUniform = std::make_shared<Primitive>("MapUniform");

View File

@@ -4129,7 +4129,7 @@ class ScatterAdd(_ScatterOpDynamic):
self.add_prim_attr('side_effect_mem', True)
class ScatterSub(_ScatterOp):
class ScatterSub(_ScatterOpDynamic):
r"""
Updates the value of the input tensor through the subtraction operation.
@@ -4230,6 +4230,13 @@ class ScatterSub(_ScatterOp):
[-12. -12. -12.]]
"""
@prim_attr_register
def __init__(self, use_locking=False):
"""Initialize ScatterSub"""
validator.check_value_type('use_locking', use_locking, [bool], self.name)
self.init_prim_io_names(inputs=['x', 'indices', 'updates'], outputs=['y'])
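# side_effect_mem keeps the in-place parameter update from being optimized away or reordered.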
self.add_prim_attr('side_effect_mem', True)
class ScatterMul(_ScatterOp):
r"""

View File

@@ -1,338 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor, Parameter
from mindspore.ops import operations as P
from mindspore.ops.operations import _inner_ops as inner
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
# all cases tested against dchip
class TestScatterAddNet(nn.Cell):
def __init__(self, lock, inputx, indices, updates):
super(TestScatterAddNet, self).__init__()
self.scatter_add = P.ScatterAdd(use_locking=lock)
self.inputx = Parameter(inputx, name="inputx")
self.indices = Parameter(indices, name="indices")
self.updates = Parameter(updates, name="updates")
def construct(self):
out = self.scatter_add(self.inputx, self.indices, self.updates)
return out
def scatter_add_net(inputx, indices, updates):
lock = True
net = TestScatterAddNet(lock, inputx, indices, updates)
return net()
def scatter_add_use_locking_false_net(inputx, indices, updates):
lock = False
net = TestScatterAddNet(lock, inputx, indices, updates)
return net()
class TestScatterAddDynamicNet(nn.Cell):
def __init__(self, inputx, indices, updates):
super(TestScatterAddDynamicNet, self).__init__()
self.scatter_add = P.ScatterAdd()
self.test_dynamic = inner.GpuConvertToDynamicShape()
self.inputx = Parameter(inputx, name="inputx")
self.indices = Parameter(indices, name="indices")
self.updates = Parameter(updates, name="updates")
def construct(self):
indices = self.test_dynamic(self.indices)
updates = self.test_dynamic(self.updates)
out = self.scatter_add(self.inputx, indices, updates)
return out
def scatter_add_d_net(inputx, indices, updates):
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
net = TestScatterAddDynamicNet(inputx, indices, updates)
return net()
class TestScatterAddDynamicNet2(nn.Cell):
def __init__(self, inputx):
super(TestScatterAddDynamicNet2, self).__init__()
self.scatter_add = P.ScatterAdd()
self.test_dynamic = inner.GpuConvertToDynamicShape()
self.inputx = Parameter(inputx, name="inputx")
def construct(self, indices, updates):
indices = self.test_dynamic(indices)
updates = self.test_dynamic(updates)
out = self.scatter_add(self.inputx, indices, updates)
return out
def scatter_add_d2_net(inputx, indices_1, updates_1,
indices_2, updates_2):
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
net = TestScatterAddDynamicNet2(inputx)
out1 = net(indices_1, updates_1)
out2 = net(indices_2, updates_2)
return (out1, out2)
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_add_small_float32():
inputx = Tensor(np.zeros((2, 3)).astype(np.float32))
indices = Tensor(np.array([[0, 1], [0, 1]]).astype(np.int32))
updates = Tensor(np.arange(12).reshape((2, 2, 3)).astype(np.float32))
output = scatter_add_net(inputx, indices, updates)
expected = np.array([[6., 8., 10.],
[12., 14., 16.]])
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_add_input_updated():
inputx = Tensor(np.zeros((2, 3)).astype(np.float32))
indices = Tensor(np.array([[0, 1], [0, 1]]).astype(np.int32))
updates = Tensor(np.arange(12).reshape((2, 2, 3)).astype(np.float32))
lock = True
net = TestScatterAddNet(lock, inputx, indices, updates)
net()
expected = np.array([[6., 8., 10.],
[12., 14., 16.]])
np.testing.assert_array_almost_equal(net.inputx.asnumpy(), expected)
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_add_large_shape_float32():
inputx = Tensor(np.ones((4, 2, 3, 4)).astype(np.float32))
indices = Tensor(np.array([[0, 2], [3, 1]]).astype(np.int32))
updates = Tensor(np.arange(96).reshape((2, 2, 2, 3, 4)).astype(np.float32))
output = scatter_add_net(inputx, indices, updates)
expected = np.array([[[[1., 2., 3., 4.],
[5., 6., 7., 8.],
[9., 10., 11., 12.]],
[[13., 14., 15., 16.],
[17., 18., 19., 20.],
[21., 22., 23., 24.]]],
[[[73., 74., 75., 76.],
[77., 78., 79., 80.],
[81., 82., 83., 84.]],
[[85., 86., 87., 88.],
[89., 90., 91., 92.],
[93., 94., 95., 96.]]],
[[[25., 26., 27., 28.],
[29., 30., 31., 32.],
[33., 34., 35., 36.]],
[[37., 38., 39., 40.],
[41., 42., 43., 44.],
[45., 46., 47., 48.]]],
[[[49., 50., 51., 52.],
[53., 54., 55., 56.],
[57., 58., 59., 60.]],
[[61., 62., 63., 64.],
[65., 66., 67., 68.],
[69., 70., 71., 72.]]]])
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_add_small_float32_use_locking_false():
inputx = Tensor(np.zeros((2, 3)).astype(np.float32))
indices = Tensor(np.array([1, 0]).astype(np.int32))
updates = Tensor(np.arange(6).reshape((2, 3)).astype(np.float32))
output = scatter_add_use_locking_false_net(inputx, indices, updates)
expected = np.array([[3., 4., 5.],
[0., 1., 2.]])
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_add_input_less_than_1_float32():
inputx = Tensor(np.array([[0.214141, 0.415151, 0.51516],
[0.876542, 0.451611, 0.55112],
[0.111244, 0.633333, 0.34444]]).astype(np.float32))
indices = Tensor(np.array([[[1, 0, 2],
[2, 2, 0]],
[[1, 0, 1],
[2, 1, 2]]]).astype(np.int32))
updates = Tensor(np.arange(34, 70).reshape((2, 2, 3, 3)).astype(np.float32))
output = scatter_add_net(inputx, indices, updates)
expected = np.array([[141.21414, 144.41515, 147.51517],
[208.87654, 212.45161, 216.55112],
[257.11124, 262.63333, 267.34442]], dtype=np.float32)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_add_float16():
inputx = Tensor(np.zeros((2, 3)).astype(np.float16))
indices = Tensor(np.array([[0, 1], [0, 1]]).astype(np.int32))
updates = Tensor(np.arange(12).reshape((2, 2, 3)).astype(np.float16))
output = scatter_add_net(inputx, indices, updates)
expected = np.array([[6., 8., 10.],
[12., 14., 16.]])
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_add_large_float16():
inputx = Tensor(np.zeros((2, 3, 4)).astype(np.float16))
indices = Tensor(np.array([[0, 0], [1, 1]]).astype(np.int32))
updates = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.float16))
output = scatter_add_net(inputx, indices, updates)
expected = np.array([[[138., 140., 142., 144.],
[146., 148., 150., 152.],
[154., 156., 158., 160.]],
[[186., 188., 190., 192.],
[194., 196., 198., 200.],
[202., 204., 206., 208.]]])
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_add_disordered_float16():
inputx = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.float16)))
indices = Tensor(np.array([[[0, 1, 2],
[2, 1, 0]],
[[0, 0, 0],
[2, 2, 2]]]).astype(np.int32))
updates = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.float16))
output = scatter_add_net(inputx, indices, updates)
expected = np.array([[464., 468., 472., 476.],
[187., 188., 189., 190.],
[492., 496., 500., 504.]])
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_add_large_int32():
inputx = Tensor(np.zeros((2, 3, 4)).astype(np.int32))
indices = Tensor(np.array([[0, 0], [1, 1]]).astype(np.int32))
updates = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.int32))
output = scatter_add_net(inputx, indices, updates)
expected = np.array([[[138., 140., 142., 144.],
[146., 148., 150., 152.],
[154., 156., 158., 160.]],
[[186., 188., 190., 192.],
[194., 196., 198., 200.],
[202., 204., 206., 208.]]]).astype(np.int32)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_add_disordered_int32():
inputx = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.int32)))
indices = Tensor(np.array([[[0, 1, 2],
[2, 1, 0]],
[[0, 0, 0],
[2, 2, 2]]]).astype(np.int32))
updates = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.int32))
output = scatter_add_net(inputx, indices, updates)
expected = np.array([[464., 468., 472., 476.],
[187., 188., 189., 190.],
[492., 496., 500., 504.]]).astype(np.int32)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_add_disordered_dynamic_int32():
inputx = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.int32)))
indices = Tensor(np.array([[[0, 1, 2],
[2, 1, 0]],
[[0, 0, 0],
[2, 2, 2]]]).astype(np.int32))
updates = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.int32))
output = scatter_add_d_net(inputx, indices, updates)
expected = np.array([[464., 468., 472., 476.],
[187., 188., 189., 190.],
[492., 496., 500., 504.]]).astype(np.int32)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_add_disordered_dynamic_int8():
inputx = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.int8)))
indices = Tensor(np.array([[[0, 1, 2],
[2, 1, 0]],
[[0, 0, 0],
[2, 2, 2]]]).astype(np.int32))
updates = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.int8))
output = scatter_add_d_net(inputx, indices, updates)
expected = np.array([[464., 468., 472., 476.],
[187., 188., 189., 190.],
[492., 496., 500., 504.]]).astype(np.int8)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_add_disordered_dynamic_uint8():
inputx = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.uint8)))
indices = Tensor(np.array([[[0, 1, 2],
[2, 1, 0]],
[[0, 0, 0],
[2, 2, 2]]]).astype(np.int32))
updates = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.uint8))
output = scatter_add_d_net(inputx, indices, updates)
expected = np.array([[464., 468., 472., 476.],
[187., 188., 189., 190.],
[492., 496., 500., 504.]]).astype(np.uint8)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_add_input_less_than_1_dynamic_float32():
inputx = Tensor(np.array([[0.214141, 0.415151, 0.51516],
[0.876542, 0.451611, 0.55112],
[0.111244, 0.633333, 0.34444]]).astype(np.float32))
indices = Tensor(np.array([[[1, 0, 2],
[2, 2, 0]],
[[1, 0, 1],
[2, 1, 2]]]).astype(np.int32))
updates = Tensor(np.arange(34, 70).reshape((2, 2, 3, 3)).astype(np.float32))
output = scatter_add_d_net(inputx, indices, updates)
expected = np.array([[141.21414, 144.41515, 147.51517],
[208.87654, 212.45161, 216.55112],
[257.11124, 262.63333, 267.34442]], dtype=np.float32)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_add_dynamic_two_inputs():
inputx = Tensor(np.zeros((2, 3)).astype(np.float32))
indices_1 = Tensor(np.array([[0, 1], [0, 1]]).astype(np.int32))
updates_1 = Tensor(np.arange(12).reshape((2, 2, 3)).astype(np.float32))
indices_2 = Tensor(np.array([[0, 0], [1, 1], [1, 0]]).astype(np.int32))
updates_2 = Tensor(np.flip(np.arange(18).reshape((3, 2, 3)).astype(np.float32)))
output_1, output_2 = scatter_add_d2_net(inputx, indices_1, updates_1,
indices_2, updates_2)
expected_1 = np.array([[6., 8., 10.],
[12., 14., 16.]])
expected_2 = np.array([[39., 38., 37.],
[36., 35., 34.]])
np.testing.assert_array_almost_equal(output_1.asnumpy(), expected_1)
np.testing.assert_array_almost_equal(output_2.asnumpy(), expected_2)

View File

@@ -0,0 +1,720 @@
# Copyright 2020-2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor, Parameter
from mindspore.ops import operations as P
from mindspore.ops.operations import _inner_ops as inner
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
# all cases tested against dchip
func_map = {
"update": P.ScatterUpdate,
"add": P.ScatterAdd,
"sub": P.ScatterSub,
}
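# Each test below runs the same scenario against update/add/sub through this dispatch table.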
class TestScatterFuncNet(nn.Cell):
def __init__(self, func, lock, inputx, indices, updates):
super(TestScatterFuncNet, self).__init__()
self.scatter_func = func_map[func](use_locking=lock)
self.inputx = Parameter(inputx, name="inputx")
self.indices = Parameter(indices, name="indices")
self.updates = Parameter(updates, name="updates")
def construct(self):
out = self.scatter_func(self.inputx, self.indices, self.updates)
return out
def scatter_func_net(func, inputx, indices, updates):
lock = True
net = TestScatterFuncNet(func, lock, inputx, indices, updates)
return net()
def scatter_func_use_locking_false_net(func, inputx, indices, updates):
lock = False
net = TestScatterFuncNet(func, lock, inputx, indices, updates)
return net()
class TestScatterFuncDynamicNet(nn.Cell):
def __init__(self, func, inputx, indices, updates):
super(TestScatterFuncDynamicNet, self).__init__()
self.scatter_func = func_map[func]()
self.test_dynamic = inner.GpuConvertToDynamicShape()
self.inputx = Parameter(inputx, name="inputx")
self.indices = Parameter(indices, name="indices")
self.updates = Parameter(updates, name="updates")
def construct(self):
indices = self.test_dynamic(self.indices)
updates = self.test_dynamic(self.updates)
out = self.scatter_func(self.inputx, indices, updates)
return out
def scatter_func_d_net(func, inputx, indices, updates):
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
net = TestScatterFuncDynamicNet(func, inputx, indices, updates)
return net()
class TestScatterFuncDynamicNet2(nn.Cell):
def __init__(self, func, inputx):
super(TestScatterFuncDynamicNet2, self).__init__()
self.scatter_func = func_map[func]()
self.test_dynamic = inner.GpuConvertToDynamicShape()
self.inputx = Parameter(inputx, name="inputx")
def construct(self, indices, updates):
indices = self.test_dynamic(indices)
updates = self.test_dynamic(updates)
out = self.scatter_func(self.inputx, indices, updates)
return out
def scatter_func_d2_net(func, inputx, indices_1, updates_1, indices_2, updates_2):
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
net = TestScatterFuncDynamicNet2(func, inputx)
out1 = net(indices_1, updates_1)
out2 = net(indices_2, updates_2)
return (out1, out2)
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_func_small_float32():
inputx = Tensor(np.zeros((2, 3)).astype(np.float32))
indices = Tensor(np.array([[0, 1], [0, 1]]).astype(np.int32))
updates = Tensor(np.arange(12).reshape((2, 2, 3)).astype(np.float32))
# update
output = scatter_func_net("update", inputx, indices, updates)
expected = np.array([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]])
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
# add
output = scatter_func_net("add", inputx, indices, updates)
expected = np.array([[6.0, 8.0, 10.0], [12.0, 14.0, 16.0]])
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
# sub
output = scatter_func_net("sub", inputx, indices, updates)
expected = np.array([[-6.0, -8.0, -10.0], [-12.0, -14.0, -16.0]])
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_func_input_updated():
inputx = Tensor(np.zeros((2, 3)).astype(np.float32))
indices = Tensor(np.array([[0, 1], [0, 1]]).astype(np.int32))
updates = Tensor(np.arange(12).reshape((2, 2, 3)).astype(np.float32))
lock = True
# update
net = TestScatterFuncNet("update", lock, inputx, indices, updates)
net()
expected = np.array([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]])
np.testing.assert_array_almost_equal(net.inputx.asnumpy(), expected)
# add
net = TestScatterFuncNet("add", lock, inputx, indices, updates)
net()
expected = np.array([[6.0, 8.0, 10.0], [12.0, 14.0, 16.0]])
np.testing.assert_array_almost_equal(net.inputx.asnumpy(), expected)
# sub
net = TestScatterFuncNet("sub", lock, inputx, indices, updates)
net()
expected = np.array([[-6.0, -8.0, -10.0], [-12.0, -14.0, -16.0]])
np.testing.assert_array_almost_equal(net.inputx.asnumpy(), expected)
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_func_large_shape_float32():
inputx = Tensor(np.ones((4, 2, 3, 4)).astype(np.float32))
indices = Tensor(np.array([[0, 2], [3, 1]]).astype(np.int32))
updates = Tensor(np.arange(96).reshape((2, 2, 2, 3, 4)).astype(np.float32))
# update
output = scatter_func_net("update", inputx, indices, updates)
expected = np.array(
[
[
[[0.0, 1.0, 2.0, 3.0], [4.0, 5.0, 6.0, 7.0], [8.0, 9.0, 10.0, 11.0]],
[[12.0, 13.0, 14.0, 15.0], [16.0, 17.0, 18.0, 19.0], [20.0, 21.0, 22.0, 23.0]],
],
[
[[72.0, 73.0, 74.0, 75.0], [76.0, 77.0, 78.0, 79.0], [80.0, 81.0, 82.0, 83.0]],
[[84.0, 85.0, 86.0, 87.0], [88.0, 89.0, 90.0, 91.0], [92.0, 93.0, 94.0, 95.0]],
],
[
[[24.0, 25.0, 26.0, 27.0], [28.0, 29.0, 30.0, 31.0], [32.0, 33.0, 34.0, 35.0]],
[[36.0, 37.0, 38.0, 39.0], [40.0, 41.0, 42.0, 43.0], [44.0, 45.0, 46.0, 47.0]],
],
[
[[48.0, 49.0, 50.0, 51.0], [52.0, 53.0, 54.0, 55.0], [56.0, 57.0, 58.0, 59.0]],
[[60.0, 61.0, 62.0, 63.0], [64.0, 65.0, 66.0, 67.0], [68.0, 69.0, 70.0, 71.0]],
],
]
)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
# add
output = scatter_func_net("add", inputx, indices, updates)
expected = np.array(
[
[
[[1.0, 2.0, 3.0, 4.0], [5.0, 6.0, 7.0, 8.0], [9.0, 10.0, 11.0, 12.0]],
[[13.0, 14.0, 15.0, 16.0], [17.0, 18.0, 19.0, 20.0], [21.0, 22.0, 23.0, 24.0]],
],
[
[[73.0, 74.0, 75.0, 76.0], [77.0, 78.0, 79.0, 80.0], [81.0, 82.0, 83.0, 84.0]],
[[85.0, 86.0, 87.0, 88.0], [89.0, 90.0, 91.0, 92.0], [93.0, 94.0, 95.0, 96.0]],
],
[
[[25.0, 26.0, 27.0, 28.0], [29.0, 30.0, 31.0, 32.0], [33.0, 34.0, 35.0, 36.0]],
[[37.0, 38.0, 39.0, 40.0], [41.0, 42.0, 43.0, 44.0], [45.0, 46.0, 47.0, 48.0]],
],
[
[[49.0, 50.0, 51.0, 52.0], [53.0, 54.0, 55.0, 56.0], [57.0, 58.0, 59.0, 60.0]],
[[61.0, 62.0, 63.0, 64.0], [65.0, 66.0, 67.0, 68.0], [69.0, 70.0, 71.0, 72.0]],
],
]
)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
# sub
output = scatter_func_net("sub", inputx, indices, updates)
expected = np.array(
[
[
[[1.0, 0.0, -1.0, -2.0], [-3.0, -4.0, -5.0, -6.0], [-7.0, -8.0, -9.0, -10.0]],
[
[-11.0, -12.0, -13.0, -14.0],
[-15.0, -16.0, -17.0, -18.0],
[-19.0, -20.0, -21.0, -22.0],
],
],
[
[
[-71.0, -72.0, -73.0, -74.0],
[-75.0, -76.0, -77.0, -78.0],
[-79.0, -80.0, -81.0, -82.0],
],
[
[-83.0, -84.0, -85.0, -86.0],
[-87.0, -88.0, -89.0, -90.0],
[-91.0, -92.0, -93.0, -94.0],
],
],
[
[
[-23.0, -24.0, -25.0, -26.0],
[-27.0, -28.0, -29.0, -30.0],
[-31.0, -32.0, -33.0, -34.0],
],
[
[-35.0, -36.0, -37.0, -38.0],
[-39.0, -40.0, -41.0, -42.0],
[-43.0, -44.0, -45.0, -46.0],
],
],
[
[
[-47.0, -48.0, -49.0, -50.0],
[-51.0, -52.0, -53.0, -54.0],
[-55.0, -56.0, -57.0, -58.0],
],
[
[-59.0, -60.0, -61.0, -62.0],
[-63.0, -64.0, -65.0, -66.0],
[-67.0, -68.0, -69.0, -70.0],
],
],
]
)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_func_small_float32_use_locking_false():
inputx = Tensor(np.zeros((2, 3)).astype(np.float32))
indices = Tensor(np.array([1, 0]).astype(np.int32))
updates = Tensor(np.arange(6).reshape((2, 3)).astype(np.float32))
# update
output = scatter_func_use_locking_false_net("update", inputx, indices, updates)
expected = np.array([[3.0, 4.0, 5.0], [0.0, 1.0, 2.0]])
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
# add
output = scatter_func_use_locking_false_net("add", inputx, indices, updates)
expected = np.array([[3.0, 4.0, 5.0], [0.0, 1.0, 2.0]])
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
# sub
output = scatter_func_use_locking_false_net("sub", inputx, indices, updates)
expected = np.array([[-3.0, -4.0, -5.0], [0.0, -1.0, -2.0]])
np.testing.assert_array_almost_equal(output.asnumpy(), expected)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_func_input_less_than_1_float32():
inputx = Tensor(
np.array(
[
[0.214141, 0.415151, 0.51516],
[0.876542, 0.451611, 0.55112],
[0.111244, 0.633333, 0.34444],
]
).astype(np.float32)
)
indices = Tensor(np.array([[[1, 0, 2], [2, 2, 0]], [[1, 0, 1], [2, 1, 2]]]).astype(np.int32))
updates = Tensor(np.arange(34, 70).reshape((2, 2, 3, 3)).astype(np.float32))
# update
output = scatter_func_net("update", inputx, indices, updates)
expected = np.array(
[[37.0, 38.0, 39.0], [34.0, 35.0, 66.0], [67.0, 68.0, 69.0],], dtype=np.float32,
)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
# add
output = scatter_func_net("add", inputx, indices, updates)
expected = np.array(
[
[141.21414, 144.41515, 147.51517],
[208.87654, 212.45161, 216.55112],
[257.11124, 262.63333, 267.34442],
],
dtype=np.float32,
)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
# sub
output = scatter_func_net("sub", inputx, indices, updates)
expected = np.array(
[
[-140.78586, -143.58485, -146.48483],
[-207.12346, -211.54839, -215.44888],
[-256.88876, -261.36667, -266.65558],
],
dtype=np.float32,
)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_func_float16():
inputx = Tensor(np.zeros((2, 3)).astype(np.float16))
indices = Tensor(np.array([[0, 1], [0, 1]]).astype(np.int32))
updates = Tensor(np.arange(12).reshape((2, 2, 3)).astype(np.float16))
# update
output = scatter_func_net("update", inputx, indices, updates)
expected = np.array([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]])
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
# add
output = scatter_func_net("add", inputx, indices, updates)
expected = np.array([[6.0, 8.0, 10.0], [12.0, 14.0, 16.0]])
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
# sub
output = scatter_func_net("sub", inputx, indices, updates)
expected = np.array([[-6.0, -8.0, -10.0], [-12.0, -14.0, -16.0]])
np.testing.assert_array_almost_equal(output.asnumpy(), expected)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_func_large_float16():
inputx = Tensor(np.zeros((2, 3, 4)).astype(np.float16))
indices = Tensor(np.array([[0, 0], [1, 1]]).astype(np.int32))
updates = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.float16))
# update
output = scatter_func_net("update", inputx, indices, updates)
expected = np.array(
[
[[63.0, 64.0, 65.0, 66.0], [67.0, 68.0, 69.0, 70.0], [71.0, 72.0, 73.0, 74.0],],
[[99.0, 100.0, 101.0, 102.0], [103.0, 104.0, 105.0, 106.0], [95.0, 96.0, 97.0, 98.0],],
]
)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
# add
output = scatter_func_net("add", inputx, indices, updates)
expected = np.array(
[
[
[138.0, 140.0, 142.0, 144.0],
[146.0, 148.0, 150.0, 152.0],
[154.0, 156.0, 158.0, 160.0],
],
[
[186.0, 188.0, 190.0, 192.0],
[194.0, 196.0, 198.0, 200.0],
[202.0, 204.0, 206.0, 208.0],
],
]
)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
# sub
output = scatter_func_net("sub", inputx, indices, updates)
expected = np.array(
[
[
[-138.0, -140.0, -142.0, -144.0],
[-146.0, -148.0, -150.0, -152.0],
[-154.0, -156.0, -158.0, -160.0],
],
[
[-186.0, -188.0, -190.0, -192.0],
[-194.0, -196.0, -198.0, -200.0],
[-202.0, -204.0, -206.0, -208.0],
],
]
)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_func_disordered_float16():
inputx = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.float16)))
indices = Tensor(np.array([[[0, 1, 2], [2, 1, 0]], [[0, 0, 0], [2, 2, 2]]]).astype(np.int32))
updates = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.float16))
# update
output = scatter_func_net("update", inputx, indices, updates)
expected = np.array(
[[95.0, 96.0, 97.0, 98.0], [67.0, 68.0, 69.0, 70.0], [99.0, 100.0, 101.0, 102.0]]
)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
# add
output = scatter_func_net("add", inputx, indices, updates)
expected = np.array(
[[464.0, 468.0, 472.0, 476.0], [187.0, 188.0, 189.0, 190.0], [492.0, 496.0, 500.0, 504.0]]
)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
# sub
output = scatter_func_net("sub", inputx, indices, updates)
expected = np.array(
[
[-374.0, -380.0, -386.0, -392.0],
[-105.0, -108.0, -111.0, -114.0],
[-418.0, -424.0, -430.0, -436.0],
]
)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_func_large_int32():
inputx = Tensor(np.zeros((2, 3, 4)).astype(np.int32))
indices = Tensor(np.array([[0, 0], [1, 1]]).astype(np.int32))
updates = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.int32))
# update
output = scatter_func_net("update", inputx, indices, updates)
expected = np.array(
[
[[63.0, 64.0, 65.0, 66.0], [67.0, 68.0, 69.0, 70.0], [71.0, 72.0, 73.0, 74.0],],
[[99.0, 100.0, 101.0, 102.0], [103.0, 104.0, 105.0, 106.0], [95.0, 96.0, 97.0, 98.0],],
]
).astype(np.int32)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
# add
output = scatter_func_net("add", inputx, indices, updates)
expected = np.array(
[
[
[138.0, 140.0, 142.0, 144.0],
[146.0, 148.0, 150.0, 152.0],
[154.0, 156.0, 158.0, 160.0],
],
[
[186.0, 188.0, 190.0, 192.0],
[194.0, 196.0, 198.0, 200.0],
[202.0, 204.0, 206.0, 208.0],
],
]
).astype(np.int32)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
# sub
output = scatter_func_net("sub", inputx, indices, updates)
expected = np.array(
[
[
[-138.0, -140.0, -142.0, -144.0],
[-146.0, -148.0, -150.0, -152.0],
[-154.0, -156.0, -158.0, -160.0],
],
[
[-186.0, -188.0, -190.0, -192.0],
[-194.0, -196.0, -198.0, -200.0],
[-202.0, -204.0, -206.0, -208.0],
],
]
).astype(np.int32)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_func_disordered_int32():
inputx = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.int32)))
indices = Tensor(np.array([[[0, 1, 2], [2, 1, 0]], [[0, 0, 0], [2, 2, 2]]]).astype(np.int32))
updates = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.int32))
# update
output = scatter_func_net("update", inputx, indices, updates)
expected = np.array(
[[95.0, 96.0, 97.0, 98.0], [67.0, 68.0, 69.0, 70.0], [99.0, 100.0, 101.0, 102.0]]
).astype(np.int32)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
# add
output = scatter_func_net("add", inputx, indices, updates)
expected = np.array(
[[464.0, 468.0, 472.0, 476.0], [187.0, 188.0, 189.0, 190.0], [492.0, 496.0, 500.0, 504.0]]
).astype(np.int32)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
# sub
output = scatter_func_net("sub", inputx, indices, updates)
expected = np.array(
[
[-374.0, -380.0, -386.0, -392.0],
[-105.0, -108.0, -111.0, -114.0],
[-418.0, -424.0, -430.0, -436.0],
]
).astype(np.int32)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_func_disordered_dynamic_int32():
inputx = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.int32)))
indices = Tensor(np.array([[[0, 1, 2], [2, 1, 0]], [[0, 0, 0], [2, 2, 2]]]).astype(np.int32))
updates = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.int32))
# update
output = scatter_func_d_net("update", inputx, indices, updates)
expected = np.array(
[[95.0, 96.0, 97.0, 98.0], [67.0, 68.0, 69.0, 70.0], [99.0, 100.0, 101.0, 102.0]]
).astype(np.int32)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
# add
output = scatter_func_d_net("add", inputx, indices, updates)
expected = np.array(
[[464.0, 468.0, 472.0, 476.0], [187.0, 188.0, 189.0, 190.0], [492.0, 496.0, 500.0, 504.0]]
).astype(np.int32)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
# sub
output = scatter_func_d_net("sub", inputx, indices, updates)
expected = np.array(
[
[-374.0, -380.0, -386.0, -392.0],
[-105.0, -108.0, -111.0, -114.0],
[-418.0, -424.0, -430.0, -436.0],
]
).astype(np.int32)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_func_disordered_dynamic_int8():
inputx = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.int8)))
indices = Tensor(np.array([[[0, 1, 2], [2, 1, 0]], [[0, 0, 0], [2, 2, 2]]]).astype(np.int32))
updates = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.int8))
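    # The add/sub results overflow int8; the expected arrays below are the
    # int32 results wrapped into int8 by .astype(np.int8).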
# update
output = scatter_func_d_net("update", inputx, indices, updates)
expected = np.array(
[[95.0, 96.0, 97.0, 98.0], [67.0, 68.0, 69.0, 70.0], [99.0, 100.0, 101.0, 102.0]]
).astype(np.int8)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
# add
output = scatter_func_d_net("add", inputx, indices, updates)
expected = np.array(
[[464.0, 468.0, 472.0, 476.0], [187.0, 188.0, 189.0, 190.0], [492.0, 496.0, 500.0, 504.0]]
).astype(np.int8)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
# sub
output = scatter_func_d_net("sub", inputx, indices, updates)
expected = np.array(
[
[-118.0, -124.0, 126.0, 120.0],
[-105.0, -108.0, -111.0, -114.0],
[94.0, 88.0, 82.0, 76.0],
]
).astype(np.int8)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_func_disordered_dynamic_uint8():
inputx = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.uint8)))
indices = Tensor(np.array([[[0, 1, 2], [2, 1, 0]], [[0, 0, 0], [2, 2, 2]]]).astype(np.int32))
updates = Tensor(np.arange(63, 111).reshape((2, 2, 3, 4)).astype(np.uint8))
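    # The add results exceed the uint8 range and the sub results are negative;
    # the expected arrays below rely on the modulo-256 wraparound applied by
    # .astype(np.uint8).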
# update
output = scatter_func_d_net("update", inputx, indices, updates)
expected = np.array(
[[95.0, 96.0, 97.0, 98.0], [67.0, 68.0, 69.0, 70.0], [99.0, 100.0, 101.0, 102.0]]
).astype(np.uint8)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
# add
output = scatter_func_d_net("add", inputx, indices, updates)
expected = np.array(
[[464.0, 468.0, 472.0, 476.0], [187.0, 188.0, 189.0, 190.0], [492.0, 496.0, 500.0, 504.0]]
).astype(np.uint8)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
# sub
output = scatter_func_d_net("sub", inputx, indices, updates)
expected = np.array(
[[138.0, 132.0, 126.0, 120.0], [151.0, 148.0, 145.0, 142.0], [94.0, 88.0, 82.0, 76.0]]
).astype(np.uint8)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_func_input_less_than_1_dynamic_float32():
inputx = Tensor(
np.array(
[
[0.214141, 0.415151, 0.51516],
[0.876542, 0.451611, 0.55112],
[0.111244, 0.633333, 0.34444],
]
).astype(np.float32)
)
indices = Tensor(np.array([[[1, 0, 2], [2, 2, 0]], [[1, 0, 1], [2, 1, 2]]]).astype(np.int32))
updates = Tensor(np.arange(34, 70).reshape((2, 2, 3, 3)).astype(np.float32))
# update
output = scatter_func_d_net("update", inputx, indices, updates)
expected = np.array(
[[37.0, 38.0, 39.0], [34.0, 35.0, 66.0], [67.0, 68.0, 69.0],], dtype=np.float32,
)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
# add
output = scatter_func_d_net("add", inputx, indices, updates)
expected = np.array(
[
[141.21414, 144.41515, 147.51517],
[208.87654, 212.45161, 216.55112],
[257.11124, 262.63333, 267.34442],
],
dtype=np.float32,
)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)
# sub
output = scatter_func_d_net("sub", inputx, indices, updates)
expected = np.array(
[
[-140.78586, -143.58485, -146.48483],
[-207.12346, -211.54839, -215.44888],
[-256.88876, -261.36667, -266.65558],
],
dtype=np.float32,
)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_func_dynamic_two_inputs():
inputx = Tensor(np.zeros((2, 3)).astype(np.float32))
indices_1 = Tensor(np.array([[0, 1], [0, 1]]).astype(np.int32))
updates_1 = Tensor(np.arange(12).reshape((2, 2, 3)).astype(np.float32))
indices_2 = Tensor(np.array([[0, 0], [1, 1], [1, 0]]).astype(np.int32))
updates_2 = Tensor(np.flip(np.arange(18).reshape((3, 2, 3)).astype(np.float32)))
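    # indices_2 contains duplicate row indices, so the "update" case is
    # order-dependent; the expected values below match the write order the
    # GPU kernel actually produces.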
# update
output_1, output_2 = scatter_func_d2_net(
"update", inputx, indices_1, updates_1, indices_2, updates_2
)
expected_1 = np.array([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0]])
expected_2 = np.array([[17.0, 16.0, 15.0], [11.0, 10.0, 9.0]])
np.testing.assert_array_almost_equal(output_1.asnumpy(), expected_1)
np.testing.assert_array_almost_equal(output_2.asnumpy(), expected_2)
# add
output_1, output_2 = scatter_func_d2_net(
"add", inputx, indices_1, updates_1, indices_2, updates_2
)
expected_1 = np.array([[6.0, 8.0, 10.0], [12.0, 14.0, 16.0]])
expected_2 = np.array([[39.0, 38.0, 37.0], [36.0, 35.0, 34.0]])
np.testing.assert_array_almost_equal(output_1.asnumpy(), expected_1)
np.testing.assert_array_almost_equal(output_2.asnumpy(), expected_2)
# sub
output_1, output_2 = scatter_func_d2_net(
"sub", inputx, indices_1, updates_1, indices_2, updates_2
)
expected_1 = np.array([[-6.0, -8.0, -10.0], [-12.0, -14.0, -16.0]])
expected_2 = np.array([[-39.0, -38.0, -37.0], [-36.0, -35.0, -34.0]])
np.testing.assert_array_almost_equal(output_1.asnumpy(), expected_1)
np.testing.assert_array_almost_equal(output_2.asnumpy(), expected_2)

View File

@ -1,359 +0,0 @@
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import numpy as np
import pytest
import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor, Parameter
from mindspore.ops import operations as P
from mindspore.ops.operations import _inner_ops as inner

context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
# all cases tested against dchip (Ascend) results


class TestScatterUpdateNet(nn.Cell):
def __init__(self, inputx, indices, updates):
super(TestScatterUpdateNet, self).__init__()
self.scatter_update = P.ScatterUpdate()
self.inputx = Parameter(inputx, name="inputx")
self.indices = Parameter(indices, name="indices")
self.updates = Parameter(updates, name="updates")

    def construct(self):
out = self.scatter_update(self.inputx, self.indices, self.updates)
return out


def scatter_update_net(inputx, indices, updates):
net = TestScatterUpdateNet(inputx, indices, updates)
return net()


class TestScatterUpdateDynamicNet(nn.Cell):
def __init__(self, inputx, indices, updates):
super(TestScatterUpdateDynamicNet, self).__init__()
self.scatter_update = P.ScatterUpdate()
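        # GpuConvertToDynamicShape re-tags a tensor as dynamic-shape at
        # runtime, so construct() below exercises the dynamic-shape kernel.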
self.test_dynamic = inner.GpuConvertToDynamicShape()
self.inputx = Parameter(inputx, name="inputx")
self.indices = Parameter(indices, name="indices")
self.updates = Parameter(updates, name="updates")

    def construct(self):
indices = self.test_dynamic(self.indices)
updates = self.test_dynamic(self.updates)
out = self.scatter_update(self.inputx, indices, updates)
return out


def scatter_update_d_net(inputx, indices, updates):
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
net = TestScatterUpdateDynamicNet(inputx, indices, updates)
return net()


class TestScatterUpdateDynamicNet2(nn.Cell):
def __init__(self, inputx):
super(TestScatterUpdateDynamicNet2, self).__init__()
self.scatter_update = P.ScatterUpdate()
self.test_dynamic = inner.GpuConvertToDynamicShape()
self.inputx = Parameter(inputx, name="inputx")

    def construct(self, indices, updates):
indices = self.test_dynamic(indices)
updates = self.test_dynamic(updates)
out = self.scatter_update(self.inputx, indices, updates)
return out


def scatter_update_d2_net(inputx, indices_1, updates_1,
indices_2, updates_2):
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
net = TestScatterUpdateDynamicNet2(inputx)
out1 = net(indices_1, updates_1)
out2 = net(indices_2, updates_2)
return (out1, out2)
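
# The three wrappers above cover static-shape, dynamic-shape, and repeated
# dynamic-shape invocations of ScatterUpdate; the dtype-specific tests below
# reuse them.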


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_update_small_float32():
inputx = Tensor(np.zeros((2, 3)).astype(np.float32))
indices = Tensor(np.array([0, 1]).astype(np.int32))
updates = Tensor(np.arange(6).reshape((2, 3)).astype(np.float32))
output = scatter_update_net(inputx, indices, updates)
expected = np.array([[0., 1., 2.],
[3., 4., 5.]])
np.testing.assert_array_almost_equal(output.asnumpy(), expected)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_update_input_updated():
inputx = Tensor(np.zeros((2, 3)).astype(np.float32))
indices = Tensor(np.array([0, 1]).astype(np.int32))
updates = Tensor(np.arange(6).reshape((2, 3)).astype(np.float32))
net = TestScatterUpdateNet(inputx, indices, updates)
net()
expected = np.array([[0., 1., 2.],
[3., 4., 5.]])
np.testing.assert_array_almost_equal(net.inputx.asnumpy(), expected)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_update_input_less_than_1_float32():
inputx = Tensor(np.array([[0.214141, 0.415151, 0.51516],
[0.876542, 0.451611, 0.55112],
[0.111244, 0.633333, 0.34444]]).astype(np.float32))
indices = Tensor(np.array([1, 0, 2]).astype(np.int32))
updates = Tensor(np.arange(34, 43).reshape((3, 3)).astype(np.float32))
output = scatter_update_net(inputx, indices, updates)
expected = np.array([[37., 38., 39.],
[34., 35., 36.],
[40., 41., 42.]], dtype=np.float32)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_update_float16():
inputx = Tensor(np.zeros((2, 3)).astype(np.float16))
indices = Tensor(np.array([0, 1]).astype(np.int32))
updates = Tensor(np.arange(6).reshape((2, 3)).astype(np.float16))
output = scatter_update_net(inputx, indices, updates)
expected = np.array([[0., 1., 2.],
[3., 4., 5.]]).astype(np.float16)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_update_int32():
inputx = Tensor(np.zeros((2, 3)).astype(np.int32))
indices = Tensor(np.array([0, 1]).astype(np.int32))
updates = Tensor(np.arange(6).reshape((2, 3)).astype(np.int32))
output = scatter_update_net(inputx, indices, updates)
expected = np.array([[0., 1., 2.],
[3., 4., 5.]]).astype(np.int32)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_update_large_float16():
inputx = Tensor(np.zeros((4, 3)).astype(np.float16))
indices = Tensor(np.array([[2, 1], [0, 3]]).astype(np.int32))
updates = Tensor(np.arange(63, 75).reshape((2, 2, 3)).astype(np.float16))
output = scatter_update_net(inputx, indices, updates)
expected = np.array([[69., 70., 71.],
[66., 67., 68.],
[63., 64., 65.],
[72., 73., 74.]]).astype(np.float16)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_update_disordered_float16():
inputx = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.float16)))
indices = Tensor(np.array([1, 2]).astype(np.int32))
updates = Tensor(np.arange(63, 71).reshape((2, 4)).astype(np.float16))
output = scatter_update_net(inputx, indices, updates)
expected = np.array([[45., 44., 43., 42.],
[63., 64., 65., 66.],
[67., 68., 69., 70.]]).astype(np.float16)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_update_disordered_int32():
inputx = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.int32)))
indices = Tensor(np.array([1, 2]).astype(np.int32))
updates = Tensor(np.arange(63, 71).reshape((2, 4)).astype(np.int32))
output = scatter_update_net(inputx, indices, updates)
expected = np.array([[45., 44., 43., 42.],
[63., 64., 65., 66.],
[67., 68., 69., 70.]]).astype(np.int32)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_update_large_shape_float16():
inputx = Tensor(np.arange(96).reshape((4, 2, 3, 4)).astype(np.float16))
indices = Tensor(np.array([1, 0]).astype(np.int32))
updates = Tensor(np.flip(np.arange(48).reshape((2, 2, 3, 4)).astype(np.float16)))
output = scatter_update_net(inputx, indices, updates)
expected = np.array([[[[23., 22., 21., 20.],
[19., 18., 17., 16.],
[15., 14., 13., 12.]],
[[11., 10., 9., 8.],
[7., 6., 5., 4.],
[3., 2., 1., 0.]]],
[[[47., 46., 45., 44.],
[43., 42., 41., 40.],
[39., 38., 37., 36.]],
[[35., 34., 33., 32.],
[31., 30., 29., 28.],
[27., 26., 25., 24.]]],
[[[48., 49., 50., 51.],
[52., 53., 54., 55.],
[56., 57., 58., 59.]],
[[60., 61., 62., 63.],
[64., 65., 66., 67.],
[68., 69., 70., 71.]]],
[[[72., 73., 74., 75.],
[76., 77., 78., 79.],
[80., 81., 82., 83.]],
[[84., 85., 86., 87.],
[88., 89., 90., 91.],
[92., 93., 94., 95.]]]]).astype(np.float16)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_update_disordered_int8():
inputx = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.int8)))
indices = Tensor(np.array([1, 2]).astype(np.int32))
updates = Tensor(np.arange(63, 71).reshape((2, 4)).astype(np.int8))
output = scatter_update_net(inputx, indices, updates)
expected = np.array([[45., 44., 43., 42.],
[63., 64., 65., 66.],
[67., 68., 69., 70.]]).astype(np.int8)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_update_large_shape_int8():
inputx = Tensor(np.arange(96).reshape((4, 2, 3, 4)).astype(np.int8))
indices = Tensor(np.array([1, 0]).astype(np.int32))
updates = Tensor(np.flip(np.arange(48).reshape((2, 2, 3, 4)).astype(np.int8)))
output = scatter_update_net(inputx, indices, updates)
expected = np.array([[[[23., 22., 21., 20.],
[19., 18., 17., 16.],
[15., 14., 13., 12.]],
[[11., 10., 9., 8.],
[7., 6., 5., 4.],
[3., 2., 1., 0.]]],
[[[47., 46., 45., 44.],
[43., 42., 41., 40.],
[39., 38., 37., 36.]],
[[35., 34., 33., 32.],
[31., 30., 29., 28.],
[27., 26., 25., 24.]]],
[[[48., 49., 50., 51.],
[52., 53., 54., 55.],
[56., 57., 58., 59.]],
[[60., 61., 62., 63.],
[64., 65., 66., 67.],
[68., 69., 70., 71.]]],
[[[72., 73., 74., 75.],
[76., 77., 78., 79.],
[80., 81., 82., 83.]],
[[84., 85., 86., 87.],
[88., 89., 90., 91.],
[92., 93., 94., 95.]]]]).astype(np.int8)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_update_large_uint8():
inputx = Tensor(np.zeros((4, 3)).astype(np.uint8))
indices = Tensor(np.array([[2, 1], [0, 3]]).astype(np.int32))
updates = Tensor(np.arange(63, 75).reshape((2, 2, 3)).astype(np.uint8))
output = scatter_update_net(inputx, indices, updates)
expected = np.array([[69., 70., 71.],
[66., 67., 68.],
[63., 64., 65.],
[72., 73., 74.]]).astype(np.uint8)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_update_disordered_uint8():
inputx = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.uint8)))
indices = Tensor(np.array([1, 2]).astype(np.int32))
updates = Tensor(np.arange(63, 71).reshape((2, 4)).astype(np.uint8))
output = scatter_update_net(inputx, indices, updates)
expected = np.array([[45., 44., 43., 42.],
[63., 64., 65., 66.],
[67., 68., 69., 70.]]).astype(np.uint8)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_update_large_shape_dynamic_int8():
inputx = Tensor(np.arange(96).reshape((4, 2, 3, 4)).astype(np.int8))
indices = Tensor(np.array([1, 0]).astype(np.int32))
updates = Tensor(np.flip(np.arange(48).reshape((2, 2, 3, 4)).astype(np.int8)))
output = scatter_update_d_net(inputx, indices, updates)
expected = np.array([[[[23., 22., 21., 20.],
[19., 18., 17., 16.],
[15., 14., 13., 12.]],
[[11., 10., 9., 8.],
[7., 6., 5., 4.],
[3., 2., 1., 0.]]],
[[[47., 46., 45., 44.],
[43., 42., 41., 40.],
[39., 38., 37., 36.]],
[[35., 34., 33., 32.],
[31., 30., 29., 28.],
[27., 26., 25., 24.]]],
[[[48., 49., 50., 51.],
[52., 53., 54., 55.],
[56., 57., 58., 59.]],
[[60., 61., 62., 63.],
[64., 65., 66., 67.],
[68., 69., 70., 71.]]],
[[[72., 73., 74., 75.],
[76., 77., 78., 79.],
[80., 81., 82., 83.]],
[[84., 85., 86., 87.],
[88., 89., 90., 91.],
[92., 93., 94., 95.]]]]).astype(np.int8)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_update_disordered_dynamic_int32():
inputx = Tensor(np.flip(np.arange(34, 46).reshape(3, 4).astype(np.int32)))
indices = Tensor(np.array([1, 2]).astype(np.int32))
updates = Tensor(np.arange(63, 71).reshape((2, 4)).astype(np.int32))
output = scatter_update_d_net(inputx, indices, updates)
expected = np.array([[45., 44., 43., 42.],
[63., 64., 65., 66.],
[67., 68., 69., 70.]]).astype(np.int32)
np.testing.assert_array_almost_equal(output.asnumpy(), expected)


@pytest.mark.level0
@pytest.mark.platform_x86_gpu_training
@pytest.mark.env_onecard
def test_scatter_update_two_inputs():
inputx = Tensor(np.zeros((2, 3)).astype(np.float32))
indices_1 = Tensor(np.array([0, 1]).astype(np.int32))
updates_1 = Tensor(np.arange(6).reshape((2, 3)).astype(np.float32))
indices_2 = Tensor(np.array([1]).astype(np.int32))
updates_2 = Tensor(np.arange(34, 37).reshape((1, 3)).astype(np.float32))
output_1, output_2 = scatter_update_d2_net(inputx, indices_1, updates_1,
indices_2, updates_2)
expected_1 = np.array([[0., 1., 2.],
[3., 4., 5.]], dtype=np.float32)
expected_2 = np.array([[0., 1., 2.],
[34., 35., 36.]], dtype=np.float32)
np.testing.assert_array_almost_equal(output_1.asnumpy(), expected_1)
np.testing.assert_array_almost_equal(output_2.asnumpy(), expected_2)