forked from mindspore-Ecosystem/mindspore
fix cpu/gpu argmax op
This commit is contained in:
parent
609a518068
commit
d65a5affba
|
@ -18,48 +18,82 @@
|
|||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
void ArgmaxCPUKernel::InitKernel(const CNodePtr &kernel_node) {
|
||||
MS_EXCEPTION_IF_NULL(kernel_node);
|
||||
std::vector<size_t> shape = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
|
||||
if (shape.size() != 2) {
|
||||
MS_LOG(EXCEPTION) << "argmax kernel dims invalid " << shape.size();
|
||||
}
|
||||
batch_size_ = shape[0];
|
||||
class_num_ = shape[1];
|
||||
|
||||
int64_t axis = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, AXIS);
|
||||
if (axis != -1 && axis != 1) {
|
||||
MS_LOG(EXCEPTION) << "argmax kernel not support axis " << axis;
|
||||
namespace {
|
||||
size_t get_element_num(const std::vector<size_t> &shape) {
|
||||
size_t size = 1;
|
||||
for (size_t i = 0; i < shape.size(); i++) {
|
||||
size *= shape[i];
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
bool ArgmaxCPUKernel::Launch(const std::vector<kernel::AddressPtr> &inputs,
|
||||
const std::vector<kernel::AddressPtr> & /*workspaces*/,
|
||||
const std::vector<kernel::AddressPtr> &outputs) {
|
||||
if (inputs.empty() || outputs.empty()) {
|
||||
MS_LOG(EXCEPTION) << "input or output empty!";
|
||||
template <typename T>
|
||||
bool check_validation(const std::vector<size_t> &shape, const size_t num_before_axis, const size_t num_after_axis,
|
||||
const std::vector<kernel::AddressPtr> &inputs, const std::vector<kernel::AddressPtr> &outputs) {
|
||||
if (inputs.size() != 1 || outputs.size() != 1) {
|
||||
MS_LOG(EXCEPTION) << "Wrong number of inputs or outputs!";
|
||||
return false;
|
||||
}
|
||||
size_t data_size = sizeof(T);
|
||||
size_t input_size = get_element_num(shape) * data_size;
|
||||
size_t output_num = num_before_axis * num_after_axis;
|
||||
size_t output_size = output_num * sizeof(int);
|
||||
if (inputs[0]->size != input_size || outputs[0]->size != output_size) {
|
||||
MS_LOG(EXCEPTION) << "invalid input or output data size!";
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
template <typename T>
|
||||
void ArgmaxCPUKernel<T>::InitKernel(const CNodePtr &kernel_node) {
|
||||
MS_EXCEPTION_IF_NULL(kernel_node);
|
||||
shape_ = AnfAlgo::GetInputDeviceShape(kernel_node, 0);
|
||||
size_t shape_len = shape_.size();
|
||||
int64_t axis = AnfAlgo::GetNodeAttr<int64_t>(kernel_node, AXIS);
|
||||
axis += shape_len;
|
||||
if (axis < 0) {
|
||||
MS_LOG(EXCEPTION) << "Invalid axis:" << axis << ", should in range [-1, " << shape_len - 1 << "]";
|
||||
}
|
||||
axis = axis % static_cast<int64_t>(shape_len);
|
||||
num_before_axis_ = 1;
|
||||
num_after_axis_ = 1;
|
||||
for (size_t i = 0; i < shape_len; i++) {
|
||||
if (static_cast<int64_t>(i) < axis) {
|
||||
num_before_axis_ *= shape_[i];
|
||||
} else if (static_cast<int64_t>(i) > axis) {
|
||||
num_after_axis_ *= shape_[i];
|
||||
}
|
||||
}
|
||||
dim_axis_ = shape_[axis];
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool ArgmaxCPUKernel<T>::Launch(const std::vector<kernel::AddressPtr> &inputs,
|
||||
const std::vector<kernel::AddressPtr> & /*workspaces*/,
|
||||
const std::vector<kernel::AddressPtr> &outputs) {
|
||||
if (!check_validation<T>(shape_, num_before_axis_, num_after_axis_, inputs, outputs)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t batch_float_size = batch_size_ * sizeof(float);
|
||||
size_t batch_class_float_size = class_num_ * batch_float_size;
|
||||
if (inputs[0]->size != batch_class_float_size || outputs[0]->size != batch_float_size) {
|
||||
MS_LOG(EXCEPTION) << "invalid input or output data size!";
|
||||
}
|
||||
auto input = reinterpret_cast<float *>(inputs[0]->addr);
|
||||
auto output = reinterpret_cast<int *>(outputs[0]->addr);
|
||||
size_t row_start = 0;
|
||||
for (size_t i = 0; i < batch_size_; ++i) {
|
||||
size_t max_index = 0;
|
||||
float max_value = input[row_start];
|
||||
for (size_t j = 1; j < class_num_; ++j) {
|
||||
size_t index = row_start + j;
|
||||
if (input[index] > max_value) {
|
||||
max_value = input[index];
|
||||
max_index = j;
|
||||
auto input = reinterpret_cast<T *>(inputs[0]->addr);
|
||||
auto output = reinterpret_cast<int32_t *>(outputs[0]->addr);
|
||||
|
||||
for (size_t i = 0; i < num_before_axis_; i++) {
|
||||
size_t src_index_i = i * dim_axis_ * num_after_axis_;
|
||||
for (size_t j = 0; j < num_after_axis_; j++) {
|
||||
std::vector<float> array_axis;
|
||||
size_t src_index_j = src_index_i + j;
|
||||
for (size_t k = 0; k < dim_axis_; k++) {
|
||||
size_t src_index_k = k * num_after_axis_ + src_index_j;
|
||||
array_axis.push_back(static_cast<float>(input[src_index_k]));
|
||||
}
|
||||
auto max_ops = std::max_element(array_axis.begin(), array_axis.end());
|
||||
auto max_index = static_cast<int32_t>(std::distance(array_axis.begin(), max_ops));
|
||||
auto dst_index = i * num_after_axis_ + j;
|
||||
output[dst_index] = max_index;
|
||||
}
|
||||
output[i] = SizeToInt(max_index);
|
||||
row_start += class_num_;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
template <typename T>
|
||||
class ArgmaxCPUKernel : public CPUKernel {
|
||||
public:
|
||||
ArgmaxCPUKernel() = default;
|
||||
|
@ -33,12 +34,16 @@ class ArgmaxCPUKernel : public CPUKernel {
|
|||
const std::vector<AddressPtr> &outputs) override;
|
||||
|
||||
private:
|
||||
size_t class_num_{0};
|
||||
size_t batch_size_{0};
|
||||
std::vector<size_t> shape_;
|
||||
size_t num_before_axis_;
|
||||
size_t num_after_axis_;
|
||||
size_t dim_axis_;
|
||||
};
|
||||
|
||||
MS_REG_CPU_KERNEL(Argmax, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeInt32),
|
||||
ArgmaxCPUKernel);
|
||||
MS_REG_CPU_KERNEL_T(Argmax, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeInt32),
|
||||
ArgmaxCPUKernel, float);
|
||||
MS_REG_CPU_KERNEL_T(Argmax, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeInt32),
|
||||
ArgmaxCPUKernel, float16);
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
||||
|
|
|
@ -18,9 +18,9 @@
|
|||
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
MS_REG_GPU_KERNEL_ONE(Argmax, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeInt32),
|
||||
ArgmaxGpuKernel, float)
|
||||
MS_REG_GPU_KERNEL_ONE(Argmax, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeInt32),
|
||||
ArgmaxGpuKernel, half)
|
||||
MS_REG_GPU_KERNEL_TWO(Argmax, KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeInt32),
|
||||
ArgmaxGpuKernel, float, int)
|
||||
MS_REG_GPU_KERNEL_TWO(Argmax, KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeInt32),
|
||||
ArgmaxGpuKernel, half, int)
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -23,11 +23,10 @@
|
|||
#include "backend/kernel_compiler/gpu/cuda_impl/argmax_impl.cuh"
|
||||
namespace mindspore {
|
||||
namespace kernel {
|
||||
#define ARGMAX_MAX_DIMENSION 2
|
||||
template <typename T>
|
||||
template <typename T, typename S>
|
||||
class ArgmaxGpuKernel : public GpuKernel {
|
||||
public:
|
||||
ArgmaxGpuKernel() : input_size_(0), output_size_(0), workspace_size_(0), batch_size_(0), channel_size_(0), axis_(0) {}
|
||||
ArgmaxGpuKernel() : input_size_(0), output_size_(0), workspace_size_(0), bound_(0), outer_size_(0), inner_size_(0) {}
|
||||
~ArgmaxGpuKernel() override = default;
|
||||
|
||||
const std::vector<size_t> &GetInputSizeList() const override { return input_size_list_; }
|
||||
|
@ -37,47 +36,38 @@ class ArgmaxGpuKernel : public GpuKernel {
|
|||
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
|
||||
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
|
||||
T *input = GetDeviceAddress<T>(inputs, 0);
|
||||
int *output = GetDeviceAddress<int>(outputs, 0);
|
||||
CalArgmax(input, SizeToInt(batch_size_), SizeToInt(channel_size_), axis_, output,
|
||||
reinterpret_cast<cudaStream_t>(stream_ptr));
|
||||
S *output = GetDeviceAddress<S>(outputs, 0);
|
||||
CalArgmax(input, bound_, outer_size_, inner_size_, output, reinterpret_cast<cudaStream_t>(stream_ptr));
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Init(const CNodePtr &kernel_node) override {
|
||||
size_t input_num = AnfAlgo::GetInputTensorNum(kernel_node);
|
||||
if (input_num != 1) {
|
||||
MS_LOG(ERROR) << "Input number is " << input_num << ", but argmax needs 1 input.";
|
||||
return false;
|
||||
auto shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
auto output_shape = AnfAlgo::GetOutputInferShape(kernel_node, 0);
|
||||
int64_t dims = shape.size();
|
||||
int64_t axis = GetAttr<int64_t>(kernel_node, "axis");
|
||||
if (axis < 0) {
|
||||
axis += dims;
|
||||
}
|
||||
size_t output_num = AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_num != 1) {
|
||||
MS_LOG(ERROR) << "Output number is " << output_num << ", but argmax needs 1 output.";
|
||||
return false;
|
||||
input_size_ = sizeof(T);
|
||||
for (auto x : shape) {
|
||||
input_size_ *= x;
|
||||
}
|
||||
auto output_type = GetValue<TypePtr>(AnfAlgo::GetCNodePrimitive(kernel_node)->GetAttr("output_type"));
|
||||
if (output_type->type_id() != TypeId::kNumberTypeInt32) {
|
||||
MS_LOG(EXCEPTION) << "Argmax only supports int32 output type.";
|
||||
output_size_ = sizeof(S);
|
||||
for (auto x : output_shape) {
|
||||
output_size_ *= x;
|
||||
}
|
||||
auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
|
||||
if (input_shape.size() > ARGMAX_MAX_DIMENSION) {
|
||||
MS_LOG(EXCEPTION) << "Input is " << input_shape.size() << "-D, but Argmax supports max " << ARGMAX_MAX_DIMENSION
|
||||
<< "-D inputs.";
|
||||
bound_ = static_cast<S>(shape[axis]);
|
||||
if (shape[axis] != static_cast<size_t>(bound_)) {
|
||||
MS_LOG(EXCEPTION) << "Bound's shape is larger than index type and overflows when casting.";
|
||||
}
|
||||
|
||||
axis_ = GetAttr<int64_t>(kernel_node, "axis");
|
||||
if (axis_ < 0) {
|
||||
axis_ += static_cast<int64_t>(input_shape.size());
|
||||
outer_size_ = 1;
|
||||
for (int64_t i = axis - 1; i >= 0; i--) {
|
||||
outer_size_ *= shape[i];
|
||||
}
|
||||
if (input_shape.size() == 1) {
|
||||
batch_size_ = 0;
|
||||
channel_size_ = input_shape[0];
|
||||
input_size_ = sizeof(T) * channel_size_;
|
||||
output_size_ = sizeof(int);
|
||||
} else {
|
||||
batch_size_ = input_shape[0];
|
||||
channel_size_ = input_shape[1];
|
||||
input_size_ = sizeof(T) * batch_size_ * channel_size_;
|
||||
output_size_ = (axis_ == 1) ? sizeof(int) * batch_size_ : sizeof(int) * channel_size_;
|
||||
inner_size_ = 1;
|
||||
for (int64_t i = axis + 1; i < dims; i++) {
|
||||
inner_size_ *= shape[i];
|
||||
}
|
||||
InitSizeLists();
|
||||
return true;
|
||||
|
@ -96,9 +86,9 @@ class ArgmaxGpuKernel : public GpuKernel {
|
|||
std::vector<size_t> input_size_list_;
|
||||
std::vector<size_t> output_size_list_;
|
||||
std::vector<size_t> workspace_size_list_;
|
||||
size_t batch_size_;
|
||||
size_t channel_size_;
|
||||
int64_t axis_;
|
||||
S bound_;
|
||||
size_t outer_size_;
|
||||
size_t inner_size_;
|
||||
};
|
||||
} // namespace kernel
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -17,72 +17,36 @@
|
|||
#include "argmax_impl.cuh"
|
||||
#include "runtime/device/gpu/cuda_common.h"
|
||||
#include "include/cuda_fp16.h"
|
||||
template <typename T>
|
||||
__global__ void Argmax1D(const T *input, const int channel_size, int *output) {
|
||||
int max_index = 0;
|
||||
T max = input[0];
|
||||
for (int pos = 1; pos < channel_size; pos++) {
|
||||
if (max < input[pos]) {
|
||||
max = input[pos];
|
||||
max_index = pos;
|
||||
template <typename T, typename S>
|
||||
__global__ void Argmax(const T *input, const S bound, const size_t outer_size,
|
||||
const size_t inner_size, S *output) {
|
||||
for (size_t pos = blockIdx.x * blockDim.x + threadIdx.x; pos < outer_size * inner_size;
|
||||
pos += gridDim.x * blockDim.x) {
|
||||
size_t x = pos / inner_size % outer_size;
|
||||
size_t y = pos % inner_size;
|
||||
S idx = 0;
|
||||
size_t input_offset = x * bound * inner_size + 0 * inner_size + y;
|
||||
T max_data = input[input_offset];
|
||||
for (S i = 1; i < bound; i++) {
|
||||
input_offset = x * bound * inner_size + i * inner_size + y;
|
||||
auto input_data = input[input_offset];
|
||||
idx = input_data > max_data ? i : idx;
|
||||
max_data = input_data > max_data ? input_data : max_data;
|
||||
}
|
||||
}
|
||||
output[0] = max_index;
|
||||
return;
|
||||
}
|
||||
template <typename T>
|
||||
__global__ void ArgmaxDefault2D(const T *input, const int batch_size, const int channel_size, int *output) {
|
||||
int pos;
|
||||
int max_index;
|
||||
T max;
|
||||
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < batch_size; i += blockDim.x * gridDim.x) {
|
||||
max = input[i * channel_size];
|
||||
max_index = 0;
|
||||
for (int j = 1; j < channel_size; j++) {
|
||||
pos = i * channel_size + j;
|
||||
if (max < input[pos]) {
|
||||
max = input[pos];
|
||||
max_index = j;
|
||||
}
|
||||
}
|
||||
|
||||
output[i] = max_index;
|
||||
}
|
||||
return;
|
||||
}
|
||||
template <typename T>
|
||||
__global__ void ArgmaxAxis2D(const T *input, const int batch_size, const int channel_size, int *output) {
|
||||
int pos;
|
||||
int max_index;
|
||||
T max;
|
||||
for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < channel_size; i += blockDim.x * gridDim.x) {
|
||||
max = input[i];
|
||||
max_index = 0;
|
||||
for (int j = 1; j < batch_size; j++) {
|
||||
pos = j * channel_size + i;
|
||||
if (max < input[pos]) {
|
||||
max = input[pos];
|
||||
max_index = j;
|
||||
}
|
||||
}
|
||||
output[i] = max_index;
|
||||
}
|
||||
return;
|
||||
}
|
||||
template <typename T>
|
||||
void CalArgmax(const T *input, const int batch_size, const int channel_size, const int64_t axis, int *output,
|
||||
cudaStream_t cuda_stream) {
|
||||
if (batch_size == 0) {
|
||||
Argmax1D<<<1, 1, 0, cuda_stream>>>(input, channel_size, output);
|
||||
} else if (axis == 1) {
|
||||
ArgmaxDefault2D<<<GET_BLOCKS(batch_size), GET_THREADS, 0, cuda_stream>>>(input, batch_size, channel_size, output);
|
||||
} else {
|
||||
ArgmaxAxis2D<<<GET_BLOCKS(channel_size), GET_THREADS, 0, cuda_stream>>>(input, batch_size, channel_size, output);
|
||||
output[pos] = idx;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
template void CalArgmax<float>(const float *input, const int batch_size, const int channel_size, const int64_t axis,
|
||||
int *output, cudaStream_t cuda_stream);
|
||||
template void CalArgmax<half>(const half *input, const int batch_size, const int channel_size, const int64_t axis,
|
||||
int *output, cudaStream_t cuda_stream);
|
||||
template <typename T, typename S>
|
||||
void CalArgmax(const T *input, const S bound, const size_t outer_size, const size_t inner_size,
|
||||
S *output, cudaStream_t cuda_stream) {
|
||||
Argmax<<<GET_BLOCKS(outer_size), GET_THREADS, 0, cuda_stream>>>(input, bound, outer_size, inner_size,
|
||||
output);
|
||||
return;
|
||||
}
|
||||
|
||||
template void CalArgmax<float, int>(const float *input, const int bound, const size_t outer_size,
|
||||
const size_t inner_size, int *output, cudaStream_t cuda_stream);
|
||||
template void CalArgmax<half, int>(const half *input, const int bound, const size_t outer_size,
|
||||
const size_t inner_size, int *output, cudaStream_t cuda_stream);
|
||||
|
|
|
@ -16,8 +16,8 @@
|
|||
|
||||
#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_ARGMAX_IMPL_CUH_
|
||||
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_ARGMAX_IMPL_CUH_
|
||||
template <typename T>
|
||||
void CalArgmax(const T *input, const int batch_size, const int channel_size, const int64_t axis, int *output,
|
||||
template <typename T, typename S>
|
||||
void CalArgmax(const T *input, const S bound, const size_t outer_size, const size_t inner_size, S *output,
|
||||
cudaStream_t cuda_stream);
|
||||
|
||||
#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_CUDA_IMPL_ARGMAX_IMPL_CUH_
|
||||
|
|
|
@ -13,6 +13,8 @@
|
|||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
import random
|
||||
from functools import reduce
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
|
@ -20,33 +22,59 @@ import mindspore.context as context
|
|||
import mindspore.nn as nn
|
||||
from mindspore import Tensor
|
||||
from mindspore.common import dtype as mstype
|
||||
from mindspore.common.initializer import initializer
|
||||
from mindspore.common.parameter import Parameter
|
||||
from mindspore.ops import operations as P
|
||||
import mindspore.ops as ops
|
||||
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
|
||||
|
||||
|
||||
class NetArgmax(nn.Cell):
|
||||
def __init__(self):
|
||||
def __init__(self, axis=0):
|
||||
super(NetArgmax, self).__init__()
|
||||
self.argmax = P.Argmax(output_type=mstype.int32)
|
||||
x = Tensor(np.array([[1., 20., 5.],
|
||||
[67., 8., 9.],
|
||||
[130., 24., 15.]]).astype(np.float32))
|
||||
self.x = Parameter(initializer(x, x.shape), name='x')
|
||||
self.argmax = ops.Argmax(axis=axis, output_type=mstype.int32)
|
||||
|
||||
def construct(self):
|
||||
return self.argmax(self.x)
|
||||
def construct(self, x):
|
||||
return self.argmax(x)
|
||||
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_cpu
|
||||
@pytest.mark.env_onecard
|
||||
def test_argmax():
|
||||
Argmax = NetArgmax()
|
||||
output = Argmax()
|
||||
print("================================")
|
||||
expect = np.array([1, 0, 0]).astype(np.float32)
|
||||
print(output)
|
||||
def test_argmax_1d():
|
||||
x = Tensor(np.array([1., 20., 5.]).astype(np.float32))
|
||||
Argmax = NetArgmax(axis=0)
|
||||
output = Argmax(x)
|
||||
expect = np.array([1]).astype(np.float32)
|
||||
assert (output.asnumpy() == expect).all()
|
||||
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_cpu
|
||||
@pytest.mark.env_onecard
|
||||
def test_argmax_2d():
|
||||
x = Tensor(np.array([[1., 20., 5.],
|
||||
[67., 8., 9.],
|
||||
[130., 24., 15.]]).astype(np.float32))
|
||||
Argmax_axis_0 = NetArgmax(axis=0)
|
||||
output = Argmax_axis_0(x)
|
||||
expect = np.array([2, 2, 2]).astype(np.float32)
|
||||
assert (output.asnumpy() == expect).all()
|
||||
Argmax_axis_1 = NetArgmax(axis=1)
|
||||
output = Argmax_axis_1(x)
|
||||
expect = np.array([1, 0, 0]).astype(np.float32)
|
||||
assert (output.asnumpy() == expect).all()
|
||||
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_cpu
|
||||
@pytest.mark.env_onecard
|
||||
def test_argmax_high_dims():
|
||||
for dim in range(3, 10):
|
||||
shape = np.random.randint(1, 10, size=dim)
|
||||
x = np.random.randn(reduce(lambda x, y: x * y, shape)).astype(np.float32)
|
||||
x = x.reshape(shape)
|
||||
|
||||
rnd_axis = random.randint(-dim + 1, dim - 1)
|
||||
Argmax = NetArgmax(axis=rnd_axis)
|
||||
ms_output = Argmax(Tensor(x))
|
||||
np_output = np.argmax(x, axis=rnd_axis)
|
||||
assert (ms_output.asnumpy() == np_output).all()
|
||||
|
|
|
@ -13,6 +13,8 @@
|
|||
# limitations under the License.
|
||||
# ============================================================================
|
||||
|
||||
import random
|
||||
from functools import reduce
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
|
@ -20,43 +22,67 @@ import mindspore.context as context
|
|||
import mindspore.nn as nn
|
||||
from mindspore import Tensor
|
||||
from mindspore.common import dtype as mstype
|
||||
from mindspore.ops import operations as P
|
||||
import mindspore.ops as ops
|
||||
|
||||
|
||||
class NetArgmax(nn.Cell):
|
||||
def __init__(self):
|
||||
def __init__(self, axis=0):
|
||||
super(NetArgmax, self).__init__()
|
||||
axis1 = 0
|
||||
axis2 = -1
|
||||
self.argmax1 = P.Argmax(axis1, output_type=mstype.int32)
|
||||
self.argmax2 = P.Argmax(axis2, output_type=mstype.int32)
|
||||
self.argmax3 = P.Argmax(output_type=mstype.int32)
|
||||
self.argmax = ops.Argmax(axis, output_type=mstype.int32)
|
||||
|
||||
def construct(self, x):
|
||||
return (self.argmax1(x), self.argmax2(x), self.argmax3(x))
|
||||
return self.argmax(x)
|
||||
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_gpu_training
|
||||
@pytest.mark.env_onecard
|
||||
def test_argmax():
|
||||
def test_argmax_1d():
|
||||
for mode in [context.PYNATIVE_MODE, context.GRAPH_MODE]:
|
||||
context.set_context(mode=mode, device_target="GPU")
|
||||
|
||||
x = Tensor(np.array([1., 20., 5.]).astype(np.float32))
|
||||
Argmax = NetArgmax(axis=0)
|
||||
output = Argmax(x)
|
||||
expect = np.array([1]).astype(np.float32)
|
||||
assert (output.asnumpy() == expect).all()
|
||||
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_gpu_training
|
||||
@pytest.mark.env_onecard
|
||||
def test_argmax_2d():
|
||||
for mode in [context.PYNATIVE_MODE, context.GRAPH_MODE]:
|
||||
context.set_context(mode=mode, device_target="GPU")
|
||||
|
||||
x = Tensor(np.array([[1., 20., 5.],
|
||||
[67., 8., 9.],
|
||||
[130., 24., 15.],
|
||||
[0.3, -0.4, -15.]]).astype(np.float32))
|
||||
expect1 = np.array([2, 2, 2]).astype(np.int32)
|
||||
expect2 = np.array([1, 0, 0, 0]).astype(np.int32)
|
||||
Argmax_axis_0 = NetArgmax(axis=0)
|
||||
output = Argmax_axis_0(x)
|
||||
expect = np.array([2, 2, 2]).astype(np.int32)
|
||||
assert (output.asnumpy() == expect).all()
|
||||
|
||||
context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU")
|
||||
argmax = NetArgmax()
|
||||
output = argmax(x)
|
||||
assert (output[0].asnumpy() == expect1).all()
|
||||
assert (output[1].asnumpy() == expect2).all()
|
||||
assert (output[2].asnumpy() == expect2).all()
|
||||
Argmax_axis_1 = NetArgmax(axis=1)
|
||||
output = Argmax_axis_1(x)
|
||||
expect = np.array([1, 0, 0, 0]).astype(np.int32)
|
||||
assert (output.asnumpy() == expect).all()
|
||||
|
||||
context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
|
||||
argmax1 = NetArgmax()
|
||||
output1 = argmax1(x)
|
||||
assert (output1[0].asnumpy() == expect1).all()
|
||||
assert (output1[1].asnumpy() == expect2).all()
|
||||
assert (output1[2].asnumpy() == expect2).all()
|
||||
|
||||
@pytest.mark.level0
|
||||
@pytest.mark.platform_x86_gpu_training
|
||||
@pytest.mark.env_onecard
|
||||
def test_argmax_high_dims():
|
||||
for mode in [context.PYNATIVE_MODE, context.GRAPH_MODE]:
|
||||
context.set_context(mode=mode, device_target="GPU")
|
||||
for dim in range(3, 10):
|
||||
shape = np.random.randint(1, 10, size=dim)
|
||||
x = np.random.randn(reduce(lambda x, y: x * y, shape)).astype(np.float32)
|
||||
x = x.reshape(shape)
|
||||
|
||||
rnd_axis = random.randint(-dim + 1, dim - 1)
|
||||
Argmax = NetArgmax(axis=rnd_axis)
|
||||
ms_output = Argmax(Tensor(x))
|
||||
np_output = np.argmax(x, axis=rnd_axis)
|
||||
assert (ms_output.asnumpy() == np_output).all()
|
||||
|
|
Loading…
Reference in New Issue