diff --git a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/unique_gpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/unique_gpu_kernel.h index 237bb1cbec0..e8a6c853c25 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/unique_gpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/gpu/arrays/unique_gpu_kernel.h @@ -26,7 +26,7 @@ namespace kernel { template class UniqueGpuKernel : public GpuKernel { public: - UniqueGpuKernel() : input_size_(0), output_size_(0), workspace_size_(0), num_elements_(1), post_output_size_(0) {} + UniqueGpuKernel() { ResetResource(); } ~UniqueGpuKernel() override = default; const std::vector &GetInputSizeList() const override { return input_size_list_; } @@ -48,7 +48,7 @@ class UniqueGpuKernel : public GpuKernel { bool Init(const CNodePtr &kernel_node) override { kernel_node_ = kernel_node; - std::vector shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); + std::vector shape = AnfAlgo::GetInputRealDeviceShapeIfExist(kernel_node, 0); for (auto x : shape) { num_elements_ *= x; } @@ -77,6 +77,19 @@ class UniqueGpuKernel : public GpuKernel { AnfAlgo::SetOutputInferTypeAndShape(type_ids, shapes, kernel_node_.get()); } + void ResetResource() noexcept override { + input_size_ = 0; + output_size_ = 0; + workspace_size_ = 0; + num_elements_ = 1; + post_output_size_ = 0; + stream_ptr_ = nullptr; + kernel_node_ = nullptr; + input_size_list_.clear(); + output_size_list_.clear(); + workspace_size_list_.clear(); + } + protected: void InitSizeLists() override { input_size_list_.push_back(input_size_); diff --git a/mindspore/core/abstract/prim_arrays.cc b/mindspore/core/abstract/prim_arrays.cc index 42644edfd0c..c9369bc3a07 100644 --- a/mindspore/core/abstract/prim_arrays.cc +++ b/mindspore/core/abstract/prim_arrays.cc @@ -164,7 +164,10 @@ AbstractBasePtr InferImplUnique(const AnalysisEnginePtr &, const PrimitivePtr &p } ShapeVector ids_shape = {Shape::SHP_ANY}; ShapeVector min_shape = {1}; - ShapeVector max_shape = shape->shape(); + ShapeVector max_shape = shape->max_shape(); + if (max_shape.empty()) { + max_shape = shape->shape(); + } auto ids = std::make_shared(input->element(), std::make_shared(ids_shape, min_shape, max_shape)); // Currently we choose the same data type as input for the idx. @@ -174,7 +177,17 @@ AbstractBasePtr InferImplUnique(const AnalysisEnginePtr &, const PrimitivePtr &p if (input->element()->GetTypeTrack()->type_id() == TypeId::kNumberTypeInt64) { ids_idx_type = kInt64; } - auto ids_idx = std::make_shared(ids_idx_type, shape->shape()); + ShapeVector idx_shape = shape->shape(); + ShapeVector idx_min_shape = shape->min_shape(); + if (idx_min_shape.empty()) { + idx_min_shape = shape->shape(); + } + ShapeVector idx_max_shape = shape->max_shape(); + if (idx_max_shape.empty()) { + idx_max_shape = shape->shape(); + } + auto ids_idx = std::make_shared(ids_idx_type, idx_shape); + ids_idx->set_shape(std::make_shared(idx_shape, idx_min_shape, idx_max_shape)); // outputs: ids, ids_idx AbstractBasePtrList elements = {ids, ids_idx}; return std::make_shared(elements); diff --git a/tests/st/ops/gpu/test_unique_op.py b/tests/st/ops/gpu/test_unique_op.py index 4aa45a1c95c..a5070ac93f5 100644 --- a/tests/st/ops/gpu/test_unique_op.py +++ b/tests/st/ops/gpu/test_unique_op.py @@ -20,7 +20,7 @@ import mindspore.context as context import mindspore.nn as nn from mindspore import Tensor from mindspore.ops import operations as P - +from mindspore.ops.operations import _inner_ops as inner class NetUnique(nn.Cell): def __init__(self): @@ -32,6 +32,20 @@ class NetUnique(nn.Cell): return x_unique, x_idx +class NetUniqueDynamic(nn.Cell): + def __init__(self): + super(NetUniqueDynamic, self).__init__() + self.convert = inner.GpuConvertToDynamicShape() + self.unique = P.Unique() + self.split = P.Split(0, 2) + + def construct(self, x): + x_convert = self.convert(x) + x_unique, x_idx = self.unique(x_convert) + x_split = self.split(x_unique) + return x_unique, x_idx, x_split + + @pytest.mark.level0 @pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard @@ -224,3 +238,32 @@ def test_unique_large_int32(): x_unique, x_idx = net(x) assert (x_unique.asnumpy() == exp_output).all() assert (x_idx.asnumpy() == exp_idx).all() + + +@pytest.mark.level0 +@pytest.mark.platform_x86_gpu_training +@pytest.mark.env_onecard +def test_unique_dynamic(): + x = Tensor(np.array([4, 5, 1, 2, 3, 3, 4, 5, 6]).astype(np.float32)) + expt_unique = np.array([1, 2, 3, 4, 5, 6]).astype(np.float32) + expt_index = np.array([3, 4, 0, 1, 2, 2, 3, 4, 5]).astype(np.int32) + expt_split = np.array([[1, 2, 3], [4, 5, 6]]).astype(np.float32) + + x2 = Tensor(np.array([1, 1, 4, 4, 7, 8, 8]).astype(np.float32)) + expt_unique2 = np.array([1, 4, 7, 8]).astype(np.float32) + expt_index2 = np.array([0, 0, 1, 1, 2, 3, 3]).astype(np.int32) + expt_split2 = np.array([[1, 4], [7, 8]]).astype(np.float32) + + context.set_context(mode=context.GRAPH_MODE, device_target="GPU") + net = NetUniqueDynamic() + x_unique, x_idx, x_split = net(x) + assert (x_unique.asnumpy() == expt_unique).all() + assert (x_idx.asnumpy() == expt_index).all() + for i, out in enumerate(x_split): + assert (out.asnumpy() == expt_split[i]).all() + + x_unique2, x_idx2, x_split2 = net(x2) + assert (x_unique2.asnumpy() == expt_unique2).all() + assert (x_idx2.asnumpy() == expt_index2).all() + for i, out in enumerate(x_split2): + assert (out.asnumpy() == expt_split2[i]).all()