From 9c8ada03fedb2a5a85918e286c66306dd077110b Mon Sep 17 00:00:00 2001 From: kswang Date: Mon, 19 Apr 2021 10:47:13 +0800 Subject: [PATCH] cpu unique kernel support fp16 --- .../backend/kernel_compiler/cpu/cpu_kernel.h | 1 + .../kernel_compiler/cpu/unique_cpu_kernel.cc | 25 +++++++++++++------ .../kernel_compiler/cpu/unique_cpu_kernel.h | 3 ++- .../cpu/unique_with_pad_cpu_kernel.cc | 6 +++-- .../cpu/unique_with_pad_cpu_kernel.h | 2 +- tests/st/ops/cpu/test_unique_op.py | 12 +++++++++ .../cpu/unique_with_pad_cpu_kernel_test.cc | 6 ++--- 7 files changed, 41 insertions(+), 14 deletions(-) diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.h index d60fcf3e9e9..9ead4d34d88 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/cpu_kernel.h @@ -57,6 +57,7 @@ const char GROUP[] = "group"; const char START[] = "start"; const char LIMIT[] = "limit"; const char DELTA[] = "delta"; +const char SORTED[] = "sorted"; enum OperateType { ADD = 0, diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_cpu_kernel.cc index fbdabb2f31f..23707b002d5 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_cpu_kernel.cc @@ -19,13 +19,16 @@ namespace mindspore { namespace kernel { -const size_t kUseBucketUniqueSize = 100000; +constexpr size_t kBucketSortThreshold = 100000; void UniqueCPUKernel::InitKernel(const CNodePtr &kernel_node) { node_wpt_ = kernel_node; CheckParam(kernel_node); auto input_shape = AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0); input_size_ = input_shape[0]; dtype_ = AnfAlgo::GetPrevNodeOutputInferDataType(kernel_node, 0); + if (AnfAlgo::HasNodeAttr(SORTED, kernel_node)) { + sorted_ = AnfAlgo::GetNodeAttr(kernel_node, SORTED); + } } void UniqueCPUKernel::InitInputOutputSize(const CNodePtr &kernel_node) { @@ -41,9 +44,11 @@ bool UniqueCPUKernel::Launch(const std::vector &inputs, if (dtype_ == kNumberTypeInt32) { LaunchKernel(inputs, workspace, outputs); } else if (dtype_ == kNumberTypeInt64) { - LaunchKernel(inputs, workspace, outputs); - } else if (dtype_ == kNumberTypeFloat32) { + LaunchKernel(inputs, workspace, outputs); + } else if (dtype_ == kNumberTypeFloat32 || dtype_ == kNumberTypeFloat16) { LaunchKernel(inputs, workspace, outputs); + } else { + MS_LOG(EXCEPTION) << "Not support type: " << dtype_; } if (!node_wpt_.expired()) { auto node_ = node_wpt_.lock(); @@ -86,12 +91,18 @@ void UniqueCPUKernel::LaunchKernel(const std::vector &inputs, const params->inverse_idx_ = reinterpret_cast(outputs[1]->addr); params->input_size_ = input_size_; params->output_size_ = 0; - params->need_sort_ = true; + params->thread_num_ = common::ThreadPool::GetInstance().GetSyncRunThreadNum(); - if (input_size_ < kUseBucketUniqueSize) { - Unique(params); + if (sorted_) { + params->need_sort_ = true; + if (input_size_ < kBucketSortThreshold) { + Unique(params); + } else { + BucketUnique(params); + } } else { - BucketUnique(params); + params->need_sort_ = false; + Unique(params); } output_size_ = params->output_size_; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_cpu_kernel.h index 4736441531a..41ec6159c20 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_cpu_kernel.h @@ -60,6 +60,7 @@ class UniqueCPUKernel : public CPUKernel { size_t input_size_{0}; TypeId dtype_{kTypeUnknown}; size_t output_size_{0}; + bool sorted_{false}; CNodeWeakPtr node_wpt_; template @@ -378,7 +379,7 @@ MS_REG_CPU_KERNEL( UniqueCPUKernel); MS_REG_CPU_KERNEL( - Unique, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt32), + Unique, KernelAttr().AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64), UniqueCPUKernel); MS_REG_CPU_KERNEL( diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_with_pad_cpu_kernel.cc b/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_with_pad_cpu_kernel.cc index 96cc8715ae6..0f774975f81 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_with_pad_cpu_kernel.cc +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_with_pad_cpu_kernel.cc @@ -26,11 +26,13 @@ bool UniqueWithPadCPUKernel::Launch(const std::vector &input UniqueCPUKernel::LaunchKernel(inputs, workspace, outputs); PadOutput(inputs, outputs); } else if (dtype_ == kNumberTypeInt64) { - UniqueCPUKernel::LaunchKernel(inputs, workspace, outputs); + UniqueCPUKernel::LaunchKernel(inputs, workspace, outputs); PadOutput(inputs, outputs); - } else if (dtype_ == kNumberTypeFloat32) { + } else if (dtype_ == kNumberTypeFloat32 || dtype_ == kNumberTypeFloat16) { UniqueCPUKernel::LaunchKernel(inputs, workspace, outputs); PadOutput(inputs, outputs); + } else { + MS_LOG(EXCEPTION) << "Not support data type: " << dtype_; } return true; } diff --git a/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_with_pad_cpu_kernel.h b/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_with_pad_cpu_kernel.h index 759ef8b4dfc..fdd0b8d3728 100644 --- a/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_with_pad_cpu_kernel.h +++ b/mindspore/ccsrc/backend/kernel_compiler/cpu/unique_with_pad_cpu_kernel.h @@ -51,7 +51,7 @@ MS_REG_CPU_KERNEL(UniqueWithPad, .AddInputAttr(kNumberTypeInt64) .AddInputAttr(kNumberTypeInt64) .AddOutputAttr(kNumberTypeInt64) - .AddOutputAttr(kNumberTypeInt32), + .AddOutputAttr(kNumberTypeInt64), UniqueWithPadCPUKernel); MS_REG_CPU_KERNEL(UniqueWithPad, diff --git a/tests/st/ops/cpu/test_unique_op.py b/tests/st/ops/cpu/test_unique_op.py index 4bf888bb11c..ecf1d93fa64 100644 --- a/tests/st/ops/cpu/test_unique_op.py +++ b/tests/st/ops/cpu/test_unique_op.py @@ -46,6 +46,18 @@ def test_net_fp32(): assert (output[0].asnumpy() == expect_y_result).all() assert (output[1].asnumpy() == expect_idx_result).all() +def test_net_fp16(): + x = Tensor(np.array([1, 5, 2, 2]), mstype.float16) + uniq = Net() + output = uniq(x) + print("x:\n", x) + print("y:\n", output[0]) + print("idx:\n", output[1]) + expect_y_result = [1., 5., 2.] + expect_idx_result = [0, 1, 2, 2] + + assert (output[0].asnumpy() == expect_y_result).all() + assert (output[1].asnumpy() == expect_idx_result).all() def test_net_int32(): x = Tensor(np.array([1, 2, 5, 2]), mstype.int32) diff --git a/tests/ut/cpp/kernel/cpu/unique_with_pad_cpu_kernel_test.cc b/tests/ut/cpp/kernel/cpu/unique_with_pad_cpu_kernel_test.cc index 25d5727cf25..e90f20a5fc6 100644 --- a/tests/ut/cpp/kernel/cpu/unique_with_pad_cpu_kernel_test.cc +++ b/tests/ut/cpp/kernel/cpu/unique_with_pad_cpu_kernel_test.cc @@ -55,7 +55,7 @@ class UniqueWithPadCpuKernelTest : public UT::Common { std::vector x_; int64_t pad_dim_; std::vector out_; - std::vector idx_; + std::vector idx_; std::vector workspace_idx_; std::vector inputs_; std::vector workspace_; @@ -73,8 +73,8 @@ TEST_F(UniqueWithPadCpuKernelTest, compute_test) { unique_with_pad_->Launch(inputs_, workspace_, outputs_); // check compute result - std::vector expect_out{1, 2, 3, 4, 5, 8, 8, 8, 8, 8}; - std::vector expect_idx{0, 0, 4, 4, 3, 3, 2, 2, 1, 1}; + std::vector expect_out{1, 5, 4, 3, 2, 8, 8, 8, 8, 8}; + std::vector expect_idx{0, 0, 1, 1, 2, 2, 3, 3, 4, 4}; EXPECT_TRUE(out_ == expect_out); EXPECT_TRUE(idx_ == expect_idx); }