forked from mindspore-Ecosystem/mindspore
!44214 [MS][Kernel] add log_uniform_candidate_sampler primitive
Merge pull request !44214 from zhaizhiqiang/master
commit 90f66d904d
@ -104,6 +104,17 @@ mindspore.ops.function

    mindspore.ops.cdist

Sampling Functions
^^^^^^^^^^^^^^^^^^

.. mscnplatformautosummary::
    :toctree: ops
    :nosignatures:
    :template: classtemplate.rst

    mindspore.ops.log_uniform_candidate_sampler

Mathematical Functions
^^^^^^^^^^^^^^^^^^^^^^
@ -0,0 +1,32 @@
mindspore.ops.log_uniform_candidate_sampler
===========================================

.. py:function:: mindspore.ops.log_uniform_candidate_sampler(true_classes, num_true=1, num_sampled=5, unique=True, range_max=5, seed=0)

    Samples a set of classes using a log-uniform (Zipfian) distribution.

    This operation randomly samples a Tensor of sampled classes (`sampled_candidates`) from the range of integers [0, `range_max`).

    **Parameters:**

    - **true_classes** (Tensor) - The target classes, with data type int64 and shape :math:`(batch\_size, num\_true)` .
    - **num_true** (int) - The number of target classes per training example. Default: 1.
    - **num_sampled** (int) - The number of classes to randomly sample. Default: 5.
    - **unique** (bool) - Determines whether all sampled classes in a batch are unique. If `unique` is True, all sampled classes in a batch are unique. Default: True.
    - **range_max** (int) - The number of possible classes. When `unique` is True, `range_max` must be greater than or equal to `num_sampled`. Default: 5.
    - **seed** (int) - Random seed, must be non-negative. Default: 0.

    **Returns:**

    Tuple of 3 Tensors.

    - **sampled_candidates** (Tensor) - A Tensor with shape :math:`(num\_sampled,)` and the same data type as `true_classes`.
    - **true_expected_count** (Tensor) - A Tensor with the same shape as `true_classes` and data type float32.
    - **sampled_expected_count** (Tensor) - A Tensor with the same shape as `sampled_candidates` and data type float32.

    **Raises:**

    - **TypeError** - If neither `num_true` nor `num_sampled` is an int.
    - **TypeError** - If `unique` is not a bool.
    - **TypeError** - If neither `range_max` nor `seed` is an int.
    - **TypeError** - If `true_classes` is not a Tensor.
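For orientation, a minimal usage sketch of the documented function (values mirror the example in the Python docstring later in this diff; assumes a build that includes this commit):

```python
import numpy as np
import mindspore as ms
from mindspore import ops

true_classes = ms.Tensor(np.array([[1, 7], [0, 4], [3, 3]]), ms.int64)
# Draw 5 unique candidates from [0, 5) under a log-uniform prior.
sampled, true_expected, sampled_expected = ops.log_uniform_candidate_sampler(
    true_classes, num_true=2, num_sampled=5, unique=True, range_max=5, seed=0)
print(sampled.shape)           # (5,)   one entry per sampled class
print(true_expected.shape)     # (3, 2) same shape as true_classes
print(sampled_expected.shape)  # (5,)   same shape as sampled_candidates
```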
@ -408,6 +408,7 @@ constexpr auto kTransposeNODOpName = "TransposeNOD";
constexpr auto kTransposeOpName = "Transpose";
constexpr auto kTruncatedNormal = "TruncatedNormal";
constexpr auto kUniformCandidateSamplerOpName = "UniformCandidateSampler";
constexpr auto kLogUniformCandidateSamplerOpName = "LogUniformCandidateSampler";
constexpr auto kUniformIntOpName = "UniformInt";
constexpr auto kUniformRealOpName = "UniformReal";
constexpr auto kUniqueConsecutiveOpName = "UniqueConsecutive";
@ -191,6 +191,7 @@ int KernelMod::Resize(const BaseOperatorPtr &base_operator, const std::vector<Ke
  auto ret = KRET_OK;
  workspace_size_list_.clear();
  input_size_list_.clear();
  input_shapes_.clear();
  for (auto &input : inputs) {
    size_t tensor_size = 0;
    size_t type_size = GetTypeByte(TypeIdToType(input->GetDtype()));
@ -204,8 +205,9 @@ int KernelMod::Resize(const BaseOperatorPtr &base_operator, const std::vector<Ke
      tensor_size = std::max(tensor_size, type_size);
    }
    (void)input_size_list_.emplace_back(tensor_size);
    input_shapes_.emplace_back(shape);
  }

  output_shapes_.clear();
  output_size_list_.clear();
  for (auto &output : outputs) {
    size_t tensor_size = 0;
@ -232,6 +234,7 @@ int KernelMod::Resize(const BaseOperatorPtr &base_operator, const std::vector<Ke
      tensor_size = std::max(tensor_size, type_size);
    }
    (void)output_size_list_.emplace_back(tensor_size);
    output_shapes_.emplace_back(shape);
  }
  return static_cast<int>(ret);
}
@ -218,6 +218,7 @@ struct TensorInfo {
using TensorInfoPtr = std::shared_ptr<TensorInfo>;
using BaseOperatorPtr = std::shared_ptr<ops::BaseOperator>;

class KernelAttr;
class BACKEND_EXPORT KernelTensor {
 public:
  KernelTensor() = default;
@ -286,6 +287,8 @@ class BACKEND_EXPORT KernelMod {
  virtual const std::vector<size_t> &GetInputSizeList() const { return input_size_list_; }
  virtual const std::vector<size_t> &GetOutputSizeList() const { return output_size_list_; }
  virtual const std::vector<size_t> &GetWorkspaceSizeList() const { return workspace_size_list_; }
  virtual const std::vector<std::vector<int64_t>> &GetInputShapes() const { return input_shapes_; }
  virtual const std::vector<std::vector<int64_t>> &GetOutputShapes() const { return output_shapes_; }
  virtual bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                      const std::vector<AddressPtr> &outputs, void *stream_ptr) = 0;
  virtual std::vector<size_t> GenParameters() { return {}; }
@ -295,11 +298,9 @@ class BACKEND_EXPORT KernelMod {
                      const std::vector<KernelTensorPtr> & /* outputs */) {
    return true;
  }

  // Resize is for updating shape-related information and performing shape-related operations (e.g., calculating
  // output tensor size and allocating output tensor memory).
  // Sometimes Resize needs the input tensor data; the framework will sync and retain these tensors from device to
  // host and pass them in via the inputsOnHost parameter.
  virtual std::vector<KernelAttr> GetOpSupport() = 0;
  // Resize() is for validating input/output shape and calculating the workspace size; the framework will invoke
  // this routine after infer shape.
  virtual int Resize(
    const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
    const std::vector<KernelTensorPtr> &outputs,
@ -342,7 +343,9 @@ class BACKEND_EXPORT KernelMod {
  std::string fullname_;
  bool is_monad_{false};
  std::vector<size_t> input_size_list_;
  std::vector<std::vector<int64_t>> input_shapes_;
  std::vector<size_t> output_size_list_;
  std::vector<std::vector<int64_t>> output_shapes_;
  std::vector<size_t> workspace_size_list_;
  bool is_need_retrieve_output_shape_ = false;
  uint32_t device_id_ = 0;
@ -22,6 +22,7 @@
#include <string>
#include "plugin/device/ascend/hal/device/ge_runtime/task_info.h"
#include "kernel/kernel.h"
#include "kernel/common_utils.h"
#ifndef ENABLE_SECURITY
#include "debug/data_dump/dump_json_parser.h"
#endif
@ -50,6 +51,7 @@ class AscendKernelMod : public KernelMod {
  bool IsNeedRetrieveOutputShape() override;
  void SetAtomicCleanNodes(const std::vector<CNodePtr> &atomic_clean_node);
  std::string GetAtomicCompileInfo() const { return atomic_compile_info_; }
  std::vector<KernelAttr> GetOpSupport() override { return {}; }

 protected:
  virtual void UpdateOutputSizeList();
@ -31,7 +31,7 @@ namespace kernel {
namespace {
constexpr size_t kInputNum = 2;

-std::vector<int64_t> GetInputValue(const CNodePtr &cnode, size_t index) {
+static std::vector<int64_t> GetInputValue(const CNodePtr &cnode, size_t index) {
  auto address_x = AnfAlgo::GetPrevNodeMutableOutputAddr(cnode, index);
  MS_EXCEPTION_IF_NULL(address_x);
  auto shape_x = common::AnfAlgo::GetPrevNodeOutputInferShape(cnode, index);
@ -69,13 +69,13 @@ std::vector<int64_t> GetInputValue(const CNodePtr &cnode, size_t index) {
  return input_shape;
}

-int64_t GetArrProd(const CNodePtr &cnode) {
+static int64_t GetArrProd(const CNodePtr &cnode) {
  auto shape_x = common::AnfAlgo::GetPrevNodeOutputInferShape(cnode, 0);
  auto arr_prod = std::accumulate(shape_x.begin(), shape_x.end(), static_cast<int64_t>(1), std::multiplies<int64_t>());
  return arr_prod;
}

-std::vector<int64_t> GetOutputShapes(const CNodePtr &cnode) {
+static std::vector<int64_t> GetOutputShapesFromCNode(const CNodePtr &cnode) {
  std::vector<int64_t> output_shapes;
  auto input_num = common::AnfAlgo::GetInputTensorNum(cnode);
  if (input_num != kInputNum) {
@ -135,7 +135,7 @@ void ReshapeKernelMod::Execute() const {
  auto address_x = AnfAlgo::GetPrevNodeMutableOutputAddr(cnode, 0);
  MS_EXCEPTION_IF_NULL(address_x);

-  std::vector<int64_t> output_shapes = GetOutputShapes(cnode);
+  std::vector<int64_t> output_shapes = GetOutputShapesFromCNode(cnode);
  auto type_x = common::AnfAlgo::GetOutputInferDataType(cnode, 0);

  size_t input_size_byte = LongToSize(GetArrProd(cnode)) * abstract::TypeIdSize(type_x);
@ -52,6 +52,8 @@ class AkgCpuKernelMod : public CpuKernelMod {
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
              const std::vector<AddressPtr> &outputs, void *) override;

  std::vector<KernelAttr> GetOpSupport() { return {}; }

  static AkgCpuKernelManagerPtr kernel_manager_;

 private:
@ -158,7 +158,8 @@ class BACKEND_EXPORT NativeCpuKernelMod : public CpuKernelMod {
    return temp_mod->GetAllSupportedList(kernel_name);
  }

-  virtual std::vector<KernelAttr> GetOpSupport() { return {}; }
+  std::vector<KernelAttr> GetOpSupport() { return {}; }

  enum KernelModType GetKernelModType() const override { return KernelModType::NativeCpuKernelMod; }

  ParallelSearchInfo parallel_search_info_;
@ -38,6 +38,7 @@ class GatherDCpuKernelMod : public NativeCpuKernelMod, public MatchKernelHelper<
    return kernel_func_(this, inputs, workspace, outputs);
  }
  const std::vector<std::pair<KernelAttr, KernelRunFunc>> &GetFuncList() const override;
  std::vector<KernelAttr> GetOpSupport() override { return OpSupport(); }

 private:
  template <typename T, typename I>
@ -0,0 +1,159 @@
/**
 * Copyright 2020-2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "plugin/device/cpu/kernel/log_uniform_candidate_sampler_cpu_kernel.h"
#include <cmath>
#include <map>
#include <utility>
#include <algorithm>
#include <unordered_set>
#include "plugin/device/cpu/hal/device/cpu_device_address.h"
#include "mindspore/core/ops/log_uniform_candidate_sampler.h"
#include "utils/shape_utils.h"

namespace mindspore {
namespace kernel {
bool LogUniformCandidateSamplerCpuKernel::Init(const BaseOperatorPtr &base_operator,
                                               const std::vector<KernelTensorPtr> &inputs,
                                               const std::vector<KernelTensorPtr> &outputs) {
  auto op = std::dynamic_pointer_cast<ops::LogUniformCandidateSampler>(base_operator);
  if (op == nullptr) {
    MS_LOG(ERROR) << "cast to ops::LogUniformCandidateSampler failed!";
    return false;
  }
  this->num_true_ = op->get_num_true();
  this->num_sampled_ = op->get_num_sampled();
  this->unique_ = op->get_unique();
  this->seed_ = op->get_seed();
  this->range_max_ = op->get_range_max();
  this->log_range_ = log1p(range_max_);
  if (unique_ && range_max_ < num_sampled_) {
    MS_LOG(ERROR) << "When unique is True, range_max must be greater than or equal to num_sampled";
    return false;
  }
  int64_t seed = 87654321;
  int64_t seed2 = seed_;
  generator_.Init(seed, seed2);
  reserveSamplesNr_ = 2048 * num_sampled_;
  return true;
}

static float CalcExpectedCount(float p, int num_sampled, int num_tries) {
  if (num_tries == num_sampled) {
    return p * num_sampled;
  }
  return -std::expm1(num_tries * std::log1p(-p));
}

float LogUniformCandidateSamplerCpuKernel::Probability(int64_t value) const {
  return (log((value + 2.0) / (value + 1.0))) / log_range_;
}

int64_t LogUniformCandidateSamplerCpuKernel::Sample(random::SinglePhiloxRandom *single) {
  double d = single->GenDouble();
  int64_t val = static_cast<int64_t>(exp(d * log_range_)) - 1;
  return val % range_max_;
}
int LogUniformCandidateSamplerCpuKernel::Resize(const BaseOperatorPtr &base_operator,
                                                const std::vector<KernelTensorPtr> &inputs,
                                                const std::vector<KernelTensorPtr> &outputs,
                                                const std::map<uint32_t, tensor::TensorPtr> &inputsOnHost) {
  int ret = KRET_OK;
  if ((ret = NativeCpuKernelMod::Resize(base_operator, inputs, outputs)) != 0) {
    return ret;
  }
  auto true_classes_shape = input_shapes_.at(0);
  if (true_classes_shape.size() != 2) {
    MS_LOG(ERROR) << "input true_classes should be 2-D.";
    return KRET_RESIZE_FAILED;
  }

  if (true_classes_shape[1] != num_true_) {
    MS_LOG(ERROR) << "input true_classes dim[1] should be equal to num_true, true_classes.dim[1] = "
                  << true_classes_shape[1] << ", num_true = " << num_true_;
    return KRET_RESIZE_FAILED;
  }

  auto sampled_candidates_shape = output_shapes_.at(0);
  if (sampled_candidates_shape.size() != 1 || sampled_candidates_shape[0] != static_cast<int64_t>(num_sampled_)) {
    MS_LOG(ERROR) << "output sampled_candidates shape should be equal to (num_sampled,), sampled_candidates shape = "
                  << VectorToString(sampled_candidates_shape) << ", num_sampled_ = " << num_sampled_;
    return KRET_RESIZE_FAILED;
  }

  auto true_expected_count_shape = output_shapes_.at(1);
  if (true_expected_count_shape != true_classes_shape) {
    MS_LOG(ERROR)
      << "output true_expected_count shape should be the same as true_classes shape, true_expected_count shape = "
      << VectorToString(true_expected_count_shape) << ", true_classes shape = " << VectorToString(true_classes_shape);
    return KRET_RESIZE_FAILED;
  }

  auto sampled_expected_count_shape = output_shapes_.at(2);
  if (sampled_expected_count_shape.size() != 1 ||
      sampled_expected_count_shape[0] != static_cast<int64_t>(num_sampled_)) {
    MS_LOG(ERROR)
      << "output sampled_expected_count shape should be equal to (num_sampled,), sampled_expected_count shape = "
      << VectorToString(sampled_expected_count_shape) << ", num_sampled_ = " << num_sampled_;
    return KRET_RESIZE_FAILED;
  }
  return ret;
}
bool LogUniformCandidateSamplerCpuKernel::Launch(const std::vector<AddressPtr> &inputs,
                                                 const std::vector<AddressPtr> &workspace,
                                                 const std::vector<AddressPtr> &outputs) {
  int64_t *true_classes = static_cast<int64_t *>(inputs.at(0)->addr);
  auto true_classes_size = input_size_list_.at(0) / sizeof(int64_t);
  int64_t *sampled_candidates = static_cast<int64_t *>(outputs.at(0)->addr);
  float *true_expected_count = static_cast<float *>(outputs.at(1)->addr);
  float *sampled_expected_count = static_cast<float *>(outputs.at(2)->addr);

  auto gen = generator_.ReserveSamples32(reserveSamplesNr_);

  random::SinglePhiloxRandom single(&gen);

  int num_tries = 0;
  if (unique_) {
    std::unordered_set<int64_t> used(num_sampled_);
    int32_t idx = 0;
    while (idx < num_sampled_) {
      num_tries++;
      int64_t value = Sample(&single);
      if (used.find(value) == used.end()) {
        sampled_candidates[idx++] = value;
        used.emplace(value);
      }
    }
  } else {
    for (int32_t idx = 0; idx < num_sampled_; idx++) {
      sampled_candidates[idx] = Sample(&single);
    }
    num_tries = num_sampled_;
  }

  for (int32_t i = 0; i < num_sampled_; i++) {
    sampled_expected_count[i] = CalcExpectedCount(Probability(sampled_candidates[i]), num_sampled_, num_tries);
  }

  for (size_t i = 0; i < true_classes_size; i++) {
    true_expected_count[i] = CalcExpectedCount(Probability(true_classes[i]), num_sampled_, num_tries);
  }
  return true;
}
MS_KERNEL_FACTORY_REG(NativeCpuKernelMod, LogUniformCandidateSampler, LogUniformCandidateSamplerCpuKernel);
}  // namespace kernel
}  // namespace mindspore
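Aside on the sampling math in this kernel: `Probability(k)` is the point mass of the log-uniform (Zipfian) distribution, `log((k+2)/(k+1)) / log(range_max+1)`, and `Sample` draws from it by inverting the CDF `F(k) = log(k+2) / log(range_max+1)`. `CalcExpectedCount` then reports how often a class with probability `p` is expected to appear: `p * num_sampled` when sampling with replacement (`num_tries == num_sampled`), and `1 - (1 - p)^num_tries` under rejection sampling, computed stably via `expm1`/`log1p`. A plain-Python restatement (hypothetical helper names, for illustration only):

```python
import math

def log_uniform_probability(value: int, range_max: int) -> float:
    """Point mass of the log-uniform (Zipfian) distribution on [0, range_max)."""
    return math.log((value + 2.0) / (value + 1.0)) / math.log1p(range_max)

def log_uniform_sample(range_max: int, u: float) -> int:
    """Inverse-CDF draw for u uniform in [0, 1): F(k) = log(k + 2) / log(range_max + 1)."""
    val = int(math.exp(u * math.log1p(range_max))) - 1
    return val % range_max  # guards the u -> 1.0 edge case, as in the kernel

def expected_count(p: float, num_sampled: int, num_tries: int) -> float:
    """Expected occurrences of a class with probability p after num_tries draws."""
    if num_tries == num_sampled:                    # sampling with replacement
        return p * num_sampled
    return -math.expm1(num_tries * math.log1p(-p))  # 1 - (1 - p)^num_tries, stably

# The point masses over [0, range_max) sum to 1:
print(sum(log_uniform_probability(k, 5) for k in range(5)))  # ~1.0
print(expected_count(log_uniform_probability(3, 5), 5, 7))
```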
@ -0,0 +1,73 @@
/**
 * Copyright 2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_CANDIDATE_SAMPLER_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_CANDIDATE_SAMPLER_CPU_KERNEL_H_

#include <functional>
#include <memory>
#include <vector>
#include <iostream>
#include <string>
#include <complex>
#include <map>
#include <utility>

#include "plugin/device/cpu/kernel/cpu_kernel.h"
#include "plugin/factory/ms_factory.h"
#include "plugin/device/cpu/kernel/random_util.h"

namespace mindspore {
namespace kernel {
class LogUniformCandidateSamplerCpuKernel : public NativeCpuKernelMod {
 public:
  LogUniformCandidateSamplerCpuKernel() = default;
  ~LogUniformCandidateSamplerCpuKernel() = default;
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs) override;
  int Resize(
    const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
    const std::vector<KernelTensorPtr> &outputs,
    const std::map<uint32_t, tensor::TensorPtr> &inputsOnHost = std::map<uint32_t, tensor::TensorPtr>()) override;

  bool Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
            const std::vector<KernelTensorPtr> &outputs) override;
  std::vector<KernelAttr> GetOpSupport() override {
    std::vector<KernelAttr> support = {KernelAttr()
                                         .AddInputAttr(kNumberTypeInt64)
                                         .AddOutputAttr(kNumberTypeInt64)
                                         .AddOutputAttr(kNumberTypeFloat32)
                                         .AddOutputAttr(kNumberTypeFloat32)};
    return support;
  }

 private:
  int64_t Sample(random::SinglePhiloxRandom *single);
  float Probability(int64_t value) const;

 private:
  int32_t num_true_;
  int32_t num_sampled_;
  bool unique_;
  int32_t range_max_;
  random::GuardedPhiloxRandom generator_;
  double log_range_;
  int32_t seed_;
  int64_t reserveSamplesNr_;
};
}  // namespace kernel
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_CANDIDATE_SAMPLER_CPU_KERNEL_H_
@ -66,6 +66,13 @@ double Uint64ToDouble(uint32_t x0, uint32_t x1) {
  return d_result - 1.0;
}

float Uint32ToFloat(uint32_t x) {
  uint32_t val = (127 << 23) | (x & 0x7fffffu);
  float f;
  memcpy(&f, &val, sizeof(val));
  return f - 1.0f;
}

void BoxMullerDouble(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, double *data0, double *data1) {
  const double epsilon = 1.0e-7;
  double u1 = Uint64ToDouble(x0, x1);
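The new `Uint32ToFloat` uses a standard IEEE-754 bit trick: fixing the sign to 0 and the exponent field to the bias (`127 << 23`) while filling the 23-bit mantissa with random bits yields a float uniformly spaced in [1.0, 2.0), so subtracting 1.0f gives a uniform value in [0.0, 1.0). A Python restatement (illustrative only):

```python
import struct

def uint32_to_float(x: int) -> float:
    """Map 23 random bits to a uniform float in [0, 1)."""
    # Exponent 127 (the bias) and sign 0 encode a value in [1.0, 2.0);
    # the low 23 bits of x become the mantissa.
    val = (127 << 23) | (x & 0x7FFFFF)
    f = struct.unpack("<f", struct.pack("<I", val))[0]
    return f - 1.0

print(uint32_to_float(0))         # 0.0
print(uint32_to_float(0x7FFFFF))  # just below 1.0
```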
@ -18,6 +18,7 @@
#define MINDSPORE_CCSRC_BACKEND_KERNEL_GUARDED_PHILOX_RANDOM_H

#include <cstdint>
#include <iostream>
#include <mutex>
#include <string>
#include "Eigen/Core"
@ -169,6 +170,8 @@ class GuardedPhiloxRandom {

  random::MSPhiloxRandom ReserveSamples128(uint64_t samples);

  random::MSPhiloxRandom ReserveSamples32(int64_t samples) { return ReserveSamples128((samples + 3) / 4); }

  random::MSPhiloxRandom ReserveRandomOutputs(int64_t output_count, int multiplier) {
    int64_t conservative_sample_count = output_count * multiplier;
    return ReserveSamples128(conservative_sample_count);
@ -185,6 +188,36 @@ class GuardedPhiloxRandom {
};

double Uint64ToDouble(uint32_t x0, uint32_t x1);
float Uint32ToFloat(uint32_t x);
class SinglePhiloxRandom {
 public:
  explicit SinglePhiloxRandom(MSPhiloxRandom *gen)
      : generator_(gen), group_random_idx_(MSPhiloxRandom::kResultElementCount) {}
  uint32_t GenUint32() {
    if (group_random_idx_ == MSPhiloxRandom::kResultElementCount) {
      group_random_ = (*generator_)();
      group_random_idx_ = 0;
    }
    return group_random_[group_random_idx_++];
  }
  uint64_t GenUint64() {
    uint32_t lo = GenUint32(), hi = GenUint32();
    return lo | static_cast<uint64_t>(hi) << 32;
  }
  float GenFloat() {
    uint32_t u0 = GenUint32();
    return Uint32ToFloat(u0);
  }
  double GenDouble() {
    uint32_t lo = GenUint32(), hi = GenUint32();
    return Uint64ToDouble(lo, hi);
  }

 private:
  MSPhiloxRandom *generator_;
  MSPhiloxRandom::ResType group_random_;
  int group_random_idx_ = 0;
};

void BoxMullerDouble(uint32_t x0, uint32_t x1, uint32_t x2, uint32_t x3, double *data0, double *data1);
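Context for the new `SinglePhiloxRandom` adapter: `MSPhiloxRandom` emits a block of `kResultElementCount` 32-bit words per invocation, and the adapter buffers one block and hands the words out one at a time (`GenUint64`/`GenDouble` simply consume two words). A rough Python analogue of this buffering pattern, with a stand-in block generator (names are hypothetical):

```python
import random
from typing import Callable, List

class SingleRandom:
    """Hand out one 32-bit word at a time from a block generator."""
    BLOCK = 4  # words per generator call, like MSPhiloxRandom::kResultElementCount

    def __init__(self, gen: Callable[[], List[int]]):
        self._gen = gen
        self._block: List[int] = []
        self._idx = self.BLOCK  # start "empty" to force a refill on first use

    def gen_uint32(self) -> int:
        if self._idx == self.BLOCK:
            self._block = self._gen()  # one generator invocation refills the buffer
            self._idx = 0
        word = self._block[self._idx]
        self._idx += 1
        return word

    def gen_uint64(self) -> int:
        lo, hi = self.gen_uint32(), self.gen_uint32()
        return lo | (hi << 32)

# Stand-in block generator; the kernel uses Philox instead.
single = SingleRandom(lambda: [random.getrandbits(32) for _ in range(4)])
print(hex(single.gen_uint64()))
```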
@ -48,6 +48,8 @@ class SpaceToBatchNDCpuKernelMod : public NativeCpuKernelMod, public MatchKernel

  const std::vector<std::pair<KernelAttr, KernelRunFunc>> &GetFuncList() const override;

  std::vector<KernelAttr> GetOpSupport() override { return OpSupport(); }

 private:
  void CheckParam();
@ -23,6 +23,7 @@
#include <memory>
#include "kernel/kernel.h"
#include "plugin/device/gpu/kernel/gpu_kernel_mod.h"
#include "kernel/common_utils.h"

namespace mindspore {
namespace kernel {
@ -65,6 +66,7 @@ class AkgGpuKernelMod : public GpuKernelMod {
              const std::vector<AddressPtr> &outputs, void *stream_ptr) override;

  static AkgGpuKernelManagerPtr kernel_manager_;
  std::vector<KernelAttr> GetOpSupport() override { return {}; }

 private:
  KernelPackPtr kernel_pack_;
@ -47,8 +47,9 @@ class AdjustHueGpuKernelMod : public NativeGpuKernelMod {
             const std::vector<KernelTensorPtr> &outputs,
             const std::map<uint32_t, tensor::TensorPtr> &others = std::map<uint32_t, tensor::TensorPtr>()) override;

 protected:
  std::vector<KernelAttr> GetOpSupport() override;

 protected:
  template <typename T>
  bool LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                    const std::vector<AddressPtr> &outputs, void *stream_ptr);

@ -47,8 +47,9 @@ class AdjustSaturationGpuKernelMod : public NativeGpuKernelMod {
             const std::vector<KernelTensorPtr> &outputs,
             const std::map<uint32_t, tensor::TensorPtr> &others = std::map<uint32_t, tensor::TensorPtr>()) override;

 protected:
  std::vector<KernelAttr> GetOpSupport() override;

 protected:
  template <typename T>
  bool LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                    const std::vector<AddressPtr> &outputs, void *stream_ptr);

@ -50,7 +50,6 @@ class AffineGridGpuKernelMod : public NativeGpuKernelMod {
    return kernel_func_(this, inputs, workspace, outputs, stream_ptr);
  }

 protected:
  std::vector<KernelAttr> GetOpSupport() override;

 private:

@ -46,8 +46,9 @@ class GatherGradGpuKernelMod : public NativeGpuKernelMod {
             const std::vector<KernelTensorPtr> &outputs,
             const std::map<uint32_t, tensor::TensorPtr> &inputsOnHost) override;

 protected:
  std::vector<KernelAttr> GetOpSupport() override;

 protected:
  template <typename T, typename S>
  bool LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs, void *stream_ptr);

@ -56,8 +56,9 @@ class GatherV2FwdGpuKernelMod : public NativeGpuKernelMod {
    workspace_size_list_.clear();
  }

 protected:
  std::vector<KernelAttr> GetOpSupport() override;

 protected:
  void InitSizeLists() {
    auto input_size = std::accumulate(input_shapes_.begin(), input_shapes_.end(), 1, std::multiplies{});
    auto indices_size = std::accumulate(indices_shapes_.begin(), indices_shapes_.end(), 1, std::multiplies{});

@ -46,7 +46,6 @@ class IndexFillGpuKernelMod : public NativeGpuKernelMod {
  int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
             const std::vector<KernelTensorPtr> &outputs, const std::map<uint32_t, tensor::TensorPtr> &) override;

 protected:
  std::vector<KernelAttr> GetOpSupport() override;

 private:

@ -42,7 +42,6 @@ class OneHotGpuKernelMod : public NativeGpuKernelMod {
  int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
             const std::vector<KernelTensorPtr> &outputs, const std::map<uint32_t, tensor::TensorPtr> &) override;

 protected:
  std::vector<KernelAttr> GetOpSupport() override;

 private:
@ -116,12 +116,12 @@ class NativeGpuKernelMod : public GpuKernelMod {
    }
    return Factory<NativeGpuKernelMod>::Instance().Create(kernel_name)->GetAllSupportedList(kernel_name);
  }
-  std::vector<KernelAttr> GetOpSupport() { return {}; }
  static bool GpuCheckSupport(const std::string &kernel_name, const KernelAttr &kernel_attr);

  static ReducePrecisonRes GpuReducePrecisionCheck(const std::string &kernel_name, const KernelAttr &kernel_attr) {
    return Factory<NativeGpuKernelMod>::Instance().Create(kernel_name)->ReducePrecisionCheck(kernel_name, kernel_attr);
  }
+  virtual std::vector<KernelAttr> GetOpSupport() { return {}; }
  enum KernelModType GetKernelModType() const override { return KernelModType::NativeGpuKernelMod; }

 protected:

@ -46,7 +46,6 @@ class CumSumGpuKernelMod : public NativeGpuKernelMod {
  int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
             const std::vector<KernelTensorPtr> &outputs, const std::map<uint32_t, tensor::TensorPtr> &) override;

 protected:
  std::vector<KernelAttr> GetOpSupport() override;

 private:

@ -54,7 +54,6 @@ class FFTWithSizeGpuKernelMod : public NativeGpuKernelMod {
    return launch_func_(this, inputs, workspace, outputs, stream_ptr);
  }

 protected:
  std::vector<KernelAttr> GetOpSupport() override;

 private:

@ -45,9 +45,9 @@ class FloatStatusGpuKernelMod : public NativeGpuKernelMod {
             const std::vector<KernelTensorPtr> &outputs,
             const std::map<uint32_t, tensor::TensorPtr> &inputsOnHost) override;

 protected:
  std::vector<KernelAttr> GetOpSupport() override;

 protected:
  template <typename T>
  bool LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);

@ -55,8 +55,9 @@ class NMSWithOverlapsFwdGpuKernelMod : public NativeGpuKernelMod {
    return kernel_func_(this, inputs, workspace, outputs, stream_ptr);
  }

 protected:
  std::vector<KernelAttr> GetOpSupport() override;

 protected:
  template <typename T>
  bool LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                    const std::vector<AddressPtr> &outputs, void *stream_ptr);

@ -47,7 +47,6 @@ class ApplyProximalGradientDescentGpuKernelMod : public NativeGpuKernelMod {
    return kernel_func_(this, inputs, workspace, outputs);
  }

 protected:
  std::vector<KernelAttr> GetOpSupport() override;

 private:

@ -40,7 +40,6 @@ class GeLUGradGpuKernelMod : public NativeGpuKernelMod {
    return kernel_func_(this, inputs, workspace, outputs, stream_ptr);
  }

 protected:
  std::vector<KernelAttr> GetOpSupport() override;

 private:

@ -52,7 +52,6 @@ class GeLUGpuKernelMod : public NativeGpuKernelMod {
    return kernel_func_(this, inputs, outputs);
  }

 protected:
  std::vector<KernelAttr> GetOpSupport() override;

 private:

@ -52,7 +52,6 @@ class HSigmoidGpuKernelMod : public NativeGpuKernelMod {
    return kernel_func_(this, inputs, outputs);
  }

 protected:
  std::vector<KernelAttr> GetOpSupport() override;

 private:

@ -52,7 +52,6 @@ class HSigmoidGradGpuKernelMod : public NativeGpuKernelMod {
    return kernel_func_(this, inputs, outputs);
  }

 protected:
  std::vector<KernelAttr> GetOpSupport() override;

 private:

@ -40,7 +40,6 @@ class HSwishGpuKernelMod : public NativeGpuKernelMod {
    return kernel_func_(this, inputs, workspace, outputs, stream_ptr);
  }

 protected:
  std::vector<KernelAttr> GetOpSupport() override;

 private:

@ -40,7 +40,6 @@ class HSwishGradGpuKernelMod : public NativeGpuKernelMod {
    return kernel_func_(this, inputs, workspace, outputs, stream_ptr);
  }

 protected:
  std::vector<KernelAttr> GetOpSupport() override;

 private:
@ -45,7 +45,6 @@ class L2NormalizeGradGpuKernelMod : public NativeGpuKernelMod {
  int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
             const std::vector<KernelTensorPtr> &outputs, const std::map<uint32_t, tensor::TensorPtr> &) override;

 protected:
  std::vector<KernelAttr> GetOpSupport() override;

 private:

@ -44,7 +44,6 @@ class MemcpyGpuKernelMod : public NativeGpuKernelMod {
  int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
             const std::vector<KernelTensorPtr> &outputs, const std::map<uint32_t, tensor::TensorPtr> &) override;

 protected:
  std::vector<KernelAttr> GetOpSupport() override;

 private:

@ -47,8 +47,9 @@ class MirrorPadGradGpuKernelMod : public NativeGpuKernelMod {
             const std::vector<KernelTensorPtr> &outputs,
             const std::map<uint32_t, tensor::TensorPtr> &inputsOnHost) override;

 protected:
  std::vector<KernelAttr> GetOpSupport() override;

 protected:
  template <typename T>
  bool LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                    const std::vector<AddressPtr> &outputs, void *stream_ptr);

@ -44,7 +44,6 @@ class PReLUGpuKernelMod : public NativeGpuKernelMod {
  int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
             const std::vector<KernelTensorPtr> &outputs, const std::map<uint32_t, tensor::TensorPtr> &) override;

 protected:
  std::vector<KernelAttr> GetOpSupport() override;

 private:

@ -45,8 +45,9 @@ class PReLUGradGpuKernelMod : public NativeGpuKernelMod {
  int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
             const std::vector<KernelTensorPtr> &outputs, const std::map<uint32_t, tensor::TensorPtr> &) override;

 protected:
  std::vector<KernelAttr> GetOpSupport() override;

 protected:
  template <typename T>
  bool LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                    const std::vector<AddressPtr> &outputs, void *stream_ptr);

@ -41,8 +41,9 @@ class ReLUFwdGpuKernelMod : public NativeGpuKernelMod {
  int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
             const std::vector<KernelTensorPtr> &outputs, const std::map<uint32_t, tensor::TensorPtr> &) override;

 protected:
  std::vector<KernelAttr> GetOpSupport() override;

 protected:
  template <typename T>
  bool LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                    const std::vector<AddressPtr> &outputs, void *stream_ptr);

@ -42,8 +42,9 @@ class ReLUGradGpuKernelMod : public NativeGpuKernelMod {
  int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
             const std::vector<KernelTensorPtr> &outputs, const std::map<uint32_t, tensor::TensorPtr> &) override;

 protected:
  std::vector<KernelAttr> GetOpSupport() override;

 protected:
  template <typename T>
  bool LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                    const std::vector<AddressPtr> &outputs, void *stream_ptr);

@ -43,8 +43,9 @@ class SoftmaxGpuKernelMod : public NativeGpuKernelMod {
  int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
             const std::vector<KernelTensorPtr> &outputs, const std::map<uint32_t, tensor::TensorPtr> &) override;

 protected:
  std::vector<KernelAttr> GetOpSupport() override;

 protected:
  template <typename T>
  bool LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                    const std::vector<AddressPtr> &outputs, void *stream_ptr);

@ -54,7 +54,6 @@ class SparseApplyAdagradV2GpuKernelMod : public NativeGpuKernelMod {
    return true;
  }

 protected:
  std::vector<KernelAttr> GetOpSupport() override;

 private:

@ -44,7 +44,6 @@ class UpsampleNearest3DGradGpuKernelMod : public NativeGpuKernelMod {
  int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
             const std::vector<KernelTensorPtr> &outputs, const std::map<uint32_t, tensor::TensorPtr> &) override;

 protected:
  std::vector<KernelAttr> GetOpSupport() override;

 private:

@ -51,7 +51,6 @@ class UpsampleTrilinear3DGradGpuKernelMod : public NativeGpuKernelMod {
             const std::vector<KernelTensorPtr> &outputs,
             const std::map<uint32_t, tensor::TensorPtr> &inputsOnHost = std::map<uint32_t, tensor::TensorPtr>()) override;

 protected:
  std::vector<KernelAttr> GetOpSupport() override;

 private:

@ -46,8 +46,9 @@ class RandpermGpuKernelMod : public NativeGpuKernelMod {
  int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
             const std::vector<KernelTensorPtr> &outputs, const std::map<uint32_t, tensor::TensorPtr> &) override;

 protected:
  std::vector<KernelAttr> GetOpSupport() override;

 protected:
  template <typename T>
  bool LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                    const std::vector<AddressPtr> &outputs, void *stream_ptr);
@ -89,8 +89,9 @@ class DynamicGruGpuKernelMod : public NativeGpuKernelMod {
#endif
  }

 protected:
  std::vector<KernelAttr> GetOpSupport() override;

 protected:
  void InitResource() override {
#if CUDNN_VERSION >= 8000
    handle_ = device::gpu::GPUDeviceManager::GetInstance().GetCudnnHandle();

@ -50,7 +50,6 @@ class SparseToDenseV2GpuKernelMod : public NativeGpuKernelMod {
  int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
             const std::vector<KernelTensorPtr> &outputs, const std::map<uint32_t, tensor::TensorPtr> &) override;

 protected:
  std::vector<KernelAttr> GetOpSupport() override;

 private:

@ -49,8 +49,9 @@ class SparseAddGradGpuKernelMod : public NativeGpuKernelMod {
  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs, void *cuda_stream) override;

 protected:
  std::vector<KernelAttr> GetOpSupport() override;

 protected:
  void SyncData() override;
  std::vector<KernelTensorPtr> GetOutputs() override { return outputs_; }

@ -49,7 +49,6 @@ class SparseSliceGradGpuKernelMod : public NativeGpuKernelMod {
    return kernel_func_(this, inputs, workspace, outputs, stream_ptr);
  }

 protected:
  std::vector<KernelAttr> GetOpSupport() override;

 private:
@ -1532,6 +1532,7 @@ GVAR_DEF(PrimitivePtr, kPrimRandomShuffle, std::make_shared<Primitive>("RandomSh
GVAR_DEF(PrimitivePtr, kPrimRandomCategorical, std::make_shared<Primitive>("RandomCategorical"));
GVAR_DEF(PrimitivePtr, kPrimRandperm, std::make_shared<Primitive>("Randperm"));
GVAR_DEF(PrimitivePtr, kPrimUniformCandidateSampler, std::make_shared<Primitive>("UniformCandidateSampler"));
GVAR_DEF(PrimitivePtr, kPrimLogUniformCandidateSampler, std::make_shared<Primitive>("LogUniformCandidateSampler"));
GVAR_DEF(PrimitivePtr, kPrimMultinomial, std::make_shared<Primitive>("Multinomial"));
GVAR_DEF(PrimitivePtr, kPrimRandomChoiceWithMask, std::make_shared<Primitive>("RandomChoiceWithMask"));
@ -0,0 +1,76 @@
/**
 * Copyright 2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include "ops/log_uniform_candidate_sampler.h"
#include <string>
#include <memory>
#include <set>
#include "utils/check_convert_utils.h"
#include "mindapi/src/helper.h"

namespace mindspore {
namespace ops {
void LogUniformCandidateSampler::Init(int64_t num_true, int64_t num_sampled, bool unique, int64_t range_max,
                                      int64_t seed) {
  this->set_num_true(num_true);
  this->set_num_sampled(num_sampled);
  this->set_unique(unique);
  this->set_range_max(range_max);
  this->set_seed(seed);
}
namespace {
abstract::TupleShapePtr LogUniformCandidateSamplerInferShape(const PrimitivePtr &primitive,
                                                             const std::vector<AbstractBasePtr> &input_args) {
  int64_t num_sampled = GetValue<int64_t>(primitive->GetAttr(kNumSampled));
  auto sampled_candidate_shape = std::make_shared<abstract::Shape>(ShapeVector({num_sampled}));
  auto true_expected_shape = input_args[0]->BuildShape();

  std::vector<abstract::BaseShapePtr> shape_tuple;
  (void)shape_tuple.emplace_back(sampled_candidate_shape);
  (void)shape_tuple.emplace_back(true_expected_shape);
  (void)shape_tuple.emplace_back(sampled_candidate_shape);
  return std::make_shared<abstract::TupleShape>(shape_tuple);
}

TuplePtr LogUniformCandidateSamplerInferType(const PrimitivePtr &primitive,
                                             const std::vector<AbstractBasePtr> &input_args) {
  // check input data type
  const std::set<TypePtr> valid_types = {kInt64};
  CheckAndConvertUtils::CheckTensorTypeValid("true_classes", input_args[0]->BuildType(), valid_types,
                                             primitive->name());

  // return outputs data type
  auto sampled_candidate_type = std::make_shared<TensorType>(kInt64);
  auto true_expected_type = std::make_shared<TensorType>(kFloat32);
  auto sampled_expected = std::make_shared<TensorType>(kFloat32);
  return std::make_shared<Tuple>(std::vector<TypePtr>{sampled_candidate_type, true_expected_type, sampled_expected});
}
}  // namespace

AbstractBasePtr LogUniformCandidateSamplerInfer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive,
                                                const std::vector<AbstractBasePtr> &input_args) {
  MS_EXCEPTION_IF_NULL(primitive);
  const int64_t kInputNum = 1;
  CheckAndConvertUtils::CheckInputArgs(input_args, kEqual, kInputNum, primitive->name());
  auto type = LogUniformCandidateSamplerInferType(primitive, input_args);
  auto shape = LogUniformCandidateSamplerInferShape(primitive, input_args);
  return abstract::MakeAbstract(shape, type);
}

MIND_API_OPERATOR_IMPL(LogUniformCandidateSampler, BaseOperator);
REGISTER_PRIMITIVE_EVAL_IMPL(LogUniformCandidateSampler, prim::kPrimLogUniformCandidateSampler,
                             LogUniformCandidateSamplerInfer, nullptr, true);
}  // namespace ops
}  // namespace mindspore
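The infer routines above fix the op's output contract: `sampled_candidates` and `sampled_expected_count` are both `(num_sampled,)` (which is why `sampled_candidate_shape` is reused for the third output), while `true_expected_count` mirrors the input shape. The same contract restated in Python (hypothetical helper, shapes as tuples):

```python
def infer_log_uniform_candidate_sampler(true_classes_shape, num_sampled):
    """Mirror of the C++ InferShape: three outputs (int64, float32, float32)."""
    sampled_candidates = (num_sampled,)
    true_expected_count = tuple(true_classes_shape)  # same shape as the input
    sampled_expected_count = (num_sampled,)
    return sampled_candidates, true_expected_count, sampled_expected_count

print(infer_log_uniform_candidate_sampler((3, 2), 5))
# ((5,), (3, 2), (5,))
```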
@ -0,0 +1,80 @@
/**
 * Copyright 2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_CORE_OPS_RANDOM_LOG_UNIFORM_CANDIDATE_SAMPLER_H_
#define MINDSPORE_CORE_OPS_RANDOM_LOG_UNIFORM_CANDIDATE_SAMPLER_H_
#include <map>
#include <vector>
#include <string>
#include <memory>

#include "ops/base_operator.h"
#include "ops/op_name.h"
#include "ops/op_utils.h"
#include "mindapi/base/types.h"

namespace mindspore {
namespace ops {
constexpr auto kNameLogUniformCandidateSampler = "LogUniformCandidateSampler";
/// \brief Generates random labels with a log-uniform distribution for sampled_candidates.
/// Refer to Python API @ref mindspore.ops.log_uniform_candidate_sampler for more details.
class MIND_API LogUniformCandidateSampler : public BaseOperator {
 public:
  MIND_API_BASE_MEMBER(LogUniformCandidateSampler);
  /// \brief Constructor.
  LogUniformCandidateSampler() : BaseOperator(kNameLogUniformCandidateSampler) {
    InitIOName({"true_classes"}, {"sampled_candidates", "true_expected_count", "sampled_expected_count"});
  }

  /// \brief Method to init the op's attributes.
  ///
  /// \param[in] num_true The number of target classes per training example.
  /// \param[in] num_sampled The number of classes to randomly sample.
  /// \param[in] unique Determines whether to sample with rejection. If `unique` is True, all sampled classes in a
  /// batch are unique.
  /// \param[in] range_max The number of possible classes. When `unique` is True, `range_max` must be greater than or
  /// equal to `num_sampled`.
  /// \param[in] seed Random seed, must be non-negative.
  void Init(int64_t num_true = 1, int64_t num_sampled = 5, bool unique = true, int64_t range_max = 5, int64_t seed = 0);

  inline void set_num_true(int64_t num_true) { (void)this->AddAttr(kNumTrue, api::MakeValue(num_true)); }

  inline int64_t get_num_true() { return GetValue<int64_t>(GetAttr(kNumTrue)); }

  inline void set_num_sampled(int64_t num_sampled) { (void)this->AddAttr(kNumSampled, api::MakeValue(num_sampled)); }

  inline int64_t get_num_sampled() { return GetValue<int64_t>(GetAttr(kNumSampled)); }

  inline void set_unique(bool unique) { (void)this->AddAttr(kUnique, api::MakeValue(unique)); }

  inline bool get_unique() { return GetValue<bool>(GetAttr(kUnique)); }

  inline void set_range_max(int64_t range_max) { (void)this->AddAttr(kRangeMax, api::MakeValue(range_max)); }

  inline int64_t get_range_max() { return GetValue<int64_t>(GetAttr(kRangeMax)); }

  inline void set_seed(int64_t seed) { (void)this->AddAttr(kSeed, api::MakeValue(seed)); }

  inline int64_t get_seed() { return GetValue<int64_t>(GetAttr(kSeed)); }
};

abstract::AbstractBasePtr LogUniformCandidateSamplerInfer(const abstract::AnalysisEnginePtr &,
                                                          const PrimitivePtr &primitive,
                                                          const std::vector<abstract::AbstractBasePtr> &input_args);
}  // namespace ops
}  // namespace mindspore

#endif  // MINDSPORE_CORE_OPS_RANDOM_LOG_UNIFORM_CANDIDATE_SAMPLER_H_
@ -359,6 +359,10 @@ constexpr auto kCol = "col";
constexpr auto kBatchSize = "batch_size";
constexpr auto kCross = "cross";
constexpr auto kDeviceNum = "device_num";
constexpr auto kNumTrue = "num_true";
constexpr auto kUnique = "unique";
constexpr auto kNumSampled = "num_sampled";
constexpr auto kRangeMax = "range_max";
constexpr auto kPooledHeight = "pooled_height";
constexpr auto kPooledWidth = "pooled_width";
constexpr auto kSpatialScale = "spatial_scale";
@ -18,6 +18,8 @@
#define MINDSPORE_SHAPE_UTILS_INFO_H_

#include <algorithm>
#include <vector>
#include <string>
#include "abstract/dshape.h"
#include "utils/log_adapter.h"

@ -56,6 +58,21 @@ inline bool IsDynamic(const ShapeVector &shape) {
  }
  return IsDynamicRank(shape) || IsDynamicShape(shape);
}

template <typename T>
std::string VectorToString(const std::vector<T> &values) {
  std::stringstream ss;
  ss << "[";
  auto size = values.size();
  for (size_t i = 0; i < size; ++i) {
    ss << values[i];
    if (i != size - 1) {
      ss << ", ";
    }
  }
  ss << "]";
  return ss.str();
}
}  // namespace mindspore

#endif  // MINDSPORE_SHAPE_UTILS_INFO_H_
@ -49,6 +49,7 @@ class CustomAscendKernelMod : public kernel::KernelMod {
              const std::vector<AddressPtr> &outputs, void *stream_ptr) override;

  std::vector<KernelTensorPtr> RetrieveOutputShape() { return outputs_; }
  std::vector<KernelAttr> GetOpSupport() override { return {}; }

 private:
  void RecordInputDataIndex(const std::vector<KernelTensorPtr> &inputs);
@ -21,6 +21,7 @@
#include <string>

#include "plugin/device/cpu/kernel/cpu_kernel_mod.h"
#include "kernel/common_utils.h"

namespace mindspore::kernel {
class LessTestKernelMod : public CpuKernelMod {
@ -35,6 +36,7 @@ class LessTestKernelMod : public CpuKernelMod {

  virtual bool Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
                    const std::vector<KernelTensorPtr> &outputs);
  std::vector<KernelAttr> GetOpSupport() override { return {}; }
};
}  // namespace mindspore::kernel
@ -41,6 +41,7 @@ class TransposeKernelMod : public CpuKernelMod {

  int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
             const std::vector<KernelTensorPtr> &outputs, const std::map<uint32_t, tensor::TensorPtr> &) override;
  std::vector<KernelAttr> GetOpSupport() override { return {}; }

 private:
  template <typename T>
@ -22,6 +22,7 @@
#include "kernel/kernel.h"
#include "ir/func_graph.h"
#include "runtime/hardware/device_context.h"
#include "kernel/common_utils.h"
namespace mindspore::kernel {
class SubgraphKernel : public KernelMod {
 public:
@ -37,6 +38,7 @@ class SubgraphKernel : public KernelMod {

  bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
              const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
  std::vector<KernelAttr> GetOpSupport() override { return {}; }

 protected:
  FuncGraphPtr subgraph_;
@ -384,6 +384,7 @@ from .random_func import (
    random_gamma,
    uniform_candidate_sampler,
    random_poisson,
    log_uniform_candidate_sampler,
    shuffle,
    choice_with_mask
)
@ -123,7 +123,8 @@ def standard_laplace(shape, seed=0, seed2=0):
    >>> print(result)
    (4, 4)
    """
-    standard_laplace_op = _get_cache_prim(P.StandardLaplace)(seed=seed, seed2=seed2)
+    standard_laplace_op = _get_cache_prim(
+        P.StandardLaplace)(seed=seed, seed2=seed2)
    return standard_laplace_op(shape)
@ -222,11 +223,13 @@ def uniform(shape, minval, maxval, seed=None, dtype=mstype.float32):
    (3, 2, 2)
    """
    if not isinstance(minval, Tensor) or not isinstance(maxval, Tensor):
-        raise TypeError(f"For functional operator[uniform], the input[minval] and input[maxval] must be a Tensor.")
+        raise TypeError(
+            f"For functional operator[uniform], the input[minval] and input[maxval] must be a Tensor.")

    minval_dtype = F.dtype(minval)
    maxval_dtype = F.dtype(maxval)
-    const_utils.check_type_valid(dtype, [mstype.int32, mstype.float32], 'uniform')
+    const_utils.check_type_valid(
+        dtype, [mstype.int32, mstype.float32], 'uniform')
    const_utils.check_tensors_dtype_same(minval_dtype, dtype, "uniform")
    const_utils.check_tensors_dtype_same(maxval_dtype, dtype, "uniform")
    seed1, seed2 = _get_seed(seed, "uniform")
@ -274,7 +277,8 @@ def standard_normal(shape, seed=0, seed2=0):
    >>> print(result)
    (4, 4)
    """
-    standard_normal_op = _get_cache_prim(P.StandardNormal)(seed=seed, seed2=seed2)
+    standard_normal_op = _get_cache_prim(
+        P.StandardNormal)(seed=seed, seed2=seed2)
    return standard_normal_op(shape)
@ -326,7 +330,8 @@ def uniform_candidate_sampler(true_classes, num_true, num_sampled, unique, range
    """
    sampler_op = _get_cache_prim(P.UniformCandidateSampler)(num_true, num_sampled, unique, range_max, seed=seed,
                                                            remove_accidental_hits=remove_accidental_hits)
-    sampled_candidates, true_expected_count, sampled_expected_count = sampler_op(true_classes)
+    sampled_candidates, true_expected_count, sampled_expected_count = sampler_op(
+        true_classes)
    return sampled_candidates, true_expected_count, sampled_expected_count
@ -420,6 +425,57 @@ def shuffle(x, seed=None):
    return output


def log_uniform_candidate_sampler(true_classes, num_true=1, num_sampled=5, unique=True,
                                  range_max=5, seed=0):
    r"""
    Generates random labels with a log-uniform distribution for sampled_candidates.

    Randomly samples a tensor of sampled classes from the range of integers [0, range_max).

    Args:
        true_classes (Tensor): The target classes. With data type of int64 and
          shape :math:`(batch\_size, num\_true)` .
        num_true (int): The number of target classes per training example. Default: 1.
        num_sampled (int): The number of classes to randomly sample. Default: 5.
        unique (bool): Determines whether to sample with rejection. If `unique` is True,
          all sampled classes in a batch are unique. Default: True.
        range_max (int): The number of possible classes. When `unique` is True,
          `range_max` must be greater than or equal to `num_sampled`. Default: 5.
        seed (int): Random seed, must be non-negative. Default: 0.

    Returns:
        Tuple of 3 Tensors.

        - **sampled_candidates** (Tensor) - A Tensor with shape :math:`(num\_sampled,)`
          and the same type as `true_classes`.
        - **true_expected_count** (Tensor) - A Tensor with the same shape as `true_classes` and type float32.
        - **sampled_expected_count** (Tensor) - A Tensor with the same shape as `sampled_candidates` and type float32.

    Raises:
        TypeError: If neither `num_true` nor `num_sampled` is an int.
        TypeError: If `unique` is not a bool.
        TypeError: If neither `range_max` nor `seed` is an int.
        TypeError: If `true_classes` is not a Tensor.

    Supported Platforms:
        ``Ascend`` ``CPU``

    Examples:
        >>> import numpy as np
        >>> from mindspore import Tensor
        >>> from mindspore.ops import functional as F
        >>> output1, output2, output3 = F.log_uniform_candidate_sampler(
        ...     Tensor(np.array([[1, 7], [0, 4], [3, 3]])), 2, 5, True, 5)
        >>> print(output1, output2, output3)
        [3 2 0 4 1]
        [[0.92312991 0.49336370]
         [0.99248987 0.65806371]
         [0.73553443 0.73553443]]
        [0.73553443 0.82625800 0.99248987 0.65806371 0.92312991]

    """

    sampler = _get_cache_prim(P.LogUniformCandidateSampler)(
        num_true, num_sampled, unique, range_max, seed)
    return sampler(true_classes)


def choice_with_mask(input_x, count=256, seed=0, seed2=0):
    """
    Generates a random sample as index tensor with a mask tensor from a given tensor.
@ -460,7 +516,8 @@ def choice_with_mask(input_x, count=256, seed=0, seed2=0):
    >>> print(result)
    (256,)
    """
-    choice_with_mask_ = _get_cache_prim(RandomChoiceWithMask)(count=count, seed=seed, seed2=seed2)
+    choice_with_mask_ = _get_cache_prim(RandomChoiceWithMask)(
+        count=count, seed=seed, seed2=seed2)
    output = choice_with_mask_(input_x)
    return output
@ -473,6 +530,7 @@ __all__ = [
    'random_gamma',
    'uniform_candidate_sampler',
    'random_poisson',
    'log_uniform_candidate_sampler',
    'shuffle',
    'choice_with_mask'
]
@ -366,7 +366,8 @@ class Gamma(PrimitiveWithInfer):
    @prim_attr_register
    def __init__(self, seed=0, seed2=0):
        """Initialize RandomGamma"""
-        self.init_prim_io_names(inputs=['shape', 'alpha', 'beta'], outputs=['output'])
+        self.init_prim_io_names(
+            inputs=['shape', 'alpha', 'beta'], outputs=['output'])
        self.add_prim_attr("side_effect_hidden", True)
        Validator.check_non_negative_int(seed, "seed", self.name)
        Validator.check_non_negative_int(seed2, "seed2", self.name)
@ -452,7 +453,8 @@ class ParameterizedTruncatedNormal(Primitive):
    @prim_attr_register
    def __init__(self, seed=0, seed2=0):
        """Initialize ParameterizedTruncatedNormal"""
-        self.init_prim_io_names(inputs=['shape', 'mean', 'stdevs', 'min', 'max'], outputs=['y'])
+        self.init_prim_io_names(
+            inputs=['shape', 'mean', 'stdevs', 'min', 'max'], outputs=['y'])
        Validator.check_value_type('seed', seed, [int], self.name)
        Validator.check_value_type('seed2', seed2, [int], self.name)
@ -511,8 +513,10 @@ class Poisson(PrimitiveWithInfer):
|
|||
Validator.check_value_type("shape", shape_v, [tuple], self.name)
|
||||
for i, shape_i in enumerate(shape_v):
|
||||
Validator.check_positive_int(shape_i, f'shape[{i}]', self.name)
|
||||
Validator.check_tensor_dtype_valid("mean", mean["dtype"], [mstype.float32], self.name)
|
||||
broadcast_shape = get_broadcast_shape(mean['shape'], shape_v, self.name, arg_name1="mean", arg_name2="shape")
|
||||
Validator.check_tensor_dtype_valid(
|
||||
"mean", mean["dtype"], [mstype.float32], self.name)
|
||||
broadcast_shape = get_broadcast_shape(
|
||||
mean['shape'], shape_v, self.name, arg_name1="mean", arg_name2="shape")
|
||||
out = {
|
||||
'shape': broadcast_shape,
|
||||
'dtype': mstype.int32,

@@ -568,7 +572,8 @@ class RandomPoisson(Primitive):
        self.init_prim_io_names(inputs=['shape', 'rate'], outputs=['output'])
        Validator.check_value_type('seed', seed, [int], self.name)
        Validator.check_value_type('seed2', seed2, [int], self.name)
-        valid_values = (mstype.int64, mstype.int32, mstype.float16, mstype.float32, mstype.float64)
+        valid_values = (mstype.int64, mstype.int32,
+                        mstype.float16, mstype.float32, mstype.float64)
        Validator.check_type_name("dtype", dtype, valid_values, self.name)

@@ -623,7 +628,8 @@ class UniformInt(Primitive):
    @prim_attr_register
    def __init__(self, seed=0, seed2=0):
        """Initialize UniformInt"""
-        self.init_prim_io_names(inputs=['shape', 'minval', 'maxval'], outputs=['output'])
+        self.init_prim_io_names(
+            inputs=['shape', 'minval', 'maxval'], outputs=['output'])
        self.add_prim_attr("side_effect_hidden", True)
        Validator.check_non_negative_int(seed, "seed", self.name)
        Validator.check_non_negative_int(seed2, "seed2", self.name)

@@ -807,7 +813,8 @@ class Multinomial(Primitive):
        """Initialize Multinomial."""
        Validator.check_non_negative_int(seed, "seed", self.name)
        Validator.check_non_negative_int(seed2, "seed2", self.name)
-        self.init_prim_io_names(inputs=['x', 'num_samples'], outputs=['output'])
+        self.init_prim_io_names(
+            inputs=['x', 'num_samples'], outputs=['output'])
        Validator.check_value_type("dtype", dtype, [mstype.Type], self.name)
        valid_values = (mstype.int64, mstype.int32)
        Validator.check_type_name("dtype", dtype, valid_values, self.name)

@@ -839,66 +846,49 @@ class UniformCandidateSampler(PrimitiveWithInfer):
    def __init__(self, num_true, num_sampled, unique, range_max, seed=0, remove_accidental_hits=False):
        """Initialize UniformCandidateSampler"""
        Validator.check_value_type("num_true", num_true, [int], self.name)
-        Validator.check_value_type("num_sampled", num_sampled, [int], self.name)
+        Validator.check_value_type(
+            "num_sampled", num_sampled, [int], self.name)
        Validator.check_value_type("unique", unique, [bool], self.name)
        Validator.check_value_type("range_max", range_max, [int], self.name)
        Validator.check_value_type("seed", seed, [int], self.name)
-        Validator.check_value_type("remove_accidental_hits", remove_accidental_hits, [bool], self.name)
-        Validator.check("value of num_true", num_true, '', 0, Rel.GT, self.name)
-        Validator.check("value of num_sampled", num_sampled, '', 0, Rel.GT, self.name)
-        Validator.check("value of range_max", range_max, '', 0, Rel.GT, self.name)
+        Validator.check_value_type(
+            "remove_accidental_hits", remove_accidental_hits, [bool], self.name)
+        Validator.check("value of num_true", num_true,
+                        '', 0, Rel.GT, self.name)
+        Validator.check("value of num_sampled", num_sampled,
+                        '', 0, Rel.GT, self.name)
+        Validator.check("value of range_max", range_max,
+                        '', 0, Rel.GT, self.name)
        self.num_true = num_true
        if unique:
-            Validator.check('value of num_sampled', num_sampled, "value of range_max", range_max, Rel.LE, self.name)
+            Validator.check('value of num_sampled', num_sampled,
+                            "value of range_max", range_max, Rel.LE, self.name)
        Validator.check("value of seed", seed, '', 0, Rel.GE, self.name)
        self.num_sampled = num_sampled

    def infer_dtype(self, true_classes_type):
-        Validator.check_subclass("true_classes_type", true_classes_type, mstype.tensor, self.name)
+        Validator.check_subclass(
+            "true_classes_type", true_classes_type, mstype.tensor, self.name)
        Validator.check_tensor_dtype_valid("true_classes_type", true_classes_type,
                                           (mstype.int32, mstype.int64), self.name)
        return true_classes_type, mstype.float32, mstype.float32

    def infer_shape(self, true_classes_shape):
-        Validator.check("true_class.shape[1]", true_classes_shape[1], "num_true", self.num_true, Rel.EQ, self.name)
+        Validator.check("true_class.shape[1]", true_classes_shape[1],
+                        "num_true", self.num_true, Rel.EQ, self.name)
        return [self.num_sampled], true_classes_shape, [self.num_sampled]
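
# Editor's note: an illustrative sketch, not part of this diff, of the
# constraint validated above: with unique=True, num_sampled cannot exceed
# range_max, since that many distinct classes could not otherwise be drawn.
# It assumes Validator.check raises ValueError on a failed comparison.
from mindspore.ops import operations as P

P.UniformCandidateSampler(1, 3, True, 4)  # fine: num_sampled 3 <= range_max 4
try:
    P.UniformCandidateSampler(1, 5, True, 4)  # 5 > 4 is rejected at construction
except ValueError as err:
    print(err)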


-class LogUniformCandidateSampler(PrimitiveWithInfer):
+class LogUniformCandidateSampler(Primitive):
    r"""
    Generates random labels for `sampled_candidates` from a log-uniform (Zipfian) distribution.

    Randomly samples a tensor of sampled classes from the range of integers [0, range_max).

    Args:
        num_true (int): The number of target classes per training example. Default: 1.
        num_sampled (int): The number of classes to randomly sample. Default: 5.
        unique (bool): Determines whether sampling is done with rejection. If `unique` is True,
            all sampled classes in a batch are unique. Default: True.
        range_max (int): The number of possible classes. When `unique` is True,
            `range_max` must be greater than or equal to `num_sampled`. Default: 5.
        seed (int): Random seed, must be non-negative. Default: 0.

    Inputs:
        - **true_classes** (Tensor) - The target classes, with data type int64 and
          shape :math:`(batch\_size, num\_true)` .

    Outputs:
        Tuple of 3 Tensors.

        - **sampled_candidates** (Tensor) - A Tensor with shape :math:`(num\_sampled,)`
          and the same type as `true_classes`.
        - **true_expected_count** (Tensor) - A Tensor with the same shape as `true_classes` and type float32.
        - **sampled_expected_count** (Tensor) - A Tensor with the same shape as `sampled_candidates` and type float32.

    Raises:
        TypeError: If `num_true` or `num_sampled` is not an int.
        TypeError: If `unique` is not a bool.
        TypeError: If `range_max` or `seed` is not an int.
        TypeError: If `true_classes` is not a Tensor.

    Refer to :func:`mindspore.ops.log_uniform_candidate_sampler` for more details.

    Supported Platforms:
-        ``Ascend``
+        ``Ascend`` ``CPU``

    Examples:
        >>> sampler = ops.LogUniformCandidateSampler(2, 5, True, 5)

@@ -918,31 +908,23 @@ class LogUniformCandidateSampler(PrimitiveWithInfer):
        self.init_prim_io_names(inputs=['true_classes'],
                                outputs=['sampled_candidates', 'true_expected_count', 'sampled_expected_count'])
        Validator.check_value_type("num_true", num_true, [int], self.name)
-        Validator.check_value_type("num_sampled", num_sampled, [int], self.name)
+        Validator.check_value_type(
+            "num_sampled", num_sampled, [int], self.name)
        Validator.check_value_type("unique", unique, [bool], self.name)
        Validator.check_value_type("range_max", range_max, [int], self.name)
        Validator.check_value_type("seed", seed, [int], self.name)
-        self.num_true = Validator.check_number("num_true", num_true, 1, Rel.GE, self.name)
-        self.num_sampled = Validator.check_number("num_sampled", num_sampled, 1, Rel.GE, self.name)
+        self.num_true = Validator.check_number(
+            "num_true", num_true, 1, Rel.GE, self.name)
+        self.num_sampled = Validator.check_number(
+            "num_sampled", num_sampled, 1, Rel.GE, self.name)
        Validator.check_number("range_max", range_max, 1, Rel.GE, self.name)
        if unique:
-            Validator.check("range_max", range_max, "num_sampled", num_sampled, Rel.GE, self.name)
+            Validator.check("range_max", range_max, "num_sampled",
+                            num_sampled, Rel.GE, self.name)
        self.range_max = range_max
        self.unique = unique
        self.seed = Validator.check_number("seed", seed, 0, Rel.GE, self.name)

-    def infer_shape(self, true_classes_shape):
-        Validator.check_int(len(true_classes_shape), 2, Rel.EQ, "dim of true_classes", self.name)
-        Validator.check("true_classes_shape[1]", true_classes_shape[1], "num_true", self.num_true, Rel.EQ, self.name)
-        return (self.num_sampled,), true_classes_shape, (self.num_sampled,)
-
-    def infer_dtype(self, true_classes_type):
-        Validator.check_subclass("true_classes_type", true_classes_type, mstype.tensor, self.name)
-        valid_types = (mstype.int64,)
-        Validator.check_tensor_dtype_valid("true_classes_type", true_classes_type, valid_types, self.name)
-        expected_type = mstype.float32
-        return true_classes_type, expected_type, expected_type
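
# Editor's note: an illustrative sketch, not part of this diff, of invoking the
# primitive directly; it mirrors the functional example above, with the seed
# fixed to 1 as in the CPU test further down.
import numpy as np
import mindspore as ms
from mindspore import Tensor, ops

sampler = ops.LogUniformCandidateSampler(2, 5, True, 5, 1)
sampled, true_expected, sampled_expected = sampler(
    Tensor(np.array([[1, 7], [0, 4], [3, 3]]), ms.int64))
print(sampled.shape)  # (5,)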

class RandomShuffle(Primitive):
    r"""

@@ -16,6 +16,7 @@
import numpy as np
import pytest

import mindspore as ms
from mindspore import Tensor
from mindspore.ops import operations as P
+from mindspore.ops import functional as F

@@ -27,18 +28,14 @@ from mindspore.ops.functional import vmap
class UniformCandidateSamplerNet(nn.Cell):
    def __init__(self, num_true, num_sampled, unique, range_max):
        super(UniformCandidateSamplerNet, self).__init__()
-        self.sampler = P.UniformCandidateSampler(num_true, num_sampled,
-                                                 unique, range_max)
+        self.sampler = P.UniformCandidateSampler(num_true, num_sampled, unique, range_max)

    def construct(self, x):
        return self.sampler(x)


def uniform_candidate_sampler(x, num_true, num_sampled, unique, range_max):
-    uniform_candidate_sampler_net = UniformCandidateSamplerNet(num_true,
-                                                               num_sampled,
-                                                               unique,
-                                                               range_max)
+    uniform_candidate_sampler_net = UniformCandidateSamplerNet(num_true, num_sampled, unique, range_max)
    out1, out2, out3 = uniform_candidate_sampler_net(Tensor(x.astype(np.int32)))
    return out1.shape, out2.shape, out3.shape

@@ -49,10 +46,7 @@ def uniform_candidate_sampler_functional(x, num_true, num_sample, unique, range_


def uniform_candidate_sampler_int64(x, num_true, num_sampled, unique, range_max):
-    uniform_candidate_sampler_net = UniformCandidateSamplerNet(num_true,
-                                                               num_sampled,
-                                                               unique,
-                                                               range_max)
+    uniform_candidate_sampler_net = UniformCandidateSamplerNet(num_true, num_sampled, unique, range_max)
    out1, out2, out3 = uniform_candidate_sampler_net(Tensor(x.astype(np.int64)))
    return out1.shape, out2.shape, out3.shape

@@ -60,21 +54,19 @@ def uniform_candidate_sampler_int64(x, num_true, num_sampled, unique, range_max)
class UniformCandidateSamplerHitNet(nn.Cell):
    def __init__(self, num_true, num_sampled, unique, range_max, seed, remove_accidental_hits):
        super(UniformCandidateSamplerHitNet, self).__init__()
-        self.sampler = P.UniformCandidateSampler(num_true, num_sampled, unique,
-                                                 range_max, seed=seed,
+        self.sampler = P.UniformCandidateSampler(num_true,
+                                                 num_sampled,
+                                                 unique,
+                                                 range_max,
+                                                 seed=seed,
                                                  remove_accidental_hits=remove_accidental_hits)

    def construct(self, x):
        return self.sampler(x)


-def uniform_candidate_sampler_hit(x, num_true, num_sampled, unique, range_max, seed,
-                                  remove_accidental_hits):
-    uniform_candidate_sampler_net = UniformCandidateSamplerHitNet(num_true,
-                                                                  num_sampled,
-                                                                  unique,
-                                                                  range_max,
-                                                                  seed,
+def uniform_candidate_sampler_hit(x, num_true, num_sampled, unique, range_max, seed, remove_accidental_hits):
+    uniform_candidate_sampler_net = UniformCandidateSamplerHitNet(num_true, num_sampled, unique, range_max, seed,
                                                                   remove_accidental_hits)
    out1, out2, out3 = uniform_candidate_sampler_net(Tensor(x.astype(np.int32)))
    return out1.shape, out2.shape, out3.shape

@@ -90,8 +82,7 @@ def test_uniform_candidate_sampler_unique_1_true():
    Expectation: The shapes of the outputs are the expected values.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
-    ms1, ms2, ms3 = uniform_candidate_sampler(np.array([[1], [3], [4], [6], [3]]),
-                                              1, 3, True, 4)
+    ms1, ms2, ms3 = uniform_candidate_sampler(np.array([[1], [3], [4], [6], [3]]), 1, 3, True, 4)
    expected_1 = (3,)
    expected_2 = (5, 1)
    expected_3 = (3,)

@@ -110,8 +101,7 @@ def test_uniform_candidate_sampler_not_unique_1_true():
    Expectation: The shapes of the outputs are the expected values.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
-    ms1, ms2, ms3 = uniform_candidate_sampler(np.array([[1], [3], [4], [6], [3]]),
-                                              1, 3, False, 4)
+    ms1, ms2, ms3 = uniform_candidate_sampler(np.array([[1], [3], [4], [6], [3]]), 1, 3, False, 4)
    expected_1 = (3,)
    expected_2 = (5, 1)
    expected_3 = (3,)

@@ -130,9 +120,7 @@ def test_uniform_candidate_sampler_unique_2_true():
    Expectation: The values and shapes of the outputs are the expected values.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
-    ms1, ms2, ms3 = uniform_candidate_sampler(np.array([[1, 2], [3, 2], [4, 2],
-                                              [6, 2], [3, 2]]),
-                                              2, 3, True, 4)
+    ms1, ms2, ms3 = uniform_candidate_sampler(np.array([[1, 2], [3, 2], [4, 2], [6, 2], [3, 2]]), 2, 3, True, 4)
    expected_1 = (3,)
    expected_2 = (5, 2)
    expected_3 = (3,)

@@ -151,10 +139,7 @@ def test_uniform_candidate_sampler_not_unique_2_true():
    Expectation: The values and shapes of the outputs are the expected values.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
-    ms1, ms2, ms3 = uniform_candidate_sampler(np.array([[1, 2], [3, 2],
-                                              [4, 2], [6, 2],
-                                              [3, 2]]),
-                                              2, 3, False, 4)
+    ms1, ms2, ms3 = uniform_candidate_sampler(np.array([[1, 2], [3, 2], [4, 2], [6, 2], [3, 2]]), 2, 3, False, 4)
    expected_1 = (3,)
    expected_2 = (5, 2)
    expected_3 = (3,)

@@ -173,12 +158,9 @@ def test_uniform_candidate_sampler_large():
    Expectation: The shapes of the outputs are the expected values.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
-    ms1, ms2, ms3 = uniform_candidate_sampler(np.array([[12221, 41414],
-                                              [3312, 5125152],
-                                              [3312454, 51252],
-                                              [65125, 225125],
-                                              [35125, 5125122]]),
-                                              2, 5, False, 100)
+    ms1, ms2, ms3 = uniform_candidate_sampler(
+        np.array([[12221, 41414], [3312, 5125152], [3312454, 51252], [65125, 225125], [35125, 5125122]]), 2, 5, False,
+        100)
    expected_1 = (5,)
    expected_2 = (5, 2)
    expected_3 = (5,)

@@ -197,8 +179,7 @@ def test_uniform_candidate_sampler_large_random():
    Expectation: The shapes of the outputs are the expected values.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
-    ms1, ms2, ms3 = uniform_candidate_sampler(np.arange(2142).reshape(34, 63),
-                                              63, 10, False, 12)
+    ms1, ms2, ms3 = uniform_candidate_sampler(np.arange(2142).reshape(34, 63), 63, 10, False, 12)
    expected_1 = (10,)
    expected_2 = (34, 63)
    expected_3 = (10,)

@@ -217,8 +198,7 @@ def test_uniform_candidate_sampler_large_random_int64_input():
    Expectation: The values and shapes of the outputs are the expected values.
    """
    context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU")
-    ms1, ms2, ms3 = uniform_candidate_sampler_int64(np.arange(2142).reshape(34, 63),
-                                                    63, 10, False, 12)
+    ms1, ms2, ms3 = uniform_candidate_sampler_int64(np.arange(2142).reshape(34, 63), 63, 10, False, 12)
    expected_1 = (10,)
    expected_2 = (34, 63)
    expected_3 = (10,)

@@ -331,8 +311,8 @@ def test_uniform_candidate_sampler_vmap_unique_1_true():
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
    in_axes = (0)
-    ms1, ms2, ms3 = uniform_candidate_sampler_vmap(np.array([[[1], [3], [4], [6], [3]], [[1], [3], [4], [6], [3]]]),
-                                                   1, 3, True, 4, in_axes)
+    ms1, ms2, ms3 = uniform_candidate_sampler_vmap(np.array([[[1], [3], [4], [6], [3]], [[1], [3], [4], [6], [3]]]), 1,
+                                                   3, True, 4, in_axes)

    expected_1 = (2, 3)
    expected_2 = (2, 5, 1)

@@ -391,8 +371,7 @@ def test_uniform_candidate_sampler_functional_unique_1_true():
    Expectation: The shapes of the outputs are the expected values.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
-    ms1, ms2, ms3 = uniform_candidate_sampler_functional(np.array([[1], [3], [4], [6], [3]]),
-                                                         1, 3, True, 4)
+    ms1, ms2, ms3 = uniform_candidate_sampler_functional(np.array([[1], [3], [4], [6], [3]]), 1, 3, True, 4)
    expected_1 = (3,)
    expected_2 = (5, 1)
    expected_3 = (3,)

@@ -411,10 +390,8 @@ def test_uniform_candidate_sampler_functional_not_unique_2_true():
    Expectation: The values and shapes of the outputs are the expected values.
    """
    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
-    ms1, ms2, ms3 = uniform_candidate_sampler_functional(np.array([[1, 2], [3, 2],
-                                                         [4, 2], [6, 2],
-                                                         [3, 2]]),
-                                                         2, 3, False, 4)
+    ms1, ms2, ms3 = uniform_candidate_sampler_functional(np.array([[1, 2], [3, 2], [4, 2], [6, 2], [3, 2]]), 2, 3,
+                                                         False, 4)
    expected_1 = (3,)
    expected_2 = (5, 2)
    expected_3 = (3,)

@@ -433,11 +410,31 @@ def test_uniform_candidate_sampler_functional_large_random():
    Expectation: The shapes of the outputs are the expected values.
    """
    context.set_context(mode=context.PYNATIVE_MODE, device_target="CPU")
-    ms1, ms2, ms3 = uniform_candidate_sampler_functional(np.arange(2142).reshape(34, 63),
-                                                         63, 10, False, 12)
+    ms1, ms2, ms3 = uniform_candidate_sampler_functional(np.arange(2142).reshape(34, 63), 63, 10, False, 12)
    expected_1 = (10,)
    expected_2 = (34, 63)
    expected_3 = (10,)
    np.testing.assert_array_equal(ms1, expected_1)
    np.testing.assert_array_equal(ms2, expected_2)
    np.testing.assert_array_equal(ms3, expected_3)


+@pytest.mark.level0
+@pytest.mark.platform_x86_cpu
+@pytest.mark.env_onecard
+def test_log_uniform_candidate_sampler_unique():
+    """
+    Feature: LogUniformCandidateSampler CPU TEST.
+    Description: `unique` is True and `num_true` is 2 for LogUniformCandidateSampler.
+    Expectation: The values and shapes of the outputs are the expected values.
+    """
+    context.set_context(mode=context.GRAPH_MODE, device_target="CPU")
+    sampled_candidates, true_expected_count, sampled_expected_count = F.log_uniform_candidate_sampler(
+        Tensor(np.array([[1, 7], [0, 4], [3, 3]]), ms.int64), 2, 5, True, 5, 1)
+
+    expected_1 = np.array([4, 1, 2, 0, 3])
+    expected_2 = np.array([[0.99236274, 0.7252593], [0.99990803, 0.8698345], [0.9201084, 0.9201084]])
+    expected_3 = np.array([0.8698345, 0.99236274, 0.96404004, 0.99990803, 0.9201084])
+    assert np.array_equal(sampled_candidates.asnumpy(), expected_1)
+    assert np.allclose(true_expected_count.asnumpy(), expected_2)
+    assert np.allclose(sampled_expected_count.asnumpy(), expected_3)
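
# Editor's note: a quick numeric cross-check, not part of this diff. The
# expected counts above are consistent with 1 - (1 - p)**num_tries, where p is
# the Zipfian probability sketched after log_uniform_candidate_sampler; these
# constants imply roughly 19 internal draws were needed to collect 5 unique
# classes for this seed (e.g. 1 - (1 - 0.10176)**19 ~= 0.8698 for class 4).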

@@ -1,24 +1,25 @@
/**
 * Copyright 2022 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "common/common_test.h"
#include "abstract/abstract_function.h"
#include "runtime/graph_scheduler/graph_compiler.h"
#include "runtime/hardware/device_context.h"
#include "kernel/kernel.h"
+#include "kernel/common_utils.h"

namespace mindspore {
namespace runtime {

@@ -32,143 +33,143 @@ using DeviceType = device::DeviceType;
using AddressPtr = kernel::AddressPtr;

class TestDeviceAddress : public DeviceAddress {
 public:
  TestDeviceAddress(void *ptr, size_t size) : DeviceAddress(ptr, size) {}
  ~TestDeviceAddress() {}
  virtual bool SyncDeviceToHost(const ShapeVector &shape, size_t size, TypeId type, void *host_ptr) const {
    return true;
  }
  virtual bool SyncHostToDevice(const ShapeVector &shape, size_t size, TypeId type, const void *host_ptr,
                                const std::string &format) const {
    return true;
  }
  virtual void *GetMutablePtr() const { return nullptr; }
  virtual void ClearDeviceMemory() {}
};

class TestKernelMod : public kernel::KernelMod {
 public:
  TestKernelMod() = default;
  ~TestKernelMod() override = default;
  virtual bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                      const std::vector<AddressPtr> &outputs, void *stream_ptr) {
    return true;
  }
  std::vector<kernel::KernelAttr> GetOpSupport() override { return {}; }
};

class TestADeviceResManager : public device::DeviceResManager {
 public:
  TestADeviceResManager() = default;
  ~TestADeviceResManager() override = default;

  virtual bool AllocateMemory(DeviceAddress *const &address, size_t size) const { return true; }
  virtual void FreeMemory(DeviceAddress *const &address) const {}
  virtual void *AllocateMemory(size_t size) const { return nullptr; }
  virtual void FreeMemory(void *const ptr) const {}
  virtual DeviceAddressPtr CreateDeviceAddress(void *const device_ptr, size_t device_size, const string &format,
                                               TypeId type_id, const ShapeVector &shape) const {
    return std::make_shared<TestDeviceAddress>(nullptr, 0);
  }
};

class TestAKernelExecutor : public device::KernelExecutor {
 public:
  TestAKernelExecutor() = default;
  ~TestAKernelExecutor() override = default;
  virtual void CreateKernel(const std::vector<CNodePtr> &nodes) const {
    for (const auto node : nodes) {
      MS_EXCEPTION_IF_NULL(node);
      if (node->kernel_info() == nullptr) {
        auto kernel_info = std::make_shared<device::KernelInfo>();
        std::shared_ptr<KernelBuildInfoBuilder> builder = std::make_shared<KernelBuildInfoBuilder>();
        kernel_info->set_select_kernel_build_info(builder->Build());
        node->set_kernel_info(kernel_info);
      } else {
        const auto &kernel_info = dynamic_cast<device::KernelInfo *>(node->kernel_info());
        if (kernel_info->select_kernel_build_info() == nullptr) {
          std::shared_ptr<KernelBuildInfoBuilder> builder = std::make_shared<KernelBuildInfoBuilder>();
          kernel_info->set_select_kernel_build_info(builder->Build());
        }
      }
      AnfAlgo::SetOutputAddr(std::make_shared<TestDeviceAddress>(nullptr, 0), 0, node.get());
      auto kernel_mod_ptr = std::make_shared<TestKernelMod>();
      kernel_mod_ptr->SetInputSizeList({4});
      kernel_mod_ptr->SetOutputSizeList({4});
      kernel_mod_ptr->SetWorkspaceSizeList({4});
      AnfAlgo::SetKernelMod(kernel_mod_ptr, node.get());
    }
  }
};

class TestADeviceContext : public device::DeviceInterface<TestAKernelExecutor, TestADeviceResManager> {
 public:
  explicit TestADeviceContext(const DeviceContextKey &device_context_key) : DeviceInterface(device_context_key) {}
  ~TestADeviceContext() override = default;

  virtual void Initialize() {}
  virtual DeviceType GetDeviceType() const { return DeviceType::kCPU; }
  device::RunMode GetRunMode(const FuncGraphPtr &func_graph) const override { return device::RunMode::kKernelMode; }
};

class GraphCompilerTest : public UT::Common {
 public:
  GraphCompilerTest() {}
};

/// Feature: control flow support dynamic shape.
/// Description: Test the parse interface.
/// Expectation: As expected.
TEST_F(GraphCompilerTest, CompileGraph) {
  std::vector<int64_t> shp{2, 2};
  abstract::AbstractTensorPtr abs;

  // Func graph.
  auto func_graph = std::make_shared<FuncGraph>();

  // Parameter.
  auto abstract_x = std::make_shared<abstract::AbstractTensor>(kFloat32, shp);
  auto parameter_x = func_graph->add_parameter();
  parameter_x->set_abstract(abstract_x);

  auto abstract_y = std::make_shared<abstract::AbstractTensor>(kFloat32, shp);
  auto parameter_y = func_graph->add_parameter();
  parameter_y->set_abstract(abstract_y);
  auto parameters = func_graph->parameters();

  // Add.
  std::vector<AnfNodePtr> add_inputs{NewValueNode(prim::kPrimAdd), parameters[0], parameters[1]};
  auto add_node = func_graph->NewCNode(add_inputs);
  abs = std::make_shared<abstract::AbstractTensor>(kFloat32, shp);
  add_node->set_abstract(abs);

  // Reshape.
  std::vector<AnfNodePtr> reshape_inputs{NewValueNode(prim::kPrimReshape), add_node};
  auto reshape_node = func_graph->NewCNode(reshape_inputs);
  abs = std::make_shared<abstract::AbstractTensor>(kFloat32, shp);
  reshape_node->set_abstract(abs);

  // Sub.
  std::vector<AnfNodePtr> sub_inputs{NewValueNode(prim::kPrimSub), reshape_node, parameters[0]};
  auto sub_node = func_graph->NewCNode(sub_inputs);
  abs = std::make_shared<abstract::AbstractTensor>(kFloat32, shp);
  sub_node->set_abstract(abs);

  // Return.
  std::vector<AnfNodePtr> return_inputs{NewValueNode(prim::kPrimReturn), sub_node};
  auto return_node = func_graph->NewCNode(return_inputs);
  func_graph->set_return(return_node);

  std::vector<AnfNodePtr> nodes{add_node, reshape_node, sub_node};
  std::vector<AnfNodePtr> outputs{sub_node};
  auto segment = std::make_shared<GraphSegment>(nodes, false);

  auto compiler = std::make_shared<GraphCompiler>();
  DeviceContextKey device_context_key{"CPU", 0};
  auto device_context = std::make_shared<TestADeviceContext>(device_context_key);
  auto graph_id = compiler->CompileGraph(segment, outputs, device_context.get(), device::RunMode::kKernelMode, false);
  const auto &kernel_graph = compiler->Fetch(graph_id);
  // Editor's note: Reshape is presumably folded away as a no-op during graph
  // compilation, leaving only the Add and Sub kernels in the execution order.
  ASSERT_EQ(2, kernel_graph->execution_order().size());
}
}  // namespace runtime
}  // namespace mindspore