forked from mindspore-Ecosystem/mindspore
!44458 Refactor AdaptiveAvgPool2DKernelMod/AngleAtomEnergyGpuKernelMod/ConcatCpuKernelMod/ConcatV2FwdGpuKernelMod
Merge pull request !44458 from hanhuifeng/kernel_mod_new
commit 5fe262a7c3
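Editor's note: the diff below moves these kernels off the deprecated CNode-based interface (InitKernel(const CNodePtr &) on the DeprecatedNative*KernelMod base classes) onto the new BaseOperator-based interface, where Init matches a KernelAttr against a static function table, Resize recomputes shape-dependent state, and Launch forwards to the dtype-specific LaunchKernel selected at Init time. The following is a minimal, self-contained sketch of that dispatch pattern only; SimpleConcatKernelMod and its simplified types are illustrative stand-ins, not the real MindSpore classes or signatures.

// Sketch of the KernelAttr -> member-function dispatch used by the new-style kernels in this commit.
// All types here are simplified placeholders for illustration.
#include <cstdint>
#include <functional>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

class SimpleConcatKernelMod {
 public:
  using LaunchFunc = std::function<bool(SimpleConcatKernelMod *, const std::vector<const void *> &, void *)>;

  // Init: pick the launch function that matches the requested dtype (done once).
  bool Init(const std::string &dtype) {
    for (const auto &[attr, func] : GetFuncList()) {
      if (attr == dtype) {
        kernel_func_ = func;
        return true;
      }
    }
    return false;  // unsupported dtype, mirrors the "does not support this kernel data type" path
  }

  // Resize: recompute per-call shape information (runs again when input shapes change).
  int Resize(const std::vector<size_t> &input_lens) {
    input_lens_ = input_lens;
    return 0;  // KRET_OK
  }

  // Launch: forward to the dtype-specific implementation chosen in Init.
  bool Launch(const std::vector<const void *> &inputs, void *output) {
    return kernel_func_(this, inputs, output);
  }

 private:
  // Table of {dtype attr, launch function}, analogous to the func_list_ / GetFuncList() tables below.
  static const std::vector<std::pair<std::string, LaunchFunc>> &GetFuncList() {
    static const std::vector<std::pair<std::string, LaunchFunc>> func_list = {
        {"float32", &SimpleConcatKernelMod::LaunchKernel<float>},
        {"int32", &SimpleConcatKernelMod::LaunchKernel<int32_t>},
    };
    return func_list;
  }

  // Type-specific body: concatenate the flat inputs into the output buffer.
  template <typename T>
  bool LaunchKernel(const std::vector<const void *> &inputs, void *output) {
    T *out = static_cast<T *>(output);
    for (size_t i = 0; i < inputs.size(); ++i) {
      const T *in = static_cast<const T *>(inputs[i]);
      for (size_t j = 0; j < input_lens_[i]; ++j) {
        *out++ = in[j];
      }
    }
    return true;
  }

  LaunchFunc kernel_func_;
  std::vector<size_t> input_lens_;
};

int main() {
  SimpleConcatKernelMod kernel;
  if (!kernel.Init("float32")) return 1;
  kernel.Resize({2, 3});
  float a[] = {1.f, 2.f}, b[] = {3.f, 4.f, 5.f}, out[5] = {};
  kernel.Launch({a, b}, out);
  for (float v : out) std::cout << v << ' ';  // prints: 1 2 3 4 5
  std::cout << '\n';
  return 0;
}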
@ -17,47 +17,59 @@
#include "plugin/device/cpu/kernel/concat_cpu_kernel.h"
#include <algorithm>
#include <utility>
#include <map>
#include "plugin/device/cpu/hal/device/cpu_device_address.h"
#include "mindspore/core/ops/concat.h"

namespace mindspore {
namespace kernel {
namespace {
constexpr size_t kConcatOutputsNum = 1;
} // namespace
void ConcatCpuKernelMod::InitKernel(const CNodePtr &kernel_node) {
MS_EXCEPTION_IF_NULL(kernel_node);
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
cnode_ptr_ = kernel_node;
axis_ = LongToInt(common::AnfAlgo::GetNodeAttr<int64_t>(kernel_node, AXIS));
auto input_1_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
if (axis_ < 0) {
axis_ = axis_ + SizeToInt(input_1_shape.size());
}

auto kernel_attr = GetKernelAttrFromNode(kernel_node);
bool ConcatCpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs) {
auto kernel_attr = GetKernelAttrFromTensors(inputs, outputs);
auto [is_match, index] = MatchKernelAttr(kernel_attr, GetOpSupport());
if (!is_match) {
MS_LOG(EXCEPTION) << "Concat does not support this kernel data type: " << kernel_attr;
}

kernel_func_ = func_list_[index].second;
kernel_name_ = base_operator->name();
auto kernel_ptr = std::dynamic_pointer_cast<ops::Concat>(base_operator);
MS_EXCEPTION_IF_NULL(kernel_ptr);
ori_axis_ = kernel_ptr->get_axis();
return true;
}

int ConcatCpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs,
const std::map<uint32_t, tensor::TensorPtr> &inputsOnHost) {
if (int ret = KernelMod::Resize(base_operator, inputs, outputs, inputsOnHost); ret != KRET_OK) {
return ret;
}
inputs_shape_.clear();
for (size_t i = 0; i < inputs.size(); ++i) {
inputs_shape_.push_back(inputs[i]->GetShapeVector());
}
axis_ = ori_axis_;
if (axis_ < 0) {
axis_ = axis_ + SizeToInt(inputs_shape_[0].size());
}
return KRET_OK;
}

template <typename T>
bool ConcatCpuKernelMod::LaunchKernel(const std::vector<kernel::AddressPtr> &inputs,
const std::vector<kernel::AddressPtr> &outputs) {
auto node_ = cnode_ptr_.lock();
if (!node_) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', cnode_ptr_(kernel_node) is expired. Error no: " << node_;
}
const size_t input_num = common::AnfAlgo::GetInputTensorNum(node_);
const size_t input_num = inputs.size();
CHECK_KERNEL_INPUTS_NUM(inputs.size(), input_num, kernel_name_);
CHECK_KERNEL_OUTPUTS_NUM(outputs.size(), kConcatOutputsNum, kernel_name_);

std::vector<ShapeVector> input_flat_shape_list;
input_flat_shape_list.reserve(input_num);
for (size_t i = 0; i < input_num; i++) {
auto input_shape_i = common::AnfAlgo::GetPrevNodeOutputInferShape(node_, i);
auto input_shape_i = inputs_shape_[i];
auto flat_shape = CPUKernelUtils::FlatShapeByAxis(input_shape_i, axis_);
(void)input_flat_shape_list.emplace_back(flat_shape);
}
@ -14,12 +14,13 @@
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CONCAT_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CONCAT_CPU_KERNEL_H_
#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_CONCAT_CPU_KERNEL_H_
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_CONCAT_CPU_KERNEL_H_

#include <vector>
#include <utility>
#include <complex>
#include <map>

#include "plugin/device/cpu/kernel/cpu_kernel.h"
#include "plugin/factory/ms_factory.h"
@ -29,12 +30,16 @@ namespace kernel {
using complex64 = std::complex<float>;
using complex128 = std::complex<double>;

class ConcatCpuKernelMod : public DeprecatedNativeCpuKernelMod {
class ConcatCpuKernelMod : public NativeCpuKernelMod {
public:
ConcatCpuKernelMod() = default;
~ConcatCpuKernelMod() override = default;

void InitKernel(const CNodePtr &kernel_node) override;
bool Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs) override;

int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs, const std::map<uint32_t, tensor::TensorPtr> &) override;

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) override {
@ -50,9 +55,11 @@ class ConcatCpuKernelMod : public DeprecatedNativeCpuKernelMod {
const std::vector<kernel::AddressPtr> &)>;
static std::vector<std::pair<KernelAttr, ConcatFunc>> func_list_;
ConcatFunc kernel_func_;
int ori_axis_{0};
int axis_{0};
std::vector<ShapeVector> inputs_shape_;
};
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_CPU_CONCAT_CPU_KERNEL_H_
#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_CPU_KERNEL_CONCAT_CPU_KERNEL_H_
@ -14,6 +14,8 @@
* limitations under the License.
*/

#include <utility>
#include <map>
#include "plugin/device/gpu/kernel/arrays/concatv2_gpu_kernel.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/complex.h"
namespace mindspore {
@ -21,50 +23,138 @@ namespace kernel {
template <typename T>
using Complex = mindspore::utils::Complex<T>;

MS_REG_GPU_KERNEL_ONE(
Concat, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeComplex128).AddOutputAttr(kNumberTypeComplex128),
ConcatV2FwdGpuKernelMod, Complex<double>)
MS_REG_GPU_KERNEL_ONE(
Concat, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeComplex64).AddOutputAttr(kNumberTypeComplex64),
ConcatV2FwdGpuKernelMod, Complex<float>)
MS_REG_GPU_KERNEL_ONE(
Concat, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
ConcatV2FwdGpuKernelMod, double)
MS_REG_GPU_KERNEL_ONE(
Concat, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
ConcatV2FwdGpuKernelMod, float)
MS_REG_GPU_KERNEL_ONE(
Concat, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
ConcatV2FwdGpuKernelMod, half)
const std::vector<std::pair<KernelAttr, ConcatV2FwdGpuKernelMod::KernelRunFunc>> &ConcatV2FwdGpuKernelMod::GetFuncList()
const {
static const std::vector<std::pair<KernelAttr, ConcatV2FwdGpuKernelMod::KernelRunFunc>> func_list = {
{KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeComplex128).AddOutputAttr(kNumberTypeComplex128),
&ConcatV2FwdGpuKernelMod::LaunchKernel<Complex<double>>},
{KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeComplex64).AddOutputAttr(kNumberTypeComplex64),
&ConcatV2FwdGpuKernelMod::LaunchKernel<Complex<float>>},
{KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
&ConcatV2FwdGpuKernelMod::LaunchKernel<double>},
{KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
&ConcatV2FwdGpuKernelMod::LaunchKernel<float>},
{KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
&ConcatV2FwdGpuKernelMod::LaunchKernel<half>},
{KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
&ConcatV2FwdGpuKernelMod::LaunchKernel<int64_t>},
{KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
&ConcatV2FwdGpuKernelMod::LaunchKernel<int32_t>},
{KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeInt16),
&ConcatV2FwdGpuKernelMod::LaunchKernel<int16_t>},
{KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeInt8).AddOutputAttr(kNumberTypeInt8),
&ConcatV2FwdGpuKernelMod::LaunchKernel<char>},
{KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeUInt64).AddOutputAttr(kNumberTypeUInt64),
&ConcatV2FwdGpuKernelMod::LaunchKernel<uint64_t>},
{KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeUInt32).AddOutputAttr(kNumberTypeUInt32),
&ConcatV2FwdGpuKernelMod::LaunchKernel<uint>},
{KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeUInt16).AddOutputAttr(kNumberTypeUInt16),
&ConcatV2FwdGpuKernelMod::LaunchKernel<uint16_t>},
{KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeUInt8),
&ConcatV2FwdGpuKernelMod::LaunchKernel<uchar>},
{KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool),
&ConcatV2FwdGpuKernelMod::LaunchKernel<bool>}};
return func_list;
}

MS_REG_GPU_KERNEL_ONE(Concat,
KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeInt64).AddOutputAttr(kNumberTypeInt64),
ConcatV2FwdGpuKernelMod, int64_t)
MS_REG_GPU_KERNEL_ONE(Concat,
KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeInt32).AddOutputAttr(kNumberTypeInt32),
ConcatV2FwdGpuKernelMod, int)
MS_REG_GPU_KERNEL_ONE(Concat,
KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeInt16).AddOutputAttr(kNumberTypeInt16),
ConcatV2FwdGpuKernelMod, short) // NOLINT
MS_REG_GPU_KERNEL_ONE(Concat,
KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeInt8).AddOutputAttr(kNumberTypeInt8),
ConcatV2FwdGpuKernelMod, char)
bool ConcatV2FwdGpuKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
stream_ptr_ = stream_ptr;
return kernel_func_(this, inputs, workspace, outputs);
}

MS_REG_GPU_KERNEL_ONE(
Concat, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeUInt64).AddOutputAttr(kNumberTypeUInt64),
ConcatV2FwdGpuKernelMod, uint64_t)
MS_REG_GPU_KERNEL_ONE(
Concat, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeUInt32).AddOutputAttr(kNumberTypeUInt32),
ConcatV2FwdGpuKernelMod, uint)
MS_REG_GPU_KERNEL_ONE(
Concat, KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeUInt16).AddOutputAttr(kNumberTypeUInt16),
ConcatV2FwdGpuKernelMod, uint16_t)
MS_REG_GPU_KERNEL_ONE(Concat,
KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeUInt8).AddOutputAttr(kNumberTypeUInt8),
ConcatV2FwdGpuKernelMod, uchar)
template <typename T>
bool ConcatV2FwdGpuKernelMod::LaunchKernel(const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs) {
if (input_num_ == 0) {
return true;
}
T *output = GetDeviceAddress<T>(outputs, 0);
T **inputs_device = GetDeviceAddress<T *>(workspace, 0);
int *len_axis_device = GetDeviceAddress<int>(workspace, 1);
for (int i = 0; i < input_num_; i++) {
auto input_index = not_null_input_index_[i];
inputs_host_[i] = GetDeviceAddress<T>(inputs, input_index);
}
CHECK_CUDA_RET_WITH_ERROR_NOTRACE(
cudaMemcpyAsync(inputs_device, inputs_host_.data(), sizeof(T *) * input_num_, cudaMemcpyHostToDevice,
reinterpret_cast<cudaStream_t>(stream_ptr_)),
"ConcatV2 opt cudaMemcpyAsync inputs failed");
CHECK_CUDA_RET_WITH_ERROR_NOTRACE(
cudaMemcpyAsync(len_axis_device, len_axis_.data(), sizeof(int) * input_num_, cudaMemcpyHostToDevice,
reinterpret_cast<cudaStream_t>(stream_ptr_)),
"ConcatV2 opt cudaMemcpyAsync length on axis failed");
output_size_ = output_size_list_[0] / sizeof(T);
ConcatKernel(output_size_, input_num_, all_size_before_axis_, all_size_axis_, len_axis_device, inputs_device, output,
reinterpret_cast<cudaStream_t>(stream_ptr_));
return true;
}

MS_REG_GPU_KERNEL_ONE(Concat,
KernelAttr().AddAllSameAttr(true).AddInputAttr(kNumberTypeBool).AddOutputAttr(kNumberTypeBool),
ConcatV2FwdGpuKernelMod, bool)
bool ConcatV2FwdGpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs) {
if (!MatchKernelFunc(base_operator, inputs, outputs)) {
return false;
}
kernel_name_ = base_operator->name();
auto prim = base_operator->GetPrim();
MS_EXCEPTION_IF_NULL(prim);
ori_axis_ = GetValue<int64_t>(prim->GetAttr("axis"));
origin_data_format_ = GetValue<std::string>(prim->GetAttr("operator_origin_format"));
len_axis_.resize(inputs.size());
return true;
}

int ConcatV2FwdGpuKernelMod::Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs,
const std::map<uint32_t, tensor::TensorPtr> &inputsOnHost) {
if (int ret = KernelMod::Resize(base_operator, inputs, outputs, inputsOnHost); ret != KRET_OK) {
return ret;
}
auto input_0_shape = inputs[0]->GetDeviceShapeAdaptively();
int dims = SizeToInt(input_0_shape.size());
axis_ = ori_axis_;
if (axis_ < -dims || axis_ >= dims) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the 'axis' must be in the range [-" << dims << "," << dims
<< "), but got " << axis_;
}
if (axis_ < 0) {
axis_ += dims;
}
auto input_format = mindspore::FormatEnumToString(inputs[0]->GetFormat());
axis_ = AxisTransform(origin_data_format_, input_format, axis_);

not_null_input_index_.clear();
len_axis_.clear();
input_num_ = inputs.size();
for (int i = 0; i < input_num_; i++) {
auto input_shape = inputs[i]->GetDeviceShapeAdaptively();
auto is_null_input = CHECK_NULL_INPUT(input_shape);
if (!is_null_input) {
not_null_input_index_.push_back(i);
len_axis_.push_back(LongToInt(input_shape[axis_]));
}
}
input_num_ = not_null_input_index_.size();
workspace_size_list_.push_back(sizeof(void *) * input_num_);
workspace_size_list_.push_back(sizeof(int) * input_num_);
inputs_host_.resize(input_num_);

auto output_shape = outputs[0]->GetDeviceShapeAdaptively();
all_size_before_axis_ = 1;
all_size_axis_ = 1;
for (int i = 0; i < SizeToInt(output_shape.size()); i++) {
if (i > axis_) {
all_size_before_axis_ *= LongToInt(output_shape[i]);
all_size_axis_ *= LongToInt(output_shape[i]);
}
if (i == axis_) {
all_size_before_axis_ *= LongToInt(output_shape[i]);
}
}
return KRET_OK;
}

MS_KERNEL_FACTORY_REG(NativeGpuKernelMod, Concat, ConcatV2FwdGpuKernelMod);
} // namespace kernel
} // namespace mindspore
@ -14,152 +14,58 @@
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_CONCATV2_GPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_CONCATV2_GPU_KERNEL_H_
#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_ARRAYS_CONCATV2_GPU_KERNEL_H_
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_ARRAYS_CONCATV2_GPU_KERNEL_H_

#include <vector>
#include <string>
#include <memory>
#include <utility>
#include <map>
#include "plugin/device/gpu/kernel/gpu_kernel.h"
#include "plugin/device/gpu/kernel/gpu_kernel_factory.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/concatv2_impl.cuh"

namespace mindspore {
namespace kernel {
template <typename T>
class ConcatV2FwdGpuKernelMod : public DeprecatedNativeGpuKernelMod {
class ConcatV2FwdGpuKernelMod : public NativeGpuKernelMod, public MatchKernelHelper<ConcatV2FwdGpuKernelMod> {
public:
ConcatV2FwdGpuKernelMod()
: axis_(0),
input_num_(1),
output_size_(0),
all_size_before_axis_(1),
all_size_axis_(1),
kernel_name_("ConcatV2"),
inputs_host_(nullptr),
len_axis_(nullptr) {}
ConcatV2FwdGpuKernelMod() = default;
~ConcatV2FwdGpuKernelMod() override = default;

const std::vector<std::pair<KernelAttr, KernelRunFunc>> &GetFuncList() const override;

bool Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs) override;

int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs,
const std::map<uint32_t, tensor::TensorPtr> &inputsOnHost) override;

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (input_num_ == 0) {
return true;
}
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;

T *output = GetDeviceAddress<T>(outputs, 0);
T **inputs_device = GetDeviceAddress<T *>(workspace, 0);
int *len_axis_device = GetDeviceAddress<int>(workspace, 1);
int current_dim = 0;
for (size_t i = 0; i < inputs.size(); i++) {
T *input = GetPossiblyNullDeviceAddress<T>(inputs, i);
if (input != nullptr) {
inputs_host_[current_dim] = input;
current_dim++;
}
}
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
cudaMemcpyAsync(inputs_device, inputs_host_.get(), sizeof(T *) * input_num_,
cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)),
"ConcatV2 opt cudaMemcpyAsync inputs failed");
CHECK_CUDA_RET_WITH_EXCEPT(kernel_node_,
cudaMemcpyAsync(len_axis_device, len_axis_.get(), sizeof(int) * input_num_,
cudaMemcpyHostToDevice, reinterpret_cast<cudaStream_t>(stream_ptr)),
"ConcatV2 opt cudaMemcpyAsync length on axis failed");
ConcatKernel(output_size_, input_num_, all_size_before_axis_, all_size_axis_, len_axis_device, inputs_device,
output, reinterpret_cast<cudaStream_t>(stream_ptr));
return true;
}
bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
kernel_node_ = kernel_node;
if (!CheckParam(kernel_node)) {
return false;
}
auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, 0);
int dims = SizeToInt(input_shape.size());
axis_ = static_cast<int>(GetAttr<int64_t>(kernel_node, "axis"));
if (axis_ < -dims || axis_ >= dims) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the 'axis' must be in the range [-" << dims << "," << dims
<< "), but got " << axis_;
}
if (axis_ < 0) {
axis_ += dims;
}
auto origin_data_format = AnfAlgo::GetOriginDataFormat(kernel_node);
auto input_format = AnfAlgo::GetInputFormat(kernel_node, 0);
axis_ = AxisTransform(origin_data_format, input_format, axis_);

input_num_ = SizeToInt(common::AnfAlgo::GetInputTensorNum(kernel_node));
inputs_host_ = std::make_unique<T *[]>(input_num_);
len_axis_ = std::make_unique<int[]>(input_num_);
int current_dim = 0;
for (int i = 0; i < input_num_; i++) {
size_t input_size = 1;
auto input_shape = AnfAlgo::GetInputDeviceShapeAdaptively(kernel_node, i);
for (size_t j = 0; j < input_shape.size(); j++) {
input_size *= static_cast<size_t>(input_shape[j]);
}

if (input_size == 0) {
input_num_--;
} else {
input_size_list_.push_back(input_size * sizeof(T));
len_axis_[current_dim] = LongToInt(input_shape[axis_]);
current_dim++;
}
}
workspace_size_list_.push_back(sizeof(T *) * input_num_);
workspace_size_list_.push_back(sizeof(int) * input_num_);

auto output_shape = AnfAlgo::GetOutputDeviceShape(kernel_node, 0);
output_size_ = SizeOf(output_shape);
for (int i = 0; i < SizeToInt(output_shape.size()); i++) {
if (i > axis_) {
all_size_before_axis_ *= LongToInt(output_shape[i]);
all_size_axis_ *= LongToInt(output_shape[i]);
}
if (i == axis_) {
all_size_before_axis_ *= LongToInt(output_shape[i]);
}
}
output_size_list_.push_back(output_size_ * sizeof(T));
InitSizeLists();
return true;
}

void ResetResource() noexcept override {
ResetSizeLists();
axis_ = 0;
input_num_ = 1;
output_size_ = 0;
all_size_before_axis_ = 1;
all_size_axis_ = 1;
kernel_name_ = "ConcatV2";
inputs_host_ = nullptr;
len_axis_ = nullptr;
}

protected:
void InitSizeLists() override {}
std::vector<KernelAttr> GetOpSupport() override { return OpSupport(); }

private:
bool CheckParam(const CNodePtr &kernel_node) {
size_t output_num = common::AnfAlgo::GetOutputTensorNum(kernel_node);
if (output_num != 1) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of outputs must be 1, but got " << output_num;
}
return true;
}
int axis_;
int input_num_;
size_t output_size_;
int all_size_before_axis_;
int all_size_axis_;
std::string kernel_name_;
std::unique_ptr<T *[]> inputs_host_;
std::unique_ptr<int[]> len_axis_;
template <typename T>
bool LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs);

int axis_{0};
int ori_axis_{0};
int input_num_{1};
size_t output_size_{0};
int all_size_before_axis_{1};
int all_size_axis_{1};
std::string kernel_name_{"ConcatV2"};
std::vector<int> not_null_input_index_;
std::vector<int> len_axis_;
std::vector<void *> inputs_host_;
std::string origin_data_format_;
void *stream_ptr_{nullptr};
};
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_ARRAYS_CONCATV2_GPU_KERNEL_H_
#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_ARRAYS_CONCATV2_GPU_KERNEL_H_
@ -189,6 +189,7 @@ bool SliceGpuKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std:
bool SliceGpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs) {
auto kernel_ptr = std::dynamic_pointer_cast<ops::Slice>(base_operator);
MS_EXCEPTION_IF_NULL(kernel_ptr);
kernel_name_ = kernel_ptr->name();
auto tensor_attr = GetKernelAttrFromTensors(inputs, outputs);
auto [is_match, index] = MatchKernelAttr(tensor_attr, GetOpSupport());
@ -15,17 +15,92 @@
*/

#include "plugin/device/gpu/kernel/nn/adaptive_avg_pool2d_gpu_kernel.h"
#include "ops/adaptive_avg_pool_2d.h"

namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_ONE(AdaptiveAvgPool2D,
KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
AdaptiveAvgPool2DKernelMod, half)
MS_REG_GPU_KERNEL_ONE(AdaptiveAvgPool2D,
KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
AdaptiveAvgPool2DKernelMod, float)
MS_REG_GPU_KERNEL_ONE(AdaptiveAvgPool2D,
KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
AdaptiveAvgPool2DKernelMod, double)
constexpr uint kNumberTwo = 2;
constexpr uint kNumberThree = 3;

using KernelRunFunc = AdaptiveAvgPool2DKernelMod::KernelRunFunc;
const std::vector<std::pair<KernelAttr, KernelRunFunc>> &AdaptiveAvgPool2DKernelMod::GetFuncList() const {
static const std::vector<std::pair<KernelAttr, KernelRunFunc>> func_list = {
{KernelAttr().AddInputAttr(kNumberTypeFloat16).AddOutputAttr(kNumberTypeFloat16),
&AdaptiveAvgPool2DKernelMod::LaunchKernel<half>},
{KernelAttr().AddInputAttr(kNumberTypeFloat32).AddOutputAttr(kNumberTypeFloat32),
&AdaptiveAvgPool2DKernelMod::LaunchKernel<float>},
{KernelAttr().AddInputAttr(kNumberTypeFloat64).AddOutputAttr(kNumberTypeFloat64),
&AdaptiveAvgPool2DKernelMod::LaunchKernel<double>}};
return func_list;
}

template <typename T>
bool AdaptiveAvgPool2DKernelMod::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs) {
if (is_null_input_) {
return true;
}
T *input_addr = GetDeviceAddress<T>(inputs, 0);
T *output_addr = GetDeviceAddress<T>(outputs, 0);

ApplyAdaptiveAvgPool2D(size_, input_height_, input_width_, output_height_, output_width_, input_addr, output_addr,
reinterpret_cast<cudaStream_t>(stream_ptr_));

return true;
}

bool AdaptiveAvgPool2DKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
stream_ptr_ = stream_ptr;
return kernel_func_(this, inputs, workspace, outputs);
}

bool AdaptiveAvgPool2DKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs) {
if (!MatchKernelFunc(base_operator, inputs, outputs)) {
return false;
}
kernel_name_ = base_operator->name();
size_t input_num = inputs.size();
if (input_num != 1) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs must be 1, but got " << input_num;
}
return true;
}

int AdaptiveAvgPool2DKernelMod::Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs,
const std::map<uint32_t, tensor::TensorPtr> &inputsOnHost) {
if (int ret = KernelMod::Resize(base_operator, inputs, outputs, inputsOnHost); ret != KRET_OK) {
return ret;
}

auto input_shape = inputs[0]->GetShapeVector();
auto output_shape = outputs[0]->GetShapeVector();
is_null_input_ =
CHECK_SHAPE_NULL(input_shape, kernel_name_, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name_, "output");
if (is_null_input_) {
return KRET_OK;
}
len_ = static_cast<uint>(input_shape.size());
if (len_ < kNumberTwo) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be less than " << kNumberTwo
<< ", but got " << len_;
}
input_height_ = static_cast<uint>(input_shape[len_ - kNumberTwo]);
input_width_ = static_cast<uint>(input_shape[len_ - 1]);
size_ = static_cast<uint>(len_ == kNumberThree ? input_shape[0] : input_shape[0] * input_shape[1]);

uint out_len = static_cast<uint>(output_shape.size());
if (out_len < kNumberTwo) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of output cannot be less than " << kNumberTwo
<< ", but got " << out_len;
}
output_height_ = static_cast<uint>(output_shape[out_len - kNumberTwo]);
output_width_ = static_cast<uint>(output_shape[out_len - 1]);
return KRET_OK;
}

MS_KERNEL_FACTORY_REG(NativeGpuKernelMod, AdaptiveAvgPool2D, AdaptiveAvgPool2DKernelMod);
} // namespace kernel
} // namespace mindspore
@ -14,115 +14,56 @@
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_ADAPTIVEAVGPOOL2D_GPU_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_ADAPTIVEAVGPOOL2D_GPU_KERNEL_H_
#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_NN_ADAPTIVE_AVG_POOL2D_GPU_KERNEL_H_
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_NN_ADAPTIVE_AVG_POOL2D_GPU_KERNEL_H_

#include <vector>
#include <string>
#include <algorithm>
#include <utility>
#include <map>
#include "plugin/device/gpu/kernel/gpu_kernel.h"
#include "plugin/device/gpu/kernel/gpu_kernel_factory.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/adaptive_avg_pool2d_impl.cuh"

namespace mindspore {
namespace kernel {
template <typename T>
class AdaptiveAvgPool2DKernelMod : public DeprecatedNativeGpuKernelMod {

class AdaptiveAvgPool2DKernelMod : public NativeGpuKernelMod, public MatchKernelHelper<AdaptiveAvgPool2DKernelMod> {
public:
AdaptiveAvgPool2DKernelMod()
: input_size_(0),
output_size_(0),
len(0),
input_height(0),
input_width(0),
output_height(0),
output_width(0),
size(0),
is_null_input_(false),
kernel_name_("AdaptiveAvgPool2D") {}
AdaptiveAvgPool2DKernelMod() = default;
~AdaptiveAvgPool2DKernelMod() override = default;

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> & /*workspace*/,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
if (is_null_input_) {
return true;
}
T *input_addr = GetDeviceAddress<T>(inputs, 0);
T *output_addr = GetDeviceAddress<T>(outputs, 0);
const std::vector<std::pair<KernelAttr, KernelRunFunc>> &GetFuncList() const override;

ApplyAdaptiveAvgPool2D(size, input_height, input_width, output_height, output_width, input_addr, output_addr,
reinterpret_cast<cudaStream_t>(stream_ptr));
std::vector<KernelAttr> GetOpSupport() override { return OpSupport(); }

return true;
}
bool Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs) override;

bool Init(const CNodePtr &kernel_node) override {
kernel_name_ = common::AnfAlgo::GetCNodeName(kernel_node);
auto shape_addr = common::AnfAlgo::GetNodeAttr<std::vector<int64_t>>(kernel_node, "output_size");
kernel_node_ = kernel_node;
if (shape_addr.size() == 1) {
output_height = shape_addr[0];
output_width = shape_addr[0];
} else if (shape_addr.size() == 2) {
output_height = static_cast<uint>(shape_addr[0]);
output_width = static_cast<uint>(shape_addr[1]);
} else {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the length of 'output_size' must be 1 or 2, but got "
<< shape_addr.size();
}
int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs,
const std::map<uint32_t, tensor::TensorPtr> &inputsOnHost) override;

size_t input_num = common::AnfAlgo::GetInputTensorNum(kernel_node);
if (input_num != 1) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the number of inputs must be 1, but got " << input_num;
}

input_size_ = sizeof(T);
output_size_ = sizeof(T);

auto input_shape = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto output_shape = common::AnfAlgo::GetOutputInferShape(kernel_node, 0);
is_null_input_ =
CHECK_SHAPE_NULL(input_shape, kernel_name_, "input") || CHECK_SHAPE_NULL(output_shape, kernel_name_, "output");
if (is_null_input_ || AnfAlgo::IsShapesDynamic({input_shape, output_shape})) {
InitSizeLists();
return true;
}
len = static_cast<uint>(input_shape.size());

if (len < 2) {
MS_LOG(EXCEPTION) << "For '" << kernel_name_ << "', the dimension of input cannot be less than 2, but got "
<< len;
}

input_height = static_cast<uint>(input_shape[len - 2]);
input_width = static_cast<uint>(input_shape[len - 1]);
size = static_cast<uint>(len == 3 ? input_shape[0] : input_shape[0] * input_shape[1]);
input_size_ *= SizeOf(input_shape);
output_size_ *= SizeOf(output_shape);

InitSizeLists();
return true;
}

protected:
void InitSizeLists() override {
input_size_list_.push_back(input_size_);
output_size_list_.push_back(output_size_);
}
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;

private:
size_t input_size_;
size_t output_size_;
uint len;
uint input_height;
uint input_width;
uint output_height;
uint output_width;
uint size;
bool is_null_input_;
std::string kernel_name_;
template <typename T>
bool LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs);

uint len_{0};
uint input_height_{0};
uint input_width_{0};
uint output_height_{0};
uint output_width_{0};
uint size_{0};
bool is_null_input_{false};
std::string kernel_name_{"AdaptiveAvgPool2D"};
void *stream_ptr_{nullptr};
};
} // namespace kernel
} // namespace mindspore

#endif // MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_NN_ADAPTIVEAVGPOOL2D_GPU_KERNEL_H_
#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_NN_ADAPTIVE_AVG_POOL2D_GPU_KERNEL_H_
@ -15,19 +15,74 @@
*/

#include "plugin/device/gpu/kernel/sponge/angle/angle_atom_energy_kernel.h"
#include "ops/angle_atom_energy.h"

namespace mindspore {
namespace kernel {
MS_REG_GPU_KERNEL_TWO(AngleAtomEnergy,
KernelAttr()
.AddInputAttr(kNumberTypeUInt32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
AngleAtomEnergyGpuKernelMod, float, int)
using KernelRunFunc = AngleAtomEnergyGpuKernelMod::KernelRunFunc;
const std::vector<std::pair<KernelAttr, KernelRunFunc>> &AngleAtomEnergyGpuKernelMod::GetFuncList() const {
static const std::vector<std::pair<KernelAttr, KernelRunFunc>> func_list = {
{KernelAttr()
.AddInputAttr(kNumberTypeUInt32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeInt32)
.AddInputAttr(kNumberTypeFloat32)
.AddInputAttr(kNumberTypeFloat32)
.AddOutputAttr(kNumberTypeFloat32),
&AngleAtomEnergyGpuKernelMod::LaunchKernel<float, int>}};
return func_list;
}

bool AngleAtomEnergyGpuKernelMod::Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs) {
if (!MatchKernelFunc(base_operator, inputs, outputs)) {
return false;
}
auto kernel_ptr = std::dynamic_pointer_cast<ops::AngleAtomEnergy>(base_operator);
MS_EXCEPTION_IF_NULL(kernel_ptr);
kernel_name_ = kernel_ptr->name();
angle_numbers_ = static_cast<int>(kernel_ptr->get_angle_numbers());
return true;
}

int AngleAtomEnergyGpuKernelMod::Resize(const BaseOperatorPtr &base_operator,
const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs,
const std::map<uint32_t, tensor::TensorPtr> &inputsOnHost) {
if (int ret = KernelMod::Resize(base_operator, inputs, outputs, inputsOnHost); ret != KRET_OK) {
return ret;
}
auto shape_uint_crd = inputs[0]->GetShapeVector();
ele_uint_crd_ = SizeOf(shape_uint_crd);
return KRET_OK;
}

template <typename T, typename T1>
bool AngleAtomEnergyGpuKernelMod::LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs) {
auto uint_crd_f = GetDeviceAddress<const T1>(inputs, 0);
auto scaler_f = GetDeviceAddress<T>(inputs, 1);
auto atom_a = GetDeviceAddress<const T1>(inputs, 2);
auto atom_b = GetDeviceAddress<const T1>(inputs, 3);
auto atom_c = GetDeviceAddress<const T1>(inputs, 4);
auto angle_k = GetDeviceAddress<T>(inputs, 5);
auto angle_theta0 = GetDeviceAddress<T>(inputs, 6);

auto ene = GetDeviceAddress<T>(outputs, 0);
AngleAtomEnergy(angle_numbers_, ele_uint_crd_, uint_crd_f, scaler_f, atom_a, atom_b, atom_c, angle_k, angle_theta0,
ene, reinterpret_cast<cudaStream_t>(stream_ptr_));
return true;
}

bool AngleAtomEnergyGpuKernelMod::Launch(const std::vector<AddressPtr> &inputs,
const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
stream_ptr_ = stream_ptr;
return kernel_func_(this, inputs, workspace, outputs);
}

MS_KERNEL_FACTORY_REG(NativeGpuKernelMod, AngleAtomEnergy, AngleAtomEnergyGpuKernelMod);
} // namespace kernel
} // namespace mindspore
@ -14,87 +14,49 @@
* limitations under the License.
*/

#ifndef MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_ANGLE_ANGLE_ATOM_ENERGY_KERNEL_H_
#define MINDSPORE_CCSRC_BACKEND_KERNEL_COMPILER_GPU_SPONGE_ANGLE_ANGLE_ATOM_ENERGY_KERNEL_H_
#ifndef MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_SPONGE_ANGLE_ANGLE_ATOM_ENERGY_KERNEL_H_
#define MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_SPONGE_ANGLE_ANGLE_ATOM_ENERGY_KERNEL_H_
#include <cuda_runtime_api.h>
#include <vector>
#include <string>
#include <map>
#include <utility>
#include "plugin/device/gpu/kernel/gpu_kernel.h"
#include "plugin/device/gpu/kernel/gpu_kernel_factory.h"
#include "plugin/device/gpu/kernel/cuda_impl/cuda_ops/cuda_common.h"
#include "plugin/device/gpu/kernel/cuda_impl/sponge/angle/angle_atom_energy_impl.cuh"

namespace mindspore {
namespace kernel {
template <typename T, typename T1>
class AngleAtomEnergyGpuKernelMod : public DeprecatedNativeGpuKernelMod {
class AngleAtomEnergyGpuKernelMod : public NativeGpuKernelMod, public MatchKernelHelper<AngleAtomEnergyGpuKernelMod> {
public:
AngleAtomEnergyGpuKernelMod() : ele_uint_crd(1) {}
AngleAtomEnergyGpuKernelMod() = default;
~AngleAtomEnergyGpuKernelMod() override = default;

bool Init(const CNodePtr &kernel_node) override {
kernel_node_ = kernel_node;
angle_numbers = static_cast<int>(GetAttr<int64_t>(kernel_node, "angle_numbers"));
auto shape_uint_crd = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 0);
auto shape_scaler = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 1);
auto shape_atom_a = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 2);
auto shape_atom_b = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 3);
auto shape_atom_c = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 4);
auto shape_angle_k = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 5);
auto shape_angle_theta0 = common::AnfAlgo::GetPrevNodeOutputInferShape(kernel_node, 6);
const std::vector<std::pair<KernelAttr, KernelRunFunc>> &GetFuncList() const override;

ele_uint_crd *= SizeOf(shape_uint_crd);
ele_scaler *= SizeOf(shape_scaler);
ele_atom_a *= SizeOf(shape_atom_a);
ele_atom_b *= SizeOf(shape_atom_b);
ele_atom_c *= SizeOf(shape_atom_c);
ele_angle_k *= SizeOf(shape_angle_k);
ele_angle_theta0 *= SizeOf(shape_angle_theta0);
std::vector<KernelAttr> GetOpSupport() override { return OpSupport(); };

InitSizeLists();
return true;
}
bool Init(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs) override;

bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override {
auto uint_crd_f = GetDeviceAddress<const T1>(inputs, 0);
auto scaler_f = GetDeviceAddress<T>(inputs, 1);
auto atom_a = GetDeviceAddress<const T1>(inputs, 2);
auto atom_b = GetDeviceAddress<const T1>(inputs, 3);
auto atom_c = GetDeviceAddress<const T1>(inputs, 4);
auto angle_k = GetDeviceAddress<T>(inputs, 5);
auto angle_theta0 = GetDeviceAddress<T>(inputs, 6);
int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs,
const std::map<uint32_t, tensor::TensorPtr> &inputsOnHost) override;

auto ene = GetDeviceAddress<T>(outputs, 0);
AngleAtomEnergy(angle_numbers, ele_uint_crd, uint_crd_f, scaler_f, atom_a, atom_b, atom_c, angle_k, angle_theta0,
ene, reinterpret_cast<cudaStream_t>(stream_ptr));
return true;
}

protected:
void InitSizeLists() override {
input_size_list_.push_back(ele_uint_crd * sizeof(T1));
input_size_list_.push_back(ele_scaler * sizeof(T));
input_size_list_.push_back(ele_atom_a * sizeof(T1));
input_size_list_.push_back(ele_atom_b * sizeof(T1));
input_size_list_.push_back(ele_atom_c * sizeof(T1));
input_size_list_.push_back(ele_angle_k * sizeof(T));
input_size_list_.push_back(ele_angle_theta0 * sizeof(T));

output_size_list_.push_back(ele_uint_crd * sizeof(T));
}
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;

private:
size_t ele_uint_crd = 1;
size_t ele_scaler = 1;
size_t ele_atom_a = 1;
size_t ele_atom_b = 1;
size_t ele_atom_c = 1;
size_t ele_angle_k = 1;
size_t ele_angle_theta0 = 1;
template <typename T, typename T1>
bool LaunchKernel(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs);

int angle_numbers;
int angle_numbers_{0};
size_t ele_uint_crd_{1};
void *stream_ptr_{nullptr};
};
} // namespace kernel
} // namespace mindspore
#endif

#endif // MINDSPORE_CCSRC_PLUGIN_DEVICE_GPU_KERNEL_SPONGE_ANGLE_ANGLE_ATOM_ENERGY_KERNEL_H_
@ -0,0 +1,39 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CORE_OPS_ADAPTIVE_AVG_POOL_2D_H_
#define MINDSPORE_CORE_OPS_ADAPTIVE_AVG_POOL_2D_H_
#include <memory>
#include <vector>

#include "ops/base_operator.h"
#include "mindapi/base/types.h"
#include "ops/adaptive_avg_pool_2d_v1.h"

namespace mindspore {
namespace ops {
constexpr auto kNameAdaptiveAvgPool2D = "AdaptiveAvgPool2D";
class MIND_API AdaptiveAvgPool2D : public AdaptiveAvgPool2DV1 {
public:
MIND_API_BASE_MEMBER(AdaptiveAvgPool2D);

/// \brief Constructor.
AdaptiveAvgPool2D() : AdaptiveAvgPool2DV1(kNameAdaptiveAvgPool2D) {}
};
} // namespace ops
} // namespace mindspore

#endif // MINDSPORE_CORE_OPS_ADAPTIVE_AVG_POOL_2D_H_
@ -15,6 +15,7 @@
*/

#include "ops/adaptive_avg_pool_2d_v1.h"
#include "ops/adaptive_avg_pool_2d.h"

#include <algorithm>
#include <set>
@ -34,10 +35,18 @@ abstract::ShapePtr AdaptiveAvgPool2DV1InferShape(const PrimitivePtr &primitive,
const std::vector<AbstractBasePtr> &input_args) {
auto op_name = primitive->name();
auto x_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(input_args[0]->BuildShape())[kShape];
const int64_t input_num_dims = SizeToLong(x_shape.size());
CheckAndConvertUtils::CheckInRange("dim of x", input_num_dims, kIncludeBoth, {3, 4}, op_name);
for (size_t i = 0; i < x_shape.size(); i++) {
CheckAndConvertUtils::CheckInteger(std::to_string(i) + "th dimension of x", x_shape[i], kGreaterEqual, 1, op_name);
if (!IsDynamicRank(x_shape)) {
const int64_t input_num_dims = SizeToLong(x_shape.size());
CheckAndConvertUtils::CheckInRange("dim of x", input_num_dims, kIncludeBoth, {3, 4}, op_name);
} else {
return std::make_shared<abstract::Shape>(x_shape);
}

if (!IsDynamicShape(x_shape)) {
for (size_t i = 0; i < x_shape.size(); i++) {
CheckAndConvertUtils::CheckInteger(std::to_string(i) + "th dimension of x", x_shape[i], kGreaterEqual, 1,
op_name);
}
}

const auto &output_size_ptr = primitive->GetAttr("output_size");
@ -67,6 +76,7 @@ TypePtr AdaptiveAvgPool2DV1InferType(const PrimitivePtr &primitive, const std::v
} // namespace

MIND_API_OPERATOR_IMPL(AdaptiveAvgPool2DV1, BaseOperator);
MIND_API_OPERATOR_IMPL(AdaptiveAvgPool2D, AdaptiveAvgPool2DV1);
AbstractBasePtr AdaptiveAvgPool2DV1Infer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive,
const std::vector<AbstractBasePtr> &input_args) {
MS_EXCEPTION_IF_NULL(primitive);
@ -76,8 +86,8 @@ AbstractBasePtr AdaptiveAvgPool2DV1Infer(const abstract::AnalysisEnginePtr &, co
auto shapes = AdaptiveAvgPool2DV1InferShape(primitive, input_args);
return abstract::MakeAbstract(shapes, types);
}

REGISTER_PRIMITIVE_EVAL_IMPL(AdaptiveAvgPool2DV1, prim::kPrimAdaptiveAvgPool2DV1, AdaptiveAvgPool2DV1Infer, nullptr,
true);
REGISTER_PRIMITIVE_EVAL_IMPL(AdaptiveAvgPool2D, prim::kPrimAdaptiveAvgPool2D, AdaptiveAvgPool2DV1Infer, nullptr, true);
} // namespace ops
} // namespace mindspore
@ -18,6 +18,7 @@
#define MINDSPORE_CORE_OPS_ADAPTIVE_AVG_POOL_2D_V1_H_
#include <memory>
#include <vector>
#include <string>

#include "ops/base_operator.h"
#include "mindapi/base/types.h"
@ -29,6 +30,7 @@ class MIND_API AdaptiveAvgPool2DV1 : public BaseOperator {
public:
MIND_API_BASE_MEMBER(AdaptiveAvgPool2DV1);
AdaptiveAvgPool2DV1() : BaseOperator(kNameAdaptiveAvgPool2DV1) { InitIOName({"x"}, {"y"}); }
explicit AdaptiveAvgPool2DV1(const std::string &kName) : BaseOperator(kName) { InitIOName({"x"}, {"y"}); }
};
abstract::AbstractBasePtr AdaptiveAvgPool2DV1Infer(const abstract::AnalysisEnginePtr &, const PrimitivePtr &primitive,
const std::vector<abstract::AbstractBasePtr> &input_args);
@ -0,0 +1,105 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "ops/angle_atom_energy.h"

#include <set>

#include "ops/op_utils.h"
#include "utils/check_convert_utils.h"
#include "mindapi/src/helper.h"

namespace mindspore {
namespace ops {
MIND_API_OPERATOR_IMPL(AngleAtomEnergy, BaseOperator);
class AngleAtomEnergyInfer : public abstract::OpInferBase {
public:
BaseShapePtr InferShape(const PrimitivePtr &primitive,
const std::vector<AbstractBasePtr> &input_args) const override {
auto prim_name = primitive->name();
(void)CheckAndConvertUtils::CheckInteger("input number", SizeToLong(input_args.size()), kEqual, kInputNum,
prim_name);
auto uint_crd_f_shape_ptr = input_args[kInputIndex0]->BuildShape();
auto uint_crd_f_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(uint_crd_f_shape_ptr)[kShape];
if (!IsDynamic(uint_crd_f_shape)) {
(void)CheckAndConvertUtils::CheckInteger("uint_crd_f_shape", SizeToLong(uint_crd_f_shape.size()), kEqual, kTwo,
prim_name);
(void)CheckAndConvertUtils::CheckInteger("uint_crd_f_shape[1]", SizeToLong(uint_crd_f_shape[1]), kEqual, kThree,
prim_name);
}
auto scaler_f_shape_ptr = input_args[kInputIndex1]->BuildShape();
auto scaler_f_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(scaler_f_shape_ptr)[kShape];
(void)CheckAndConvertUtils::CheckInteger("scaler_f_shape", SizeToLong(scaler_f_shape.size()), kEqual, 1, prim_name);
if (!IsDynamic(scaler_f_shape)) {
(void)CheckAndConvertUtils::CheckInteger("scaler_f_shape", SizeToLong(scaler_f_shape[0]), kEqual, kThree,
prim_name);
}
auto angle_numbers = GetValue<int64_t>(primitive->GetAttr("angle_numbers"));
for (size_t input_index = 2; input_index < kInputNum; ++input_index) {
auto cur_input_shape_ptr = input_args[input_index]->BuildShape();
auto cur_input_shape = CheckAndConvertUtils::ConvertShapePtrToShapeMap(cur_input_shape_ptr)[kShape];
(void)CheckAndConvertUtils::CheckInteger("input_dim", SizeToLong(cur_input_shape.size()), kEqual, 1, prim_name);
if (!IsDynamic(cur_input_shape)) {
(void)CheckAndConvertUtils::CheckInteger("input_shape", SizeToLong(cur_input_shape[0]), kEqual, angle_numbers,
prim_name);
}
}
ShapeVector out_shape{uint_crd_f_shape[0]};
return std::make_shared<abstract::Shape>(out_shape);
}

TypePtr InferType(const PrimitivePtr &primitive, const std::vector<AbstractBasePtr> &input_args) const override {
auto prim_name = primitive->name();
std::set<TypePtr> uint32_type = {kUInt32};
auto uint_crd_f_dtype = input_args[kInputIndex0]->BuildType();
(void)CheckAndConvertUtils::CheckTensorTypeValid("uint_crd_f", uint_crd_f_dtype, uint32_type, prim_name);
std::set<TypePtr> float32_type = {kFloat32};
auto scaler_f_type = input_args[kInputIndex1]->BuildType();
(void)CheckAndConvertUtils::CheckTensorTypeValid("scaler_f", scaler_f_type, float32_type, prim_name);
std::set<TypePtr> int32_type = {kInt32};
auto atom_a_type = input_args[kInputIndex2]->BuildType();
(void)CheckAndConvertUtils::CheckTensorTypeValid("atom_a", atom_a_type, int32_type, prim_name);
auto atom_b_type = input_args[kInputIndex3]->BuildType();
(void)CheckAndConvertUtils::CheckTensorTypeValid("atom_b", atom_b_type, int32_type, prim_name);
auto atom_c_type = input_args[kInputIndex4]->BuildType();
(void)CheckAndConvertUtils::CheckTensorTypeValid("atom_c", atom_c_type, int32_type, prim_name);
auto angle_k_type = input_args[kInputIndex5]->BuildType();
(void)CheckAndConvertUtils::CheckTensorTypeValid("angle_k", angle_k_type, float32_type, prim_name);
auto angle_theta0_type = input_args[kInputIndex6]->BuildType();
(void)CheckAndConvertUtils::CheckTensorTypeValid("angle_theta0", angle_theta0_type, float32_type, prim_name);
return angle_k_type;
}

private:
static constexpr size_t kInputNum = 7;
static constexpr size_t kTwo = 2;
static constexpr size_t kThree = 3;
};

void AngleAtomEnergy::Init(const int64_t angle_numbers) { this->set_angle_numbers(angle_numbers); }

void AngleAtomEnergy::set_angle_numbers(const int64_t angle_numbers) {
(void)this->AddAttr("angle_numbers", api::MakeValue(angle_numbers));
}

int64_t AngleAtomEnergy::get_angle_numbers() const {
auto value_ptr = GetAttr("angle_numbers");
return GetValue<int64_t>(value_ptr);
}

REGISTER_PRIMITIVE_OP_INFER_IMPL(AngleAtomEnergy, prim::kPrimAngleAtomEnergy, AngleAtomEnergyInfer, false);
} // namespace ops
} // namespace mindspore
@ -0,0 +1,51 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef MINDSPORE_CORE_OPS_ANGLE_ATOM_ENERGY_H_
#define MINDSPORE_CORE_OPS_ANGLE_ATOM_ENERGY_H_

#include <map>
#include <vector>
#include <string>
#include <memory>

#include "ops/base_operator.h"
#include "mindapi/base/types.h"

namespace mindspore {
namespace ops {
constexpr auto kNameAngleAtomEnergy = "AngleAtomEnergy";
/// \brief AngleAtomEnergy operation. Refer to Python API @ref mindspore.ops.AngleAtomEnergy for more details.
class MIND_API AngleAtomEnergy : public BaseOperator {
public:
MIND_API_BASE_MEMBER(AngleAtomEnergy);
/// \brief Constructor.
AngleAtomEnergy() : BaseOperator(kNameAngleAtomEnergy) {
InitIOName({"uint_crd_f", "scaler_f", "atom_a", "atom_b", "atom_c", "angle_k", "angle_theta0"}, {"ene"});
}
/// \brief Init. Refer to the parameters of Python API @ref mindspore.ops.AngleAtomEnergy for the inputs.
void Init(const int64_t angle_numbers);
/// \brief Set angle_numbers.
void set_angle_numbers(const int64_t angle_numbers);
/// \brief Get angle_numbers.
///
/// \return angle_numbers.
int64_t get_angle_numbers() const;
};
} // namespace ops
} // namespace mindspore

#endif // MINDSPORE_CORE_OPS_ANGLE_ATOM_ENERGY_H_
@ -741,6 +741,7 @@ GVAR_DEF(PrimitivePtr, kPrimApplyGradientDescent, std::make_shared<Primitive>("A
GVAR_DEF(PrimitivePtr, kPrimApplyPowerSignD, std::make_shared<Primitive>("ApplyPowerSign"));
GVAR_DEF(PrimitivePtr, kPrimAdaptiveAvgPool3D, std::make_shared<Primitive>("AdaptiveAvgPool3D"));
GVAR_DEF(PrimitivePtr, kPrimAdaptiveAvgPool3DGrad, std::make_shared<Primitive>("AdaptiveAvgPool3DGrad"));
GVAR_DEF(PrimitivePtr, kPrimAdaptiveAvgPool2D, std::make_shared<Primitive>("AdaptiveAvgPool2D"));
GVAR_DEF(PrimitivePtr, kPrimAdaptiveAvgPool2DV1, std::make_shared<Primitive>("AdaptiveAvgPool2DV1"));
GVAR_DEF(PrimitivePtr, kPrimAdaptiveAvgPool2DGradV1, std::make_shared<Primitive>("AdaptiveAvgPool2DGradV1"));
GVAR_DEF(PrimitivePtr, kPrimBesselI0e, std::make_shared<Primitive>("BesselI0e"));

@ -1563,6 +1564,9 @@ GVAR_DEF(PrimitivePtr, kPrimAdamApplyOneWithDecayAssign, std::make_shared<Primit
// OCR Ops
GVAR_DEF(PrimitivePtr, kPrimOCRRecognitionPreHandle, std::make_shared<Primitive>("OCRRecognitionPreHandle"));

// Sponge Ops
GVAR_DEF(PrimitivePtr, kPrimAngleAtomEnergy, std::make_shared<Primitive>("AngleAtomEnergy"));

class DoSignaturePrimitive : public Primitive {
 public:
  explicit DoSignaturePrimitive(const std::string &name, const ValuePtr &function)
@ -211,80 +211,6 @@ class AdaptiveAvgPool3D(Primitive):
        self.init_prim_io_names(inputs=['x'], outputs=['y'])


class AdaptiveAvgPool2D(PrimitiveWithInfer):
    r"""
    2D adaptive average pooling for temporal data.

    Refer to :func:`mindspore.ops.adaptive_avg_pool2d` for more detail.

    Supported Platforms:
        ``GPU``

    Examples:
        >>> # case 1: output_size=(None, 2)
        >>> input_x = Tensor(np.array([[[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
        ...                             [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
        ...                             [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]]]), mindspore.float32)
        >>> adaptive_avg_pool_2d = ops.AdaptiveAvgPool2D((None, 2))
        >>> output = adaptive_avg_pool_2d(input_x)
        >>> print(output)
        [[[[1.5 2.5]
           [4.5 5.5]
           [7.5 8.5]]
          [[1.5 2.5]
           [4.5 5.5]
           [7.5 8.5]]
          [[1.5 2.5]
           [4.5 5.5]
           [7.5 8.5]]]]
        >>> # case 2: output_size=2
        >>> adaptive_avg_pool_2d = ops.AdaptiveAvgPool2D(2)
        >>> output = adaptive_avg_pool_2d(input_x)
        >>> print(output)
        [[[[3. 4.]
           [6. 7.]]
          [[3. 4.]
           [6. 7.]]
          [[3. 4.]
           [6. 7.]]]]
        >>> # case 3: output_size=(1, 2)
        >>> adaptive_avg_pool_2d = ops.AdaptiveAvgPool2D((1, 2))
        >>> output = adaptive_avg_pool_2d(input_x)
        >>> print(output)
        [[[[4.5 5.5]]
          [[4.5 5.5]]
          [[4.5 5.5]]]]
    """

    @prim_attr_register
    def __init__(self, output_size):
        """Initialize AdaptiveAvgPool2D."""
        validator.check_value_type("output_size", output_size, [int, tuple], self.name)
        if isinstance(output_size, tuple):
            validator.check_int(len(output_size), 2, Rel.EQ, 'length of output_size', self.name)
        self.output_size = (output_size, output_size) if isinstance(self.output_size, int) else output_size

    def infer_shape(self, x_shape):
        if len(x_shape) <= len(self.output_size):
            raise ValueError("input_x {} dimension must be larger than output_size {} "
                             "dimension".format(x_shape, self.output_size))
        validator.check_int(len(x_shape), 5, Rel.LT, 'input_x_dimensions', self.name)
        for input_x_dimension in x_shape:
            validator.check_int(input_x_dimension, 0, Rel.GT, 'input_x dimension', self.name)
        zipped = zip(self.output_size, x_shape[-len(self.output_size):])
        out_size = [i if i is not None else j for i, j in zipped]
        for item in out_size:
            validator.check_value_type("item of output_size", item, [int], self.name)
        self.add_prim_attr('output_size', out_size)
        output_shape = x_shape[:len(x_shape) - len(out_size)] + out_size
        return output_shape

    def infer_dtype(self, x_dtype):
        validator.check_tensor_dtype_valid("x_dtype", x_dtype, [mstype.float16, mstype.float32, mstype.float64],
                                           self.name)
        return x_dtype

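The shape rule implemented by the removed infer_shape above is easy to check by hand; the following standalone sketch (not part of this commit, with sizes taken from case 1 of the docstring) reruns the same arithmetic in plain Python.

x_shape = [1, 3, 3, 3]        # NCHW shape of the docstring's input_x
output_size = (None, 2)       # None keeps the corresponding input dimension
zipped = zip(output_size, x_shape[-len(output_size):])
out_size = [i if i is not None else j for i, j in zipped]
output_shape = x_shape[:len(x_shape) - len(out_size)] + out_size
print(out_size)       # [3, 2]
print(output_shape)   # [1, 3, 3, 2], matching the (None, 2) output printed in case 1
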
class AdaptiveAvgPool2DV1(Primitive):
    r"""
    AdaptiveAvgPool2DV1 operation.

@ -393,6 +319,57 @@ class AdaptiveAvgPool2DV1(Primitive):
        self.add_prim_attr('output_size', self.output_size)


class AdaptiveAvgPool2D(AdaptiveAvgPool2DV1):
    r"""
    2D adaptive average pooling for temporal data.

    Refer to :func:`mindspore.ops.adaptive_avg_pool2d` for more detail.

    Supported Platforms:
        ``GPU``

    Examples:
        >>> # case 1: output_size=(None, 2)
        >>> input_x = Tensor(np.array([[[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
        ...                             [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]],
        ...                             [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]]]), mindspore.float32)
        >>> adaptive_avg_pool_2d = ops.AdaptiveAvgPool2D((None, 2))
        >>> output = adaptive_avg_pool_2d(input_x)
        >>> print(output)
        [[[[1.5 2.5]
           [4.5 5.5]
           [7.5 8.5]]
          [[1.5 2.5]
           [4.5 5.5]
           [7.5 8.5]]
          [[1.5 2.5]
           [4.5 5.5]
           [7.5 8.5]]]]
        >>> # case 2: output_size=2
        >>> adaptive_avg_pool_2d = ops.AdaptiveAvgPool2D(2)
        >>> output = adaptive_avg_pool_2d(input_x)
        >>> print(output)
        [[[[3. 4.]
           [6. 7.]]
          [[3. 4.]
           [6. 7.]]
          [[3. 4.]
           [6. 7.]]]]
        >>> # case 3: output_size=(1, 2)
        >>> adaptive_avg_pool_2d = ops.AdaptiveAvgPool2D((1, 2))
        >>> output = adaptive_avg_pool_2d(input_x)
        >>> print(output)
        [[[[4.5 5.5]]
          [[4.5 5.5]]
          [[4.5 5.5]]]]
    """

    @prim_attr_register
    def __init__(self, output_size):
        """Initialize AdaptiveAvgPool2D."""
        super(AdaptiveAvgPool2D, self).__init__(output_size)

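After this refactor, AdaptiveAvgPool2D is only a thin wrapper that forwards output_size to AdaptiveAvgPool2DV1, so both primitives accept the same argument forms. The sketch below is not part of this commit; it assumes both classes are exported under mindspore.ops and that a GPU backend is available, and it reuses case 1 of the docstring.

import numpy as np
import mindspore
from mindspore import Tensor, ops

# Same 1 x 3 x 3 x 3 input as docstring case 1.
input_x = Tensor(np.array([[[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0]]] * 3]),
                 mindspore.float32)
pool = ops.AdaptiveAvgPool2D((None, 2))
print(isinstance(pool, ops.AdaptiveAvgPool2DV1))   # True: the subclass delegates to V1
print(pool(input_x).shape)                         # (1, 3, 3, 2), as in case 1
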
class AdaptiveMaxPool2D(Primitive):
    r"""
    AdaptiveMaxPool2D operation.


@ -20,7 +20,7 @@ Note:

import math

from ..primitive import PrimitiveWithInfer, prim_attr_register
from ..primitive import Primitive, PrimitiveWithInfer, prim_attr_register
from ..._checkparam import Rel
from ..._checkparam import Validator as validator
from ...common import dtype as mstype
@ -1053,7 +1053,7 @@ class AngleEnergy(PrimitiveWithInfer):
        return angle_k_type


class AngleAtomEnergy(PrimitiveWithInfer):
class AngleAtomEnergy(Primitive):
    """
    Add the potential energy caused by angle terms to the total potential
    energy of each atom. Assume the number of angles is m and the

@ -1103,39 +1103,6 @@ class AngleAtomEnergy(PrimitiveWithInfer):
                                outputs=['ene'])
        self.add_prim_attr('angle_numbers', self.angle_numbers)

    def infer_shape(self, uint_crd_f_shape, scaler_f_shape, atom_a_shape, atom_b_shape, atom_c_shape, angle_k_shape,
                    angle_theta0_shape):
        cls_name = self.name
        n = uint_crd_f_shape[0]
        m = self.angle_numbers
        validator.check_int(len(uint_crd_f_shape), 2, Rel.EQ, "uint_crd_f_dim", cls_name)
        validator.check_int(len(scaler_f_shape), 1, Rel.EQ, "scaler_f_dim", cls_name)
        validator.check_int(len(atom_a_shape), 1, Rel.EQ, "atom_a_dim", cls_name)
        validator.check_int(len(atom_b_shape), 1, Rel.EQ, "atom_b_dim", cls_name)
        validator.check_int(len(atom_c_shape), 1, Rel.EQ, "atom_c_dim", cls_name)
        validator.check_int(len(angle_k_shape), 1, Rel.EQ, "angle_k_dim", cls_name)
        validator.check_int(len(angle_theta0_shape), 1, Rel.EQ, "angle_theta0_dim", cls_name)

        validator.check_int(uint_crd_f_shape[1], 3, Rel.EQ, "uint_crd_f_shape[1]", cls_name)
        validator.check_int(scaler_f_shape[0], 3, Rel.EQ, "scaler_f_shape", cls_name)
        validator.check_int(atom_a_shape[0], m, Rel.EQ, "atom_a_shape", cls_name)
        validator.check_int(atom_b_shape[0], m, Rel.EQ, "atom_b_shape", cls_name)
        validator.check_int(atom_c_shape[0], m, Rel.EQ, "atom_c_shape", cls_name)
        validator.check_int(angle_k_shape[0], m, Rel.EQ, "angle_k_shape", cls_name)
        validator.check_int(angle_theta0_shape[0], m, Rel.EQ, "angle_theta0_shape", cls_name)
        return [n,]

    def infer_dtype(self, uint_crd_f_dtype, scaler_f_type, atom_a_type, atom_b_type, atom_c_type, angle_k_type,
                    angle_theta0_type):
        validator.check_tensor_dtype_valid('uint_crd_f', uint_crd_f_dtype, [mstype.uint32], self.name)
        validator.check_tensor_dtype_valid('scaler_f', scaler_f_type, [mstype.float32], self.name)
        validator.check_tensor_dtype_valid('atom_a', atom_a_type, [mstype.int32], self.name)
        validator.check_tensor_dtype_valid('atom_b', atom_b_type, [mstype.int32], self.name)
        validator.check_tensor_dtype_valid('atom_c', atom_c_type, [mstype.int32], self.name)
        validator.check_tensor_dtype_valid('angle_k', angle_k_type, [mstype.float32], self.name)
        validator.check_tensor_dtype_valid('angle_theta0', angle_theta0_type, [mstype.float32], self.name)
        return angle_k_type

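The infer_shape and infer_dtype methods removed above document the call contract that now lives in the C++ infer implementation. The hedged sketch below reconstructs that contract with illustrative sizes (n=4 atoms, m=2 angles); it assumes the primitive is reachable as mindspore.ops.AngleAtomEnergy, as the new ops header's doc comment suggests, and that a GPU backend is available since this is a Sponge op.

import numpy as np
import mindspore
from mindspore import Tensor, ops

n, m = 4, 2                                   # n atoms, m angles (illustrative sizes)
angle_atom_energy = ops.AngleAtomEnergy(angle_numbers=m)
ene = angle_atom_energy(
    Tensor(np.zeros((n, 3), np.uint32)),      # uint_crd_f:   (n, 3), uint32
    Tensor(np.ones(3, np.float32)),           # scaler_f:     (3,),   float32
    Tensor(np.array([0, 1], np.int32)),       # atom_a:       (m,),   int32
    Tensor(np.array([1, 2], np.int32)),       # atom_b:       (m,),   int32
    Tensor(np.array([2, 3], np.int32)),       # atom_c:       (m,),   int32
    Tensor(np.ones(m, np.float32)),           # angle_k:      (m,),   float32
    Tensor(np.zeros(m, np.float32)),          # angle_theta0: (m,),   float32
)
print(ene.shape, ene.dtype)                   # (4,) Float32, matching the removed infer rules
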
class AngleForceWithAtomEnergy(PrimitiveWithInfer):
    """