optimize infer

Summary: move OpRunnerInfo from kernel::pyboost to the runtime namespace, drop the trailing OpRunnerInfo* parameter from the pyboost Call/Customize entry points, fold the OpRunnerInfo-based InferOutput/InferOpOutput overloads into the single variadic infer path, and let the generated grad wrappers copy output_abs back after Call.

parent 56ddd6c010
commit c0421eec03
@@ -859,7 +859,7 @@ void MindRTBackend::RunGraphBySingleOp(const GraphCompilerInfo &graph_compiler_i
       MS_LOG(DEBUG) << "Run " << primitive->name() << " by pyboost";
       graph_compiler_->GetSingleOpInputTensors(kernel, op_output_map, parameter_index, inputs[graph_index], true,
           &input_info);
-      kernel::pyboost::OpRunnerInfo op_runner_info{
+      runtime::OpRunnerInfo op_runner_info{
         primitive, device_target, input_info.input_values, input_info.input_abs, {}, kernel->abstract()};
       runtime::PyBoostOpExecute::GetInstance().RunPyBoostCall(&op_runner_info, &op_outputs);
     } else {
@@ -37,15 +37,6 @@ namespace pyboost {
 using GradFunc = std::function<void()>;
 constexpr size_t kAbstractCacheSize = 8192;
 
-struct OpRunnerInfo {
-  const PrimitivePtr &prim;
-  const std::string &device_target;
-  const vector<ValuePtr> &inputs;
-  const abstract::AbstractBasePtrList &inputs_abs;
-  const std::vector<InputType> &inputs_mask;
-  abstract::AbstractBasePtr output_abs;
-};
-
 // OpRunner is a base class for operators.
 // OpRunner records the operator's input abstract,
 // output abstract and output Tensors for grad,
@@ -124,40 +115,18 @@ class BACKEND_EXPORT OpRunner : public std::enable_shared_from_this<OpRunner> {
   }
 
   template <typename... T>
-  void GenerateAbstract(T &...args) {
+  void GenerateAbstract(T &... args) {
     (input_abs_.emplace_back(ConvertAbstract(args)), ...);
   }
 
   // Member function for Infer and creating output tensors.
   template <typename... T>
-  void InferOutput(T &...args) {
+  void InferOutput(T &... args) {
     runtime::ProfilerRecorder profiler(runtime::ProfilerModule::kPynative, runtime::ProfilerEvent::kPyBoostInferOutput,
         primitive_->name(), false);
     (input_abs_.emplace_back(ConvertAbstract(args)), ...);
     output_abs_ = PyBoostUtils::InferByOpDef(primitive_, input_abs_);
     MS_EXCEPTION_IF_NULL(output_abs_);
-    CreateOutput();
-  }
-
-  void InferOutput(OpRunnerInfo *op_runner_info) {
-    MS_EXCEPTION_IF_NULL(op_runner_info);
-    runtime::ProfilerRecorder profiler(runtime::ProfilerModule::kPynative, runtime::ProfilerEvent::kPyBoostInferOutput,
-        primitive_->name(), false);
-    if (op_runner_info->inputs_abs.empty()) {
-      MS_LOG(EXCEPTION) << "Get empty input abstract";
-    }
-    input_abs_ = op_runner_info->inputs_abs;
-    if (op_runner_info->output_abs == nullptr) {
-      output_abs_ = PyBoostUtils::InferByOpDef(primitive_, input_abs_);
-      MS_EXCEPTION_IF_NULL(output_abs_);
-      op_runner_info->output_abs = output_abs_;
-    } else {
-      output_abs_ = op_runner_info->output_abs;
-    }
-    CreateOutput();
-  }
-
-  void CreateOutput() {
     MS_LOG(DEBUG) << "PyBoost infer output " << output_abs_->ToString();
     PyBoostUtils::CreateOutputTensor(output_abs_, &outputs_);
     abstract_cache_.Push(output_abs_);
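Note: with the OpRunnerInfo overload gone, every pyboost operator now goes through the single variadic InferOutput/InferOpOutput path. Below is a minimal sketch of the call order a customize implementation follows after this change, assembled only from calls that appear elsewhere in this commit; the function name and tensor arguments are illustrative assumptions, not repository code.

// Sketch only: mirrors the post-change infer flow used by the customize functions below.
tensor::TensorPtr RunBinaryOpSketch(const std::shared_ptr<OpRunner> &op,
    const TensorPtr &x_tensor, const TensorPtr &y_tensor) {
  // Collect input abstracts, run InferByOpDef and create the output tensors in one step.
  OpRunner::InferOpOutput(op, x_tensor, y_tensor);
  // Allocate device memory for the inputs and the freshly created outputs.
  PyBoostUtils::PrepareOpInputs(op->device_context(), op->stream_id(), x_tensor, y_tensor);
  PyBoostUtils::PrepareOpOutputs(op->device_context(), op->stream_id(), op->outputs());
  // The real launch code is op specific; returning the first output is a placeholder.
  return op->outputs().front();
}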
@@ -165,7 +134,7 @@ class BACKEND_EXPORT OpRunner : public std::enable_shared_from_this<OpRunner> {
 
   // A static function used for the "customize" operator to generate the operator's output Tensor.
   template <typename... T>
-  static void InferOpOutput(const std::shared_ptr<OpRunner> &op, T &...args) {
+  static void InferOpOutput(const std::shared_ptr<OpRunner> &op, T &... args) {
     runtime::ProfilerRecorder profiler(runtime::ProfilerModule::kPynative, runtime::ProfilerEvent::kPyBoostInferOutput,
         op->primitive()->name(), false);
     (op->input_abs_.emplace_back(ConvertAbstract(args)), ...);
@@ -174,26 +143,6 @@ class BACKEND_EXPORT OpRunner : public std::enable_shared_from_this<OpRunner> {
     abstract_cache_.Push(op->output_abs_);
   }
 
-  // A static function used for the "customize" operator to generate the operator's output Tensor for grad op.
-  static void InferOpOutput(const std::shared_ptr<OpRunner> &op, OpRunnerInfo *op_runner_info) {
-    MS_EXCEPTION_IF_NULL(op_runner_info);
-    runtime::ProfilerRecorder profiler(runtime::ProfilerModule::kPynative, runtime::ProfilerEvent::kPyBoostInferOutput,
-        op->primitive()->name(), false);
-    if (op_runner_info->inputs_abs.empty()) {
-      MS_LOG(EXCEPTION) << "Get empty input abstract";
-    }
-    op->input_abs_ = op_runner_info->inputs_abs;
-    if (op_runner_info->output_abs == nullptr) {
-      op->output_abs_ = PyBoostUtils::InferByOpDef(op->primitive(), op->input_abs_);
-      MS_EXCEPTION_IF_NULL(op->output_abs_);
-      op_runner_info->output_abs = op->output_abs_;
-    } else {
-      op->output_abs_ = op_runner_info->output_abs;
-    }
-    PyBoostUtils::CreateOutputTensor(op->output_abs_, &op->outputs_);
-    abstract_cache_.Push(op->output_abs_);
-  }
-
  protected:
   // Op primitive, may delete latter.
   PrimitivePtr primitive_{nullptr};
@@ -29,7 +29,7 @@ class BACKEND_EXPORT ${op_name} : public pyboost::OpRunner {
       : OpRunner(std::move(primitive), device_context) {}
   ~${op_name}() override = default;
 
-  virtual ${return_type} Call(${call_args}, OpRunnerInfo *op_run_info = nullptr) = 0;
+  virtual ${return_type} Call(${call_args}) = 0;
 
  protected:
   static const std::string &op_name() {return op_name_;}
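For orientation, filling the template above in by hand for a hypothetical binary op gives roughly the declaration below; the class name, argument types and constructor parameters are assumptions, shown only to make clear where the removed OpRunnerInfo default argument used to sit.

// Hand-expanded sketch of the generated base class for an assumed "Add" op.
class BACKEND_EXPORT Add : public pyboost::OpRunner {
 public:
  Add(PrimitivePtr primitive, const DeviceContext *device_context)
      : OpRunner(std::move(primitive), device_context) {}
  ~Add() override = default;

  // Before this commit: Call(x_tensor, y_tensor, OpRunnerInfo *op_run_info = nullptr) = 0;
  virtual tensor::TensorPtr Call(const TensorPtr &x_tensor, const TensorPtr &y_tensor) = 0;
};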
@@ -132,7 +132,7 @@ NodePtr FuncBuilder::EmitOp(const PrimitivePtr &prim, const NodePtrList &inputs)
      << PyNativeAlgo::Common::PrintDebugInfo(op_inputs);
   MS_LOG(DEBUG) << "Get input abs size " << input_abs.size() << ", " << PyNativeAlgo::Common::PrintDebugInfo(input_abs);
   VectorRef outputs;
-  kernel::pyboost::OpRunnerInfo op_runner_info{prim, device_target_, op_inputs, input_abs, input_mask, nullptr};
+  runtime::OpRunnerInfo op_runner_info{prim, device_target_, op_inputs, input_abs, input_mask, nullptr};
   runtime::PyBoostOpExecute::GetInstance().Execute(&op_runner_info, &outputs);
   auto real_outputs = common::AnfAlgo::TransformVectorRefToMultiValue(outputs);
   MS_LOG(DEBUG) << "Get output value size " << real_outputs.size() << ", "
@@ -19,7 +19,6 @@
 #include <memory>
 #include <string>
 #include <vector>
-#include "kernel/pyboost/auto_generate/add.h"
 #include "include/common/utils/primitive_utils.h"
 #include "pipeline/pynative/pynative_utils.h"
 #include "ops/framework_ops.h"
@@ -515,14 +515,14 @@ bool GeKernelExecutor::ExecuteKernelTask(const runtime::KernelTaskType &task_typ
     if (input_addr_list.size() != kCopyTaskInputsNum) {
       MS_LOG(EXCEPTION) << "input_addr_list.size() is invalid, input_addr_list.size():" << input_addr_list.size();
     }
-    kernel::pyboost::CustomizeCopyAscend(device_context_, input_addr_list[1], input_addr_list[0], stream_id, nullptr);
+    kernel::pyboost::CustomizeCopyAscend(device_context_, input_addr_list[1], input_addr_list[0], stream_id);
   } else {
     // For contiguous task, there must be at least one input and one output.
     if (input_addr_list.empty() || output_addr_list.empty()) {
       MS_LOG(EXCEPTION) << "input_addr_list.size() or output_addr_list.size() is invalid, input_addr_list.size():"
           << input_addr_list.size() << ", output_addr_list.size():" << output_addr_list.size();
     }
-    kernel::pyboost::CustomizeCopyAscend(device_context_, input_addr_list[0], output_addr_list[0], stream_id, nullptr);
+    kernel::pyboost::CustomizeCopyAscend(device_context_, input_addr_list[0], output_addr_list[0], stream_id);
   }
 
   return true;
@@ -25,12 +25,7 @@ namespace mindspore {
 namespace kernel {
 namespace pyboost {
 tensor::TensorPtr AddAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor,
-    const TensorPtr &y_tensor, OpRunnerInfo *op_runner_info) {
-  if (op_runner_info != nullptr) {
-    OpRunner::InferOpOutput(op, op_runner_info);
-  } else {
-    OpRunner::InferOpOutput(op, x_tensor, y_tensor);
-  }
+    const TensorPtr &y_tensor) {
   OpRunner::InferOpOutput(op, x_tensor, y_tensor);
   // No need to convert input
   PyBoostUtils::PrepareOpInputs(op->device_context(), op->stream_id(), x_tensor, y_tensor);

@@ -27,7 +27,7 @@ namespace mindspore {
 namespace kernel {
 namespace pyboost {
 tensor::TensorPtr AddAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor,
-    const TensorPtr &y_tensor, OpRunnerInfo *op_runner_info);
+    const TensorPtr &y_tensor);
 } // namespace pyboost
 } // namespace kernel
 } // namespace mindspore
@@ -30,14 +30,8 @@ namespace pyboost {
 std::tuple<tensor::TensorPtr, tensor::TensorPtr> ArgMaxWithValueAscendCustomize(const std::shared_ptr<OpRunner> &op,
     const TensorPtr &input_tensor,
     const Int64ImmPtr &axis,
-    const BoolImmPtr &keep_dims,
-    OpRunnerInfo *op_runner_info) {
-  if (op_runner_info != nullptr) {
-    OpRunner::InferOpOutput(op, op_runner_info);
-  } else {
-    OpRunner::InferOpOutput(op, input_tensor, axis, keep_dims);
-  }
-
+    const BoolImmPtr &keep_dims) {
+  OpRunner::InferOpOutput(op, input_tensor, axis, keep_dims);
   // Convert ValuePtr to c++ scalar
   auto axis_imm = GetValue<int64_t>(axis);
   auto keep_dims_imm = GetValue<bool>(keep_dims);

@@ -31,8 +31,7 @@ namespace pyboost {
 std::tuple<tensor::TensorPtr, tensor::TensorPtr> ArgMaxWithValueAscendCustomize(const std::shared_ptr<OpRunner> &op,
     const TensorPtr &input_tensor,
     const Int64ImmPtr &axis,
-    const BoolImmPtr &keep_dims,
-    OpRunnerInfo *op_runner_info);
+    const BoolImmPtr &keep_dims);
 } // namespace pyboost
 } // namespace kernel
 } // namespace mindspore
@@ -30,14 +30,8 @@ namespace pyboost {
 std::tuple<tensor::TensorPtr, tensor::TensorPtr> ArgMinWithValueAscendCustomize(const std::shared_ptr<OpRunner> &op,
     const TensorPtr &input_tensor,
     const Int64ImmPtr &axis,
-    const BoolImmPtr &keep_dims,
-    OpRunnerInfo *op_runner_info) {
-  if (op_runner_info != nullptr) {
-    OpRunner::InferOpOutput(op, op_runner_info);
-  } else {
-    OpRunner::InferOpOutput(op, input_tensor, axis, keep_dims);
-  }
-
+    const BoolImmPtr &keep_dims) {
+  OpRunner::InferOpOutput(op, input_tensor, axis, keep_dims);
   // Convert ValuePtr to c++ scalar
   auto axis_imm = GetValue<int64_t>(axis);
   auto keep_dims_imm = GetValue<bool>(keep_dims);

@@ -31,8 +31,7 @@ namespace pyboost {
 std::tuple<tensor::TensorPtr, tensor::TensorPtr> ArgMinWithValueAscendCustomize(const std::shared_ptr<OpRunner> &op,
     const TensorPtr &input_tensor,
     const Int64ImmPtr &axis,
-    const BoolImmPtr &keep_dims,
-    OpRunnerInfo *op_runner_info);
+    const BoolImmPtr &keep_dims);
 } // namespace pyboost
 } // namespace kernel
 } // namespace mindspore
@@ -23,8 +23,7 @@
 namespace mindspore {
 namespace kernel {
 namespace pyboost {
-tensor::TensorPtr ContiguousAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &input_tensor,
-    OpRunnerInfo *op_runner_info) {
+tensor::TensorPtr ContiguousAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &input_tensor) {
   MS_LOG(DEBUG) << "Call start";
   MS_EXCEPTION_IF_NULL(input_tensor);
 

@@ -27,8 +27,7 @@
 namespace mindspore {
 namespace kernel {
 namespace pyboost {
-tensor::TensorPtr ContiguousAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &input_tensor,
-    OpRunnerInfo *op_runner_info);
+tensor::TensorPtr ContiguousAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &input_tensor);
 } // namespace pyboost
 } // namespace kernel
 } // namespace mindspore
@@ -64,14 +64,8 @@ tensor::TensorPtr Conv2DAscendCall(const std::shared_ptr<OpRunner> &op, const de
 tensor::TensorPtr Conv2DAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &input_tensor,
     const TensorPtr &weight_tensor, const std::optional<TensorPtr> &bias_tensor,
     const ValueTuplePtr &stride, const ValueTuplePtr &padding,
-    const ValueTuplePtr &dilation, const Int64ImmPtr &groups,
-    OpRunnerInfo *op_runner_info) {
-  if (op_runner_info != nullptr) {
-    OpRunner::InferOpOutput(op, op_runner_info);
-  } else {
-    OpRunner::InferOpOutput(op, input_tensor, weight_tensor, bias_tensor, stride, padding, dilation, groups);
-  }
-
+    const ValueTuplePtr &dilation, const Int64ImmPtr &groups) {
+  OpRunner::InferOpOutput(op, input_tensor, weight_tensor, bias_tensor, stride, padding, dilation, groups);
   // Convert ValueTuple to std::vector
   std::vector<int64_t> stride_vector = ConvertValueTupleToVector<int64_t>(stride);
   std::vector<int64_t> padding_vector = ConvertValueTupleToVector<int64_t>(padding);

@@ -30,8 +30,7 @@ namespace pyboost {
 tensor::TensorPtr Conv2DAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &input_tensor,
     const TensorPtr &weight_tensor, const std::optional<TensorPtr> &bias_tensor,
     const ValueTuplePtr &stride, const ValueTuplePtr &padding,
-    const ValueTuplePtr &dilation, const Int64ImmPtr &groups,
-    OpRunnerInfo *op_runner_info);
+    const ValueTuplePtr &dilation, const Int64ImmPtr &groups);
 } // namespace pyboost
 } // namespace kernel
 } // namespace mindspore
@@ -21,8 +21,7 @@
 namespace mindspore {
 namespace kernel {
 namespace pyboost {
-tensor::TensorPtr CopyAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &input_tensor,
-    OpRunnerInfo *op_runner_info) {
+tensor::TensorPtr CopyAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &input_tensor) {
   MS_LOG(DEBUG) << "Call start";
   auto input_abs = input_tensor->ToAbstract();
   input_abs->set_value(kValueAny);

@@ -27,8 +27,7 @@
 namespace mindspore {
 namespace kernel {
 namespace pyboost {
-tensor::TensorPtr CopyAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &input_tensor,
-    OpRunnerInfo *op_runner_info);
+tensor::TensorPtr CopyAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &input_tensor);
 } // namespace pyboost
 } // namespace kernel
 } // namespace mindspore
@@ -23,8 +23,7 @@ namespace kernel {
 namespace pyboost {
 // Unconventional pyboost writing. Please do not refer to this to implement other operators!
 void CustomizeCopyAscend(device::DeviceContext *device_context, const device::DeviceAddressPtr &input_addr,
-    const device::DeviceAddressPtr &output_addr, const size_t &stream_id,
-    OpRunnerInfo *op_runner_info) {
+    const device::DeviceAddressPtr &output_addr, const size_t &stream_id) {
   MS_LOG(DEBUG) << "Call start";
   MS_EXCEPTION_IF_NULL(input_addr);
   MS_EXCEPTION_IF_NULL(output_addr);

@@ -28,8 +28,7 @@ namespace mindspore {
 namespace kernel {
 namespace pyboost {
 void CustomizeCopyAscend(device::DeviceContext *device_context, const device::DeviceAddressPtr &input_addr,
-    const device::DeviceAddressPtr &output_addr, const size_t &stream_id,
-    OpRunnerInfo *op_runner_info);
+    const device::DeviceAddressPtr &output_addr, const size_t &stream_id);
 } // namespace pyboost
 } // namespace kernel
 } // namespace mindspore
@@ -25,19 +25,14 @@ namespace mindspore {
 namespace kernel {
 namespace pyboost {
 tensor::TensorPtr GatherDGradAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &x,
-    const Int64ImmPtr dim, const TensorPtr &index, const TensorPtr &d_out,
-    OpRunnerInfo *op_runner_info) {
+    const Int64ImmPtr dim, const TensorPtr &index, const TensorPtr &d_out) {
   MS_EXCEPTION_IF_NULL(dim);
   MS_EXCEPTION_IF_NULL(op);
   MS_EXCEPTION_IF_NULL(x);
   MS_EXCEPTION_IF_NULL(index);
   MS_EXCEPTION_IF_NULL(d_out);
-  if (op_runner_info != nullptr) {
-    OpRunner::InferOpOutput(op, op_runner_info);
-  } else {
-    OpRunner::InferOpOutput(op, x, dim, index, d_out);
-  }
 
+  OpRunner::InferOpOutput(op, x, dim, index, d_out);
   auto dim_value = dim->value();
   PyBoostUtils::PrepareOpInputs(op->device_context(), op->stream_id(), d_out);
   PyBoostUtils::PrepareOpOutputs(op->device_context(), op->stream_id(), op->outputs());

@@ -26,8 +26,7 @@ namespace mindspore {
 namespace kernel {
 namespace pyboost {
 tensor::TensorPtr GatherDGradAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &x,
-    const Int64ImmPtr dim, const TensorPtr &index, const TensorPtr &d_out,
-    OpRunnerInfo *op_runner_info);
+    const Int64ImmPtr dim, const TensorPtr &index, const TensorPtr &d_out);
 } // namespace pyboost
 } // namespace kernel
 } // namespace mindspore
@@ -24,14 +24,8 @@ namespace mindspore {
 namespace kernel {
 namespace pyboost {
 tensor::TensorPtr GeLUGradAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &dy_tensor,
-    const TensorPtr &x_tensor, const TensorPtr &y_tensor,
-    OpRunnerInfo *op_runner_info) {
-  if (op_runner_info != nullptr) {
-    OpRunner::InferOpOutput(op, op_runner_info);
-  } else {
-    OpRunner::InferOpOutput(op, dy_tensor, x_tensor, y_tensor);
-  }
-
+    const TensorPtr &x_tensor, const TensorPtr &y_tensor) {
+  OpRunner::InferOpOutput(op, dy_tensor, x_tensor, y_tensor);
   // Create device address for input/output tensors
   PyBoostUtils::PrepareOpInputs(op->device_context(), op->stream_id(), dy_tensor, x_tensor, y_tensor);
   PyBoostUtils::PrepareOpOutputs(op->device_context(), op->stream_id(), op->outputs());

@@ -28,8 +28,7 @@ namespace mindspore {
 namespace kernel {
 namespace pyboost {
 tensor::TensorPtr GeLUGradAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &dy_tensor,
-    const TensorPtr &x_tensor, const TensorPtr &y_tensor,
-    OpRunnerInfo *op_runner_info);
+    const TensorPtr &x_tensor, const TensorPtr &y_tensor);
 } // namespace pyboost
 } // namespace kernel
 } // namespace mindspore
@@ -140,13 +140,8 @@ void IdentityCustomizeCall(const std::shared_ptr<OpRunner> &op, const TensorPtr
   }));
 }
 
-tensor::TensorPtr IdentityAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor,
-    OpRunnerInfo *op_runner_info) {
-  if (op_runner_info != nullptr) {
-    OpRunner::InferOpOutput(op, op_runner_info);
-  } else {
-    OpRunner::InferOpOutput(op, x_tensor);
-  }
+tensor::TensorPtr IdentityAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor) {
+  OpRunner::InferOpOutput(op, x_tensor);
   PyBoostUtils::PrepareOpInputs(op->device_context(), op->stream_id(), x_tensor);
   PyBoostUtils::PrepareOpOutputs(op->device_context(), op->stream_id(), op->outputs());
   FillHostInfoForAclOp(x_tensor);

@@ -26,8 +26,7 @@
 namespace mindspore {
 namespace kernel {
 namespace pyboost {
-tensor::TensorPtr IdentityAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor,
-    OpRunnerInfo *op_runner_info);
+tensor::TensorPtr IdentityAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor);
 } // namespace pyboost
 } // namespace kernel
 } // namespace mindspore
@@ -35,13 +35,8 @@ tensor::TensorPtr MaskedFillAscendCall(const std::shared_ptr<OpRunner> &op, cons
 } // namespace
 
 tensor::TensorPtr MaskedFillAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &input_tensor,
-    const TensorPtr &mask_tensor, const TensorPtr &value_tensor,
-    OpRunnerInfo *op_runner_info) {
-  if (op_runner_info != nullptr) {
-    OpRunner::InferOpOutput(op, op_runner_info);
-  } else {
-    OpRunner::InferOpOutput(op, input_tensor, mask_tensor, value_tensor);
-  }
+    const TensorPtr &mask_tensor, const TensorPtr &value_tensor) {
+  OpRunner::InferOpOutput(op, input_tensor, mask_tensor, value_tensor);
   PyBoostUtils::PrepareOpInputs(op->device_context(), op->stream_id(), input_tensor, mask_tensor, value_tensor);
   PyBoostUtils::PrepareOpOutputs(op->device_context(), op->stream_id(), op->outputs());
   // Async

@@ -28,8 +28,7 @@ namespace mindspore {
 namespace kernel {
 namespace pyboost {
 tensor::TensorPtr MaskedFillAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &input_tensor,
-    const TensorPtr &mask_tensor, const TensorPtr &value_tensor,
-    OpRunnerInfo *op_runner_info);
+    const TensorPtr &mask_tensor, const TensorPtr &value_tensor);
 } // namespace pyboost
 } // namespace kernel
 } // namespace mindspore
@@ -26,7 +26,7 @@ namespace mindspore {
 namespace kernel {
 namespace pyboost {
 tensor::TensorPtr ReshapeAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &input_tensor,
-    const ValueTuplePtr &shape, OpRunnerInfo *op_runner_info) {
+    const ValueTuplePtr &shape) {
   MS_LOG(DEBUG) << "Call start";
   MS_EXCEPTION_IF_NULL(input_tensor);
 

@@ -28,7 +28,7 @@ namespace mindspore {
 namespace kernel {
 namespace pyboost {
 tensor::TensorPtr ReshapeAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &input_tensor,
-    const ValueTuplePtr &shape, OpRunnerInfo *op_runner_info);
+    const ValueTuplePtr &shape);
 } // namespace pyboost
 } // namespace kernel
 } // namespace mindspore
@@ -32,13 +32,8 @@ void SigmoidGradAscendCall(const std::shared_ptr<OpRunner> &op, const device::De
 } // namespace
 
 tensor::TensorPtr SigmoidGradAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &y_tensor,
-    const TensorPtr &dy_tensor, OpRunnerInfo *op_runner_info) {
-  if (op_runner_info != nullptr) {
-    OpRunner::InferOpOutput(op, op_runner_info);
-  } else {
-    OpRunner::InferOpOutput(op, dy_tensor, y_tensor);
-  }
-
+    const TensorPtr &dy_tensor) {
+  OpRunner::InferOpOutput(op, dy_tensor, y_tensor);
   // Create device address for input/output tensors
   PyBoostUtils::PrepareOpInputs(op->device_context(), op->stream_id(), dy_tensor, y_tensor);
   PyBoostUtils::PrepareOpOutputs(op->device_context(), op->stream_id(), op->outputs());

@@ -28,7 +28,7 @@ namespace mindspore {
 namespace kernel {
 namespace pyboost {
 tensor::TensorPtr SigmoidGradAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &y_tensor,
-    const TensorPtr &dy_tensor, OpRunnerInfo *op_runner_info);
+    const TensorPtr &dy_tensor);
 } // namespace pyboost
 } // namespace kernel
 } // namespace mindspore
@@ -35,13 +35,8 @@ void SoftmaxAscendCall(const std::shared_ptr<OpRunner> &op, const device::Device
 } // namespace
 
 tensor::TensorPtr SoftmaxAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &logits_tensor,
-    const ValueTuplePtr &axis, OpRunnerInfo *op_runner_info) {
-  if (op_runner_info != nullptr) {
-    OpRunner::InferOpOutput(op, op_runner_info);
-  } else {
-    OpRunner::InferOpOutput(op, logits_tensor, axis);
-  }
-
+    const ValueTuplePtr &axis) {
+  OpRunner::InferOpOutput(op, logits_tensor, axis);
   // ValueTuple to std::vector
   auto axis_vector = ConvertValueTupleToVector<int64_t>(axis);
   auto dim = axis_vector[0];

@@ -28,7 +28,7 @@ namespace mindspore {
 namespace kernel {
 namespace pyboost {
 tensor::TensorPtr SoftmaxAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &logits_tensor,
-    const ValueTuplePtr &axis, OpRunnerInfo *op_runner_info);
+    const ValueTuplePtr &axis);
 } // namespace pyboost
 } // namespace kernel
 } // namespace mindspore
@@ -36,14 +36,8 @@ void SquareAscendCall(const std::shared_ptr<OpRunner> &op, const device::DeviceC
 }
 } // namespace
 
-tensor::TensorPtr SquareAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor,
-    OpRunnerInfo *op_runner_info) {
-  if (op_runner_info != nullptr) {
-    OpRunner::InferOpOutput(op, op_runner_info);
-  } else {
-    OpRunner::InferOpOutput(op, x_tensor);
-  }
-
+tensor::TensorPtr SquareAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor) {
+  OpRunner::InferOpOutput(op, x_tensor);
   // No need to convert input
   PyBoostUtils::PrepareOpInputs(op->device_context(), op->stream_id(), x_tensor);
   PyBoostUtils::PrepareOpOutputs(op->device_context(), op->stream_id(), op->outputs());

@@ -27,8 +27,7 @@
 namespace mindspore {
 namespace kernel {
 namespace pyboost {
-tensor::TensorPtr SquareAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor,
-    OpRunnerInfo *op_runner_info);
+tensor::TensorPtr SquareAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &x_tensor);
 } // namespace pyboost
 } // namespace kernel
 } // namespace mindspore
@@ -26,13 +26,9 @@
 
 namespace mindspore::kernel::pyboost {
 void TileAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &input_x_tensor,
-    const ValueTuplePtr &dims, OpRunnerInfo *op_runner_info) {
+    const ValueTuplePtr &dims) {
   MS_EXCEPTION_IF_NULL(op);
-  if (op_runner_info != nullptr) {
-    OpRunner::InferOpOutput(op, op_runner_info);
-  } else {
-    OpRunner::InferOpOutput(op, input_x_tensor, dims);
-  }
+  OpRunner::InferOpOutput(op, input_x_tensor, dims);
   std::vector<int64_t> multiples_vector = ConvertValueTupleToVector<int64_t>(dims);
 
   // Expand dims with 1 in head when its length is less than x rank.

@@ -25,6 +25,6 @@
 
 namespace mindspore::kernel::pyboost {
 void TileAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &input_tensor,
-    const ValueTuplePtr &multiples, OpRunnerInfo *op_runner_info);
+    const ValueTuplePtr &multiples);
 } // namespace mindspore::kernel::pyboost
 #endif // MINDSPORE_MINDSPORE_CCSRC_PLUGIN_DEVICE_ASCEND_KERNEL_PYBOOST_CUSTOMIZE_TILE_H_
@@ -34,14 +34,9 @@ tensor::TensorPtr UpsampleNearest1dAscendCall(const std::shared_ptr<OpRunner> &o
 } // namespace
 
 tensor::TensorPtr UpsampleNearest1dAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &input_tensor,
-    const ValueTuplePtr &output_size, const ValueTuplePtr &scale_factors,
-    OpRunnerInfo *op_runner_info) {
-  if (op_runner_info != nullptr) {
-    OpRunner::InferOpOutput(op, op_runner_info);
-  } else {
-    OpRunner::InferOpOutput(op, input_tensor, output_size, scale_factors);
-  }
-
+    const ValueTuplePtr &output_size,
+    const ValueTuplePtr &scale_factors) {
+  OpRunner::InferOpOutput(op, input_tensor, output_size, scale_factors);
   std::vector<int64_t> output_size_vector = ConvertValueTupleToVector<int64_t>(output_size);
 
   PyBoostUtils::PrepareOpInputs(op->device_context(), op->stream_id(), input_tensor);

@@ -28,8 +28,8 @@ namespace mindspore {
 namespace kernel {
 namespace pyboost {
 tensor::TensorPtr UpsampleNearest1dAscendCustomize(const std::shared_ptr<OpRunner> &op, const TensorPtr &input_tensor,
-    const ValueTuplePtr &output_size, const ValueTuplePtr &scale_factors,
-    OpRunnerInfo *op_runner_info);
+    const ValueTuplePtr &output_size,
+    const ValueTuplePtr &scale_factors);
 } // namespace pyboost
 } // namespace kernel
 } // namespace mindspore
@@ -30,7 +30,7 @@ class ${op_name}Ascend : public pyboost::${op_name} {
       : ${op_name}(std::move(primitive), device_context) {}
   ~${op_name}Ascend() = default;
 
-  ${return_type} Call(${call_args_with_type}, OpRunnerInfo * op_runner_info = nullptr) override;
+  ${return_type} Call(${call_args_with_type}) override;
 };
 } // namespace pyboost
 } // namespace kernel

@@ -23,7 +23,7 @@ ${customize_include}
 namespace mindspore {
 namespace kernel {
 namespace pyboost {
-${return_type} ${op_name}Ascend::Call(${call_args_with_type}, OpRunnerInfo * op_runner_info) {
+${return_type} ${op_name}Ascend::Call(${call_args_with_type}) {
   ${call_impl}
 }
 MS_REG_PYBOOST_OP(Ascend, ${op_name});
@@ -1,9 +1,5 @@
 MS_LOG(DEBUG) << op_name() << " call start";
-if (op_runner_info != nullptr) {
-  InferOutput(op_runner_info);
-} else {
-  InferOutput(${call_args});
-}
+InferOutput(${call_args});
 // ValueTuple to std::vector
 ${value_tuple_convert}
 // Convert ValuePtr to c++ scalar

@@ -1,2 +1,2 @@
-${customize_func}(get_op(), ${call_args}, op_runner_info);
+${customize_func}(get_op(), ${call_args});
 return ${return_values};
@@ -1,9 +1,5 @@
 MS_LOG(DEBUG) << op_name() << " call start";
-if (op_runner_info != nullptr) {
-  InferOutput(op_runner_info);
-} else {
-  InferOutput(${call_args});
-}
+InferOutput(${call_args});
 
 ${tensor_list_convert}
 MS_EXCEPTION_IF_NULL(primitive());
@@ -30,7 +30,7 @@ class ${op_name}CPU : public pyboost::${op_name} {
      : ${op_name}(std::move(primitive), device_context) {}
   ~${op_name}CPU() = default;
 
-  ${return_type} Call(${call_args_with_type}, OpRunnerInfo * op_runner_info = nullptr) override;
+  ${return_type} Call(${call_args_with_type}) override;
 };
 } // namespace pyboost
 } // namespace kernel

@@ -21,7 +21,7 @@ ${customize_include}
 namespace mindspore {
 namespace kernel {
 namespace pyboost {
-${return_type} ${op_name}CPU::Call(${call_args_with_type}, OpRunnerInfo * op_runner_info) {
+${return_type} ${op_name}CPU::Call(${call_args_with_type}) {
   ${call_impl}
 }
 MS_REG_PYBOOST_OP(CPU, ${op_name});

@@ -1,9 +1,5 @@
 MS_LOG(DEBUG) << op_name() << " call start";
-if (op_runner_info != nullptr) {
-  InferOutput(op_runner_info);
-} else {
-  InferOutput(${call_args});
-}
+InferOutput(${call_args});
 
 ${tensor_list_convert}
 
@@ -30,7 +30,7 @@ class ${op_name}GPU : public pyboost::${op_name} {
      : ${op_name}(std::move(primitive), device_context) {}
   ~${op_name}GPU() = default;
 
-  ${return_type} Call(${call_args_with_type}, OpRunnerInfo * op_runner_info = nullptr) override;
+  ${return_type} Call(${call_args_with_type}) override;
 };
 } // namespace pyboost
 } // namespace kernel

@@ -22,7 +22,7 @@ ${customize_include}
 namespace mindspore {
 namespace kernel {
 namespace pyboost {
-${return_type} ${op_name}GPU::Call(${call_args_with_type}, OpRunnerInfo * op_runner_info) {
+${return_type} ${op_name}GPU::Call(${call_args_with_type}) {
   ${call_impl}
 }
 MS_REG_PYBOOST_OP(GPU, ${op_name});
@@ -106,7 +106,7 @@ void ChildAtFork() {
     MS_LOG(DEBUG) << "Release GIL lock acquired manually before fork.";
     PyGILState_Release(static_cast<PyGILState_STATE>(ForkUtils::GetInstance().GetGilState()));
   }
-  device::DeviceContextManager::GetInstance().ChildAfterFork();
   // Trigger ChildAfterFork callbacks in child process.
   ForkUtils::GetInstance().ChildAtFork();
 }
@@ -21,11 +21,11 @@
 #include <string>
 #include <vector>
 #include "kernel/pyboost/op_runner.h"
+#include "runtime/pynative/op_runner.h"
 #include "runtime/pynative/op_function/func_object.h"
 #include "backend/graph_compiler/backend.h"
 
 namespace mindspore::runtime {
-using OpRunnerInfo = kernel::pyboost::OpRunnerInfo;
 using Func = std::function<void(OpRunnerInfo *, VectorRef *)>;
 
 class PyBoostOpExecute {
@@ -6,8 +6,10 @@ void ${func_name}(OpRunnerInfo* op_runner_info, VectorRef *op_outputs) {
 
 // Run op
 ${convert_body}
-(void)op->Call(${call_args}, op_runner_info);
+(void)op->Call(${call_args});
+op_runner_info->output_abs = op->output_abs();
 MS_EXCEPTION_IF_NULL(op_outputs);
+MS_EXCEPTION_IF_NULL(op_runner_info->output_abs);
 (void)std::transform(op->outputs().begin(), op->outputs().end(), std::back_inserter(*op_outputs),
                      [] (const auto &item) {return item;});
 }
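A hand-expanded instance of this template for an assumed Add wrapper is sketched below. Only the Call/output_abs handling is taken from this commit; the op construction and the input converters stand in for ${convert_body} and are hypothetical names, not generated code.

// Hypothetical expansion; the point is the new order: Call() no longer receives
// op_runner_info, so the wrapper publishes the inferred abstract afterwards.
void AddGradFuncSketch(OpRunnerInfo *op_runner_info, VectorRef *op_outputs) {
  // Run op
  auto op = CreateAddOpForTarget(op_runner_info->device_target);    // stand-in for ${convert_body}
  auto x_tensor = ConvertValueToTensor(op_runner_info->inputs[0]);  // hypothetical converter
  auto y_tensor = ConvertValueToTensor(op_runner_info->inputs[1]);
  (void)op->Call(x_tensor, y_tensor);
  op_runner_info->output_abs = op->output_abs();  // new in this commit
  MS_EXCEPTION_IF_NULL(op_outputs);
  MS_EXCEPTION_IF_NULL(op_runner_info->output_abs);
  (void)std::transform(op->outputs().begin(), op->outputs().end(), std::back_inserter(*op_outputs),
                       [](const auto &item) { return item; });
}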
@@ -17,7 +17,7 @@
 #include "runtime/pynative/op_function/pyboost_grad_functions.h"
 #include "runtime/pynative/op_executor.h"
 #include "runtime/pynative/op_function/value_converter.h"
-#include "kernel/pyboost/py_boost_utils.h"
+#include "kernel/pyboost/pyboost_utils.h"
 #include "runtime/pynative/op_function/pyboost_grad_functions.h"
 #include "backend/graph_compiler/vmimpl.h"
 #include "include/common/utils/python_adapter.h"
@@ -24,6 +24,15 @@
 #include "runtime/hardware/device_context.h"
 
 namespace mindspore::runtime {
+struct OpRunnerInfo {
+  const PrimitivePtr &prim;
+  const std::string &device_target;
+  const vector<ValuePtr> &inputs;
+  const abstract::AbstractBasePtrList &inputs_abs;
+  const std::vector<InputType> &inputs_mask;
+  abstract::AbstractBasePtr output_abs;
+};
+
 class OpRunner {
  public:
  // Update Tensor or input node DeviceAddress before PyNative async running.
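Taken together with the backend and FuncBuilder hunks at the top of this commit, a runtime-side caller now builds this relocated struct directly and hands it to PyBoostOpExecute. A compact sketch of that flow, assuming the primitive, input values, abstracts and masks are already at hand:

// Sketch assembled from the call sites changed in this commit; variable setup is assumed.
VectorRef op_outputs;
runtime::OpRunnerInfo op_runner_info{prim,           // PrimitivePtr of the op to run
                                     device_target,  // e.g. "Ascend"
                                     input_values,   // std::vector<ValuePtr>
                                     input_abs,      // abstract::AbstractBasePtrList
                                     input_mask,     // std::vector<InputType>
                                     nullptr};       // output_abs, filled in after the call
runtime::PyBoostOpExecute::GetInstance().RunPyBoostCall(&op_runner_info, &op_outputs);
// op_runner_info.output_abs now holds the inferred output abstract (set by the generated wrapper).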