!40257 [MS][Lite]introduce extendrt for cloud inference

Merge pull request !40257 from zhaizhiqiang/master
i-robot 2022-09-06 12:11:26 +00:00 committed by Gitee
commit 2f7531af8f
74 changed files with 625 additions and 4892 deletions

View File

@ -36,6 +36,7 @@ enum DeviceType {
};
class Allocator;
class AbstractDelegate;
class Delegate;
class DeviceInfoContext;
@ -46,37 +47,37 @@ class MS_API Context {
Context();
~Context() = default;
/// \brief Set the number of threads at runtime. Only valid for Lite.
/// \brief Set the number of threads at runtime.
///
/// \param[in] thread_num the number of threads at runtime.
void SetThreadNum(int32_t thread_num);
/// \brief Get the current thread number setting. Only valid for Lite.
/// \brief Get the current thread number setting.
///
/// \return The current thread number setting.
int32_t GetThreadNum() const;
/// \brief Set the parallel number of operators at runtime. Only valid for Lite.
/// \brief Set the parallel number of operators at runtime.
///
/// \param[in] parallel_num the parallel number of operators at runtime.
void SetInterOpParallelNum(int32_t parallel_num);
/// \brief Get the current operators parallel number setting. Only valid for Lite.
/// \brief Get the current operators parallel number setting.
///
/// \return The current operators parallel number setting.
int32_t GetInterOpParallelNum() const;
/// \brief Set the thread affinity to CPU cores. Only valid for Lite.
/// \brief Set the thread affinity to CPU cores.
///
/// \param[in] mode: 0: no affinities, 1: big cores first, 2: little cores first
void SetThreadAffinity(int mode);
/// \brief Get the thread affinity of CPU cores. Only valid for Lite.
/// \brief Get the thread affinity of CPU cores.
///
/// \return Thread affinity to CPU cores. 0: no affinities, 1: big cores first, 2: little cores first
int GetThreadAffinityMode() const;
/// \brief Set the thread lists to CPU cores. Only valid for Lite.
/// \brief Set the thread lists to CPU cores.
///
/// \note If core_list and mode are set by SetThreadAffinity at the same time, the core_list is effective, but the
/// mode is not effective.
@ -84,29 +85,35 @@ class MS_API Context {
/// \param[in] core_list: a vector of thread core lists.
void SetThreadAffinity(const std::vector<int> &core_list);
/// \brief Get the thread lists of CPU cores. Only valid for Lite.
/// \brief Get the thread lists of CPU cores.
///
/// \return core_list: a vector of thread core lists.
std::vector<int32_t> GetThreadAffinityCoreList() const;
/// \brief Set the status whether to perform model inference or training in parallel. Only valid for Lite.
/// \brief Set the status whether to perform model inference or training in parallel.
///
/// \param[in] is_parallel: true, parallel; false, not in parallel.
void SetEnableParallel(bool is_parallel);
/// \brief Get the status whether to perform model inference or training in parallel. Only valid for Lite.
/// \brief Get the status whether to perform model inference or training in parallel.
///
/// \return Bool value that indicates whether in parallel.
bool GetEnableParallel() const;
/// \brief Set Delegate to access third-party AI framework. Only valid for Lite.
/// \brief Set Delegate to access third-party AI framework.
///
/// \param[in] delegate the custom delegate.
void set_delegate(const std::shared_ptr<AbstractDelegate> &delegate);
// deprecated
void SetDelegate(const std::shared_ptr<Delegate> &delegate);
/// \brief Get the delegate of the third-party AI framework. Only valid for Lite.
/// \brief Get the delegate of the third-party AI framework.
///
/// \return Pointer to the custom delegate.
std::shared_ptr<AbstractDelegate> get_delegate() const;
// deprecated
std::shared_ptr<Delegate> GetDelegate() const;
/// \brief Set quant model to run as float model in multi device.
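To make the revised Context surface above concrete, here is a minimal, hypothetical sketch that only exercises the setters shown in this hunk (BuildContext is an illustrative helper name, not part of the API):

#include <memory>
#include <vector>
#include "include/api/context.h"

// A hypothetical helper that exercises the Context setters documented above.
std::shared_ptr<mindspore::Context> BuildContext() {
  auto context = std::make_shared<mindspore::Context>();
  context->SetThreadNum(4);                                   // four runtime threads
  context->SetInterOpParallelNum(2);                          // at most two operators in parallel
  context->SetThreadAffinity(std::vector<int>{0, 1, 2, 3});   // pin threads to cores 0-3
  context->SetEnableParallel(true);                           // allow parallel inference/training
  // context->set_delegate(my_delegate);                      // optional: hand sub-graphs to a third-party delegate
  return context;
}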

View File

@ -22,7 +22,7 @@
#include <memory>
#include "schema/model_generated.h"
#include "include/api/kernel.h"
#include "include/api/status.h"
#include "include/api/delegate_api.h"
namespace mindspore {
typedef enum {
@ -36,6 +36,7 @@ using KernelIter = std::vector<kernel::Kernel *>::iterator;
template <class T>
class MS_API DelegateModel {
public:
DelegateModel() = default;
/// \brief Constructor of MindSpore Lite DelegateModel.
DelegateModel(std::vector<kernel::Kernel *> *kernels, const std::vector<MSTensor> &inputs,
const std::vector<MSTensor> &outputs, const std::map<kernel::Kernel *, const T *> &primitives,
@ -107,14 +108,14 @@ class MS_API DelegateModel {
SchemaVersion version_;
};
class MS_API Delegate {
// lite delegate use kernel::Kernel as graph node.
using LiteDelegateGraph = DelegateModel<schema::Primitive>;
class Delegate : public IDelegate<LiteDelegateGraph, kernel::Kernel, kernel::Kernel> {
public:
/// \brief Constructor of MindSpore Lite Delegate.
Delegate() = default;
/// \brief Destructor of MindSpore Lite Delegate.
Delegate(const std::vector<mindspore::MSTensor> &inputs, const std::vector<mindspore::MSTensor> &outputs)
: IDelegate<LiteDelegateGraph, kernel::Kernel, kernel::Kernel>(inputs, outputs) {}
virtual ~Delegate() = default;
/// \brief Init delegate.
///
/// \note Init will be called in Model::Build.
@ -122,12 +123,26 @@ class MS_API Delegate {
/// \return Status. If Status is kLiteNotSupport, the program will return to the MindSpore Lite inner inference.
virtual Status Init() = 0;
/// \brief Build delegate graph for MindSpore Lite model.
std::shared_ptr<kernel::Kernel> CreateKernel(const std::shared_ptr<kernel::Kernel> &node) override {
// return node as kernel since they are the same object.
return node;
}
bool IsDelegateNode(const std::shared_ptr<kernel::Kernel> &node) override { return false; }
/// \brief Replace the nodes in the model with delegate nodes; the delegate will create kernels from its delegate nodes.
///
/// \param[in] graph The graph to be built.
void ReplaceNodes(const std::shared_ptr<LiteDelegateGraph> &graph) override {}
/// \brief Build delegate graph for MindSpore model.
///
/// \note Build will be called in Model::Build.
///
/// \param[in] model Define the delegate model to be built.
virtual Status Build(DelegateModel<schema::Primitive> *model) = 0;
///
/// \note Deprecated, use ReplaceNodes and CreateKernel to build the delegate model.
virtual Status Build(LiteDelegateGraph *model) = 0;
};
class MS_API CoreMLDelegate : public Delegate {
@ -147,7 +162,7 @@ class MS_API CoreMLDelegate : public Delegate {
/// \note Build will be called in Model::Build.
///
/// \param[in] model Define the delegate model to be built.
Status Build(DelegateModel<schema::Primitive> *model) override;
Status Build(LiteDelegateGraph *model) override;
protected:
std::shared_ptr<Delegate> impl_;
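A hedged sketch of how a third-party delegate might plug into the refactored Delegate base above: only Init and the (now deprecated) Build(LiteDelegateGraph *) remain pure virtual, while ReplaceNodes/IsDelegateNode/CreateKernel get default implementations. MyDelegate is an illustrative name, the header path is assumed, and the BeginKernelIterator/EndKernelIterator accessors of DelegateModel are assumed from the existing Lite API rather than shown in this hunk:

#include <memory>
#include "include/api/delegate.h"
#include "include/api/status.h"

namespace example {
// MyDelegate is an illustrative name; it is not part of the MindSpore API.
class MyDelegate : public mindspore::Delegate {
 public:
  MyDelegate() = default;
  ~MyDelegate() override = default;

  // Called from Model::Build; returning kLiteNotSupport falls back to built-in inference.
  mindspore::Status Init() override { return mindspore::kSuccess; }

  // Deprecated entry point kept by the refactored base class: walk the kernels
  // and hand supported segments to the third-party framework.
  mindspore::Status Build(mindspore::LiteDelegateGraph *model) override {
    for (auto it = model->BeginKernelIterator(); it != model->EndKernelIterator(); ++it) {
      // Inspect (*it) here and replace supported node sequences with a delegate kernel.
    }
    return mindspore::kSuccess;
  }
};
}  // namespace example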

View File

@ -0,0 +1,75 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
 * Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_INCLUDE_API_DELEGATE_API_H
#define MINDSPORE_INCLUDE_API_DELEGATE_API_H
#include <map>
#include <vector>
#include <memory>
#include "include/api/status.h"
#include "include/api/types.h"
namespace mindspore {
class AbstractDelegate {
public:
AbstractDelegate() = default;
AbstractDelegate(const std::vector<mindspore::MSTensor> &inputs, const std::vector<mindspore::MSTensor> &outputs)
: inputs_(inputs), outputs_(outputs) {}
virtual ~AbstractDelegate() = default;
/// \brief Get the input tensors of DelegateModel.
///
/// \return The input tensor vector of DelegateModel.
const std::vector<mindspore::MSTensor> &inputs() { return this->inputs_; }
/// \brief Get the output tensors of DelegateModel.
///
/// \return The output tensor vector of DelegateModel.
const std::vector<mindspore::MSTensor> &outputs() { return this->outputs_; }
protected:
std::vector<mindspore::MSTensor> inputs_;
std::vector<mindspore::MSTensor> outputs_;
};
template <typename Graph, typename Node, typename Kernel>
class IDelegate : public AbstractDelegate {
public:
IDelegate() = default;
IDelegate(const std::vector<mindspore::MSTensor> &inputs, const std::vector<mindspore::MSTensor> &outputs)
: AbstractDelegate(inputs, outputs) {}
virtual ~IDelegate() = default;
/// \brief Replace the nodes in the model with delegate nodes; the delegate will create kernels from its delegate nodes.
///
/// \param[in] graph The graph to be built.
virtual void ReplaceNodes(const std::shared_ptr<Graph> &graph) = 0;
/// \brief Check whether this node belongs to this delegate.
///
/// \param[in] node The node to be checked.
///
/// \return True if the node belongs to this delegate; otherwise false.
virtual bool IsDelegateNode(const std::shared_ptr<Node> &node) = 0;
/// \brief Create a delegate kernel if the node is a delegate node.
///
/// \param[in] node Define the node used to create the delegate kernel.
///
/// \return The delegate kernel; if the node is not a delegate node, nullptr is returned.
virtual std::shared_ptr<Kernel> CreateKernel(const std::shared_ptr<Node> &node) = 0;
};
} // namespace mindspore
#endif // MINDSPORE_INCLUDE_API_DELEGATE_API_H
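A minimal sketch of implementing the new IDelegate template for a custom runtime; MyGraph, MyNode, MyKernel and MyDelegate are placeholder names used only to show the contract of ReplaceNodes, IsDelegateNode and CreateKernel:

#include <memory>
#include <unordered_set>
#include "include/api/delegate_api.h"

namespace example {
// Placeholder types used only to show the template contract.
struct MyGraph {};
struct MyNode {};
struct MyKernel {};

class MyDelegate : public mindspore::IDelegate<MyGraph, MyNode, MyKernel> {
 public:
  // Record which nodes of the graph this delegate wants to take over.
  void ReplaceNodes(const std::shared_ptr<MyGraph> &graph) override {
    // e.g. scan the graph and fill delegate_nodes_.
  }

  bool IsDelegateNode(const std::shared_ptr<MyNode> &node) override {
    return delegate_nodes_.count(node.get()) > 0;
  }

  // Return nullptr for nodes this delegate does not claim; the framework keeps its own kernel.
  std::shared_ptr<MyKernel> CreateKernel(const std::shared_ptr<MyNode> &node) override {
    if (!IsDelegateNode(node)) {
      return nullptr;
    }
    return std::make_shared<MyKernel>();
  }

 private:
  std::unordered_set<MyNode *> delegate_nodes_;
};
}  // namespace example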

View File

@ -14,8 +14,8 @@
* limitations under the License.
*/
#ifndef MINDSPORE_INCLUDE_API_KERNEL_H
#define MINDSPORE_INCLUDE_API_KERNEL_H
#ifndef MINDSPORE_LITE_INCLUDE_KERNEL_H
#define MINDSPORE_LITE_INCLUDE_KERNEL_H
#include <vector>
#include <string>
#include <utility>
@ -23,75 +23,18 @@
#include "schema/model_generated.h"
#include "include/api/types.h"
#include "include/api/context.h"
#include "include/api/kernel_api.h"
namespace mindspore::kernel {
/// \brief The Kernel class is used to define a MindSpore Kernel.
class MS_API Kernel {
class MS_API Kernel : public IKernel<schema::Primitive> {
public:
Kernel() = default;
/// \brief Constructor.
///
/// \param[in] inputs define the input tensors for kernel.
/// \param[in] outputs define the output tensors for kernel.
/// \param[in] primitive define the primitive of kernel generated by flatbuffers.
/// \param[in] ctx define the context for kernel.
Kernel(const std::vector<mindspore::MSTensor> &inputs, const std::vector<mindspore::MSTensor> &outputs,
const schema::Primitive *primitive, const mindspore::Context *ctx)
: context_(ctx), inputs_(std::move(inputs)), outputs_(std::move(outputs)), primitive_(primitive) {
: IKernel<schema::Primitive>(inputs, outputs, primitive, ctx) {
Initialize();
}
/// \brief Destructor.
virtual ~Kernel() = default;
/// \brief prepare for executing kernel.
///
/// \return result code.
virtual int Prepare() = 0;
/// \brief execute the kernel.
///
/// \return result code.
virtual int Execute() = 0;
/// \brief resize the kernel input shape, memory need to refresh.
///
/// \return result code.
virtual int ReSize() = 0;
/// \brief set kernel's input tensors.
///
/// \param[in] in_tensors define the input tensors.
virtual void set_inputs(const std::vector<mindspore::MSTensor> &in_tensors) { this->inputs_ = in_tensors; }
/// \brief set kernel's input tensor.
///
/// \param[in] in_tensor define the input tensor.
/// \param[in] index define the index of the input tensor.
virtual void set_input(mindspore::MSTensor in_tensor, int index) { this->inputs_[index] = in_tensor; }
/// \brief set kernel's output tensors.
///
/// \param[in] out_tensors define the output tensors.
virtual void set_outputs(const std::vector<mindspore::MSTensor> &out_tensors) { this->outputs_ = out_tensors; }
/// \brief set kernel's output tensor.
///
/// \param[in] out_tensor define the output tensor.
/// \param[in] index define the index of the output tensor.
virtual void set_output(mindspore::MSTensor out_tensor, int index) { this->outputs_[index] = out_tensor; }
/// \brief obtain kernel's input tensors.
///
/// \return input tensors.
virtual const std::vector<mindspore::MSTensor> &inputs() { return this->inputs_; }
/// \brief obtain kernel's output tensors.
///
/// \return output tensors.
virtual const std::vector<mindspore::MSTensor> &outputs() { return this->outputs_; }
/// \brief obtain kernel's name.
///
/// \return kernel's name.
std::string name() const { return this->name_; }
/// \brief set kernel's name.
///
/// \param[in] name define the kernel's name.
void set_name(const std::string &name) { this->name_ = name; }
/// \brief obtain kernel's context.
///
/// \return kernel's context.
const mindspore::Context *context() const { return this->context_; }
/// \brief obtain kernel's type.
///
/// \return kernel's type.
@ -101,61 +44,13 @@ class MS_API Kernel {
/// \return kernel's quant type.
virtual schema::QuantType quant_type() const { return quant_type_; }
/// \brief obtain the primitive of kernel generated by flatbuffers.
///
/// \return the primitive of kernel generated by flatbuffers.
const schema::Primitive *primitive() const { return this->primitive_; }
/// \brief get kernel's attribute.
///
/// \param[in] key define the kernel's attribute key.
std::string GetAttr(const std::string &key) const {
auto iter = attrs_.find(key);
if (iter != attrs_.end()) {
return iter->second;
}
return "";
}
/// \brief set kernel's config.
///
/// \param[in] config define the kernel's config.
void SetConfig(const std::map<std::string, std::map<std::string, std::string>> *config) {
config_ = config;
}
/// \brief set kernel's config.
///
/// \param[in] section define the section of the kernel's config.
std::map<std::string, std::string> GetConfig(const std::string &section) const {
if (config_ == nullptr) {
return std::map<std::string, std::string>();
}
auto iter = config_->find(section);
if (iter != config_->end()) {
return iter->second;
}
return std::map<std::string, std::string>();
}
protected:
void Initialize();
protected:
/// \brief set kernel's attribute
///
/// \param[in] key define the kernel's attribute key.
/// \param[in] value define the kernel's attribute value.
void SetAttr(const std::string &key, const std::string &value) { attrs_[key] = value; }
std::string name_;
const mindspore::Context *context_ = nullptr;
std::vector<mindspore::MSTensor> inputs_;
std::vector<mindspore::MSTensor> outputs_;
schema::PrimitiveType type_ = schema::PrimitiveType_NONE;
const schema::Primitive *primitive_ = nullptr;
std::map<std::string, std::string> attrs_;
const std::map<std::string, std::map<std::string, std::string>> *config_;
schema::QuantType quant_type_ = schema::QuantType_QUANT_NONE;
private:
void Initialize();
};
} // namespace mindspore::kernel
#endif // MINDSPORE_INCLUDE_API_KERNEL_H
#endif // MINDSPORE_LITE_INCLUDE_KERNEL_H

143
include/api/kernel_api.h Normal file
View File

@ -0,0 +1,143 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_INCLUDE_API_KERNEL_API_H
#define MINDSPORE_INCLUDE_API_KERNEL_API_H
#include <vector>
#include <string>
#include <utility>
#include <map>
#include "include/api/types.h"
namespace mindspore {
class Context;
namespace kernel {
/// \brief The Kernel class is used to define a MindSpore Kernel.
template <typename Primitive>
class MS_API IKernel {
public:
IKernel() = default;
/// \brief Constructor.
///
/// \param[in] inputs define the input tensors for kernel.
/// \param[in] outputs define the output tensors for kernel.
/// \param[in] primitive define the primitive of kernel.
/// \param[in] ctx define the context for kernel.
IKernel(const std::vector<mindspore::MSTensor> &inputs, const std::vector<mindspore::MSTensor> &outputs,
const Primitive *primitive, const mindspore::Context *ctx)
: context_(ctx), inputs_(std::move(inputs)), outputs_(std::move(outputs)), primitive_(primitive) {}
/// \brief Destructor.
virtual ~IKernel() = default;
/// \brief prepare for executing kernel.
///
/// \return result code.
virtual int Prepare() = 0;
/// \brief execute the kernel.
///
/// \return result code.
virtual int Execute() = 0;
/// \brief resize the kernel input shape; memory needs to be refreshed.
///
/// \return result code.
virtual int ReSize() = 0;
/// \brief set kernel's input tensors.
///
/// \param[in] in_tensors define the input tensors.
virtual void set_inputs(const std::vector<mindspore::MSTensor> &in_tensors) { this->inputs_ = in_tensors; }
/// \brief set kernel's input tensor.
///
/// \param[in] in_tensor define the input tensor.
/// \param[in] index define the index of the input tensor.
virtual void set_input(mindspore::MSTensor in_tensor, int index) { this->inputs_[index] = in_tensor; }
/// \brief set kernel's output tensors.
///
/// \param[in] out_tensors define the output tensors.
virtual void set_outputs(const std::vector<mindspore::MSTensor> &out_tensors) { this->outputs_ = out_tensors; }
/// \brief set kernel's output tensor.
///
/// \param[in] out_tensor define the output tensor.
/// \param[in] index define the index of the output tensor.
virtual void set_output(mindspore::MSTensor out_tensor, int index) { this->outputs_[index] = out_tensor; }
/// \brief obtain kernel's input tensors.
///
/// \return input tensors.
virtual const std::vector<mindspore::MSTensor> &inputs() { return this->inputs_; }
/// \brief obtain kernel's output tensors.
///
/// \return output tensors.
virtual const std::vector<mindspore::MSTensor> &outputs() { return this->outputs_; }
/// \brief obtain kernel's name.
///
/// \return kernel's name.
std::string name() const { return this->name_; }
/// \brief set kernel's name.
///
/// \param[in] name define the kernel's name.
void set_name(const std::string &name) { this->name_ = name; }
/// \brief obtain kernel's context.
///
/// \return kernel's context.
const mindspore::Context *context() const { return this->context_; }
/// \brief obtain the primitive of kernel generated by flatbuffers.
///
/// \return the primitive of kernel generated by flatbuffers.
const Primitive *primitive() const { return this->primitive_; }
/// \brief get kernel's attribute.
///
/// \param[in] key define the kernel's attribute key.
std::string GetAttr(const std::string &key) const {
auto iter = attrs_.find(key);
if (iter != attrs_.end()) {
return iter->second;
}
return "";
}
/// \brief set kernel's config.
///
/// \param[in] config define the kernel's config.
void SetConfig(const std::map<std::string, std::map<std::string, std::string>> *config) { config_ = config; }
/// \brief get kernel's config.
///
/// \param[in] section define the section of the kernel's config.
std::map<std::string, std::string> GetConfig(const std::string &section) const {
if (config_ == nullptr) {
return std::map<std::string, std::string>();
}
auto iter = config_->find(section);
if (iter != config_->end()) {
return iter->second;
}
return std::map<std::string, std::string>();
}
protected:
/// \brief set kernel's attribute
///
/// \param[in] key define the kernel's attribute key.
/// \param[in] value define the kernel's attribute value.
void SetAttr(const std::string &key, const std::string &value) { attrs_[key] = value; }
std::string name_;
const mindspore::Context *context_ = nullptr;
std::vector<mindspore::MSTensor> inputs_;
std::vector<mindspore::MSTensor> outputs_;
const Primitive *primitive_ = nullptr;
std::map<std::string, std::string> attrs_;
const std::map<std::string, std::map<std::string, std::string>> *config_ = nullptr;
};
} // namespace kernel
} // namespace mindspore
#endif // MINDSPORE_INCLUDE_API_KERNEL_API_H
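For orientation, a hedged sketch of a kernel built on the new IKernel template (the built-in Lite Kernel instantiates it with schema::Primitive; MyPrimitive and AddKernel here are placeholders):

#include <vector>
#include "include/api/kernel_api.h"

namespace example {
// MyPrimitive is a placeholder; the built-in Lite Kernel uses schema::Primitive instead.
struct MyPrimitive {};

class AddKernel : public mindspore::kernel::IKernel<MyPrimitive> {
 public:
  AddKernel(const std::vector<mindspore::MSTensor> &inputs, const std::vector<mindspore::MSTensor> &outputs,
            const MyPrimitive *primitive, const mindspore::Context *ctx)
      : mindspore::kernel::IKernel<MyPrimitive>(inputs, outputs, primitive, ctx) {}

  int Prepare() override { return 0; }  // validate shapes, allocate workspaces
  int ReSize() override { return 0; }   // refresh buffers after an input-shape change
  int Execute() override {
    // Read inputs()[i].Data(), compute, then write the results into outputs()[i].
    return 0;
  }
};
}  // namespace example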

View File

@ -48,7 +48,7 @@ class MS_API Model {
Model(const Model &) = delete;
void operator=(const Model &) = delete;
/// \brief Build a model from model buffer so that it can run on a device. Only valid for Lite.
/// \brief Build a model from model buffer so that it can run on a device.
///
/// \param[in] model_data Define the buffer read from a model file.
/// \param[in] data_size Define bytes number of model buffer.
@ -60,7 +60,7 @@ class MS_API Model {
Status Build(const void *model_data, size_t data_size, ModelType model_type,
const std::shared_ptr<Context> &model_context = nullptr);
/// \brief Load and build a model from model buffer so that it can run on a device. Only valid for Lite.
/// \brief Load and build a model from model buffer so that it can run on a device.
///
/// \param[in] model_path Define the model path.
/// \param[in] model_type Define The type of model file. Options: ModelType::kMindIR, ModelType::kOM. Only
@ -71,7 +71,7 @@ class MS_API Model {
inline Status Build(const std::string &model_path, ModelType model_type,
const std::shared_ptr<Context> &model_context = nullptr);
/// \brief Build a model from model buffer so that it can run on a device. Only valid for Lite.
/// \brief Build a model from model buffer so that it can run on a device.
///
/// \param[in] model_data Define the buffer read from a model file.
/// \param[in] data_size Define bytes number of model buffer.
@ -87,7 +87,7 @@ class MS_API Model {
const std::shared_ptr<Context> &model_context, const Key &dec_key, const std::string &dec_mode,
const std::string &cropto_lib_path);
/// \brief Load and build a model from model buffer so that it can run on a device. Only valid for Lite.
/// \brief Load and build a model from model buffer so that it can run on a device.
///
/// \param[in] model_path Define the model path.
/// \param[in] model_type Define The type of model file. Options: ModelType::kMindIR, ModelType::kOM. Only
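A minimal usage sketch of the Build overload documented above; the header path and the model path "model.mindir" are illustrative assumptions:

#include <memory>
#include <string>
#include "include/api/context.h"
#include "include/api/model.h"

// Build a MindIR model with a default Context; the path is illustrative only.
mindspore::Status LoadModel(mindspore::Model *model) {
  auto context = std::make_shared<mindspore::Context>();
  return model->Build(std::string("model.mindir"), mindspore::ModelType::kMindIR, context);
}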

View File

@ -85,7 +85,7 @@ class MS_API ModelParallelRunner {
ModelParallelRunner() = default;
~ModelParallelRunner() = default;
/// \brief build a model parallel runner from model path so that it can run on a device. Only valid for Lite.
/// \brief build a model parallel runner from model path so that it can run on a device.
///
/// \param[in] model_path Define the model path.
/// \param[in] runner_config Define the config used to store options during model pool init.
@ -93,7 +93,7 @@ class MS_API ModelParallelRunner {
/// \return Status.
inline Status Init(const std::string &model_path, const std::shared_ptr<RunnerConfig> &runner_config = nullptr);
/// \brief build a model parallel runner from model buffer so that it can run on a device. Only valid for Lite.
/// \brief build a model parallel runner from model buffer so that it can run on a device.
///
/// \param[in] model_data Define the buffer read from a model file.
/// \param[in] data_size Define bytes number of model buffer.
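A minimal usage sketch of Init as documented above; the header path and the model path are illustrative, and runner_config is left at its nullptr default:

#include <string>
#include "include/api/model_parallel_runner.h"

// Initialize a parallel runner from a model path with default options.
mindspore::Status InitRunner(mindspore::ModelParallelRunner *runner) {
  // runner_config defaults to nullptr, so the runner uses its default configuration.
  return runner->Init(std::string("model.mindir"));
}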

View File

@ -215,42 +215,42 @@ class MS_API MSTensor {
/// \return The boolean value that indicates whether the MSTensor not equals tensor.
bool operator!=(const MSTensor &tensor) const;
/// \brief Set the shape of for the MSTensor. Only valid for Lite.
/// \brief Set the shape for the MSTensor.
///
/// \param[in] shape Shape of the MSTensor, a vector of int64_t.
void SetShape(const std::vector<int64_t> &shape);
/// \brief Set the data type for the MSTensor. Only valid for Lite.
/// \brief Set the data type for the MSTensor.
///
/// \param[in] data_type The data type of the MSTensor.
void SetDataType(enum DataType data_type);
/// \brief Set the name for the MSTensor. Only valid for Lite.
/// \brief Set the name for the MSTensor.
///
/// \param[in] name The name of the MSTensor.
inline void SetTensorName(const std::string &name);
/// \brief Set the Allocator for the MSTensor. Only valid for Lite.
/// \brief Set the Allocator for the MSTensor.
///
/// \param[in] allocator A pointer to Allocator.
void SetAllocator(std::shared_ptr<Allocator> allocator);
/// \brief Obtain the Allocator of the MSTensor. Only valid for Lite.
/// \brief Obtain the Allocator of the MSTensor.
///
/// \return A pointer to Allocator.
std::shared_ptr<Allocator> allocator() const;
/// \brief Set the format for the MSTensor. Only valid for Lite.
/// \brief Set the format for the MSTensor.
///
/// \param[in] format The format of the MSTensor.
void SetFormat(mindspore::Format format);
/// \brief Obtain the format of the MSTensor. Only valid for Lite.
/// \brief Obtain the format of the MSTensor.
///
/// \return The format of the MSTensor.
mindspore::Format format() const;
/// \brief Set the data for the MSTensor. Only valid for Lite.
/// \brief Set the data for the MSTensor.
///
/// \note Deprecated, this interface will be removed in the next iteration
///
@ -274,12 +274,12 @@ class MS_API MSTensor {
/// \return A pointer to the device data of the MSTensor.
void *GetDeviceData();
/// \brief Get the quantization parameters of the MSTensor. Only valid for Lite.
/// \brief Get the quantization parameters of the MSTensor.
///
/// \return The quantization parameters of the MSTensor.
std::vector<QuantParam> QuantParams() const;
/// \brief Set the quantization parameters for the MSTensor. Only valid for Lite.
/// \brief Set the quantization parameters for the MSTensor.
///
/// \param[in] quant_params The quantization parameters of the MSTensor.
void SetQuantParams(std::vector<QuantParam> quant_params);
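A hedged sketch exercising the MSTensor setters above; the tensor name, shape, data type and format values are illustrative, and Format::NCHW is assumed from the existing Format enum rather than shown in this hunk:

#include <vector>
#include "include/api/types.h"

// Configure an MSTensor with the setters documented above; values are illustrative.
void ConfigureTensor(mindspore::MSTensor *tensor) {
  tensor->SetTensorName("input_0");
  tensor->SetDataType(mindspore::DataType::kNumberTypeFloat32);
  tensor->SetShape({1, 3, 224, 224});
  tensor->SetFormat(mindspore::Format::NCHW);
  tensor->SetQuantParams({});  // no quantization parameters
}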

View File

@ -45,12 +45,12 @@ typedef struct MSCallBackParamC {
typedef bool (*MSKernelCallBackC)(const MSTensorHandleArray inputs, const MSTensorHandleArray outputs,
const MSCallBackParamC kernel_Info);
/// \brief Create a model object. Only valid for Lite.
/// \brief Create a model object.
///
/// \return Model object handle.
MS_API MSModelHandle MSModelCreate();
/// \brief Destroy the model object. Only valid for Lite.
/// \brief Destroy the model object.
///
/// \param[in] model Model object handle address.
MS_API void MSModelDestroy(MSModelHandle *model);
@ -62,7 +62,7 @@ MS_API void MSModelDestroy(MSModelHandle *model);
/// \param[in] workspace_size Define the workspace size.
MS_API void MSModelSetWorkspace(MSModelHandle model, void *workspace, size_t workspace_size);
/// \brief Build the model from model file buffer so that it can run on a device. Only valid for Lite.
/// \brief Build the model from model file buffer so that it can run on a device.
///
/// \param[in] model Model object handle.
/// \param[in] model_data Define the buffer read from a model file.
@ -74,7 +74,7 @@ MS_API void MSModelSetWorkspace(MSModelHandle model, void *workspace, size_t wor
MS_API MSStatus MSModelBuild(MSModelHandle model, const void *model_data, size_t data_size, MSModelType model_type,
const MSContextHandle model_context);
/// \brief Load and build the model from model path so that it can run on a device. Only valid for Lite.
/// \brief Load and build the model from model path so that it can run on a device.
///
/// \param[in] model Model object handle.
/// \param[in] model_path Define the model file path.
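A minimal C-API sketch of the create/build/destroy flow documented above; the header path, kMSModelTypeMindIR and kMSStatusSuccess are assumptions not shown in this hunk, and the model buffer and context are taken as given:

#include <stddef.h>
#include "include/c_api/model_c.h"  // assumed header path for the C API shown above

// Build a model from an in-memory buffer; the caller prepares model_data, data_size and context.
bool BuildModelFromBuffer(const void *model_data, size_t data_size, MSContextHandle context) {
  MSModelHandle model = MSModelCreate();
  if (model == NULL) {
    return false;
  }
  MSStatus ret = MSModelBuild(model, model_data, data_size, kMSModelTypeMindIR, context);
  if (ret != kMSStatusSuccess) {
    MSModelDestroy(&model);
    return false;
  }
  MSModelDestroy(&model);
  return true;
}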

View File

@ -54,7 +54,7 @@ public class ModelParallelRunner {
}
/**
* Build a model runner from model path so that it can run on a device. Only valid for Lite.
* Build a model runner from model path so that it can run on a device.
*
* @param modelPath the model path.
* @param runnerConfig the RunnerConfig Object.
@ -69,7 +69,7 @@ public class ModelParallelRunner {
}
/**
* Build a model runner from model path so that it can run on a device. Only valid for Lite.
* Build a model runner from model path so that it can run on a device.
*
* @param modelPath the model path.
* @return init status.
@ -83,7 +83,7 @@ public class ModelParallelRunner {
}
/**
* Build a model runner from model path so that it can run on a device. Only valid for Lite.
* Build a model runner from model path so that it can run on a device.
*
* @param inputs A vector where model inputs are arranged in sequence.
* @param outputs A vector into which the model outputs are filled in sequence.

View File

@ -128,7 +128,7 @@ public class MSContext {
}
/**
* set the number of threads at runtime. Only valid for Lite.
* set the number of threads at runtime.
* If you haven't init context before, it will do nothing.
*
* @param threadNum the number of threads at runtime.
@ -142,7 +142,7 @@ public class MSContext {
}
/**
* get the current thread number setting. Only valid for Lite.
* get the current thread number setting.
* If you haven't init context, it will return {@value ERROR_VALUE}.
*
* @return The current thread number setting.
@ -158,7 +158,7 @@ public class MSContext {
}
/**
* set the parallel number of operators at runtime. Only valid for Lite.
* set the parallel number of operators at runtime.
* If you haven't init context before, it will do nothing.
*
* @param parallelNum the parallel number of operators at runtime.
@ -172,7 +172,7 @@ public class MSContext {
}
/**
* get the current operators parallel number setting. Only valid for Lite.
* get the current operators parallel number setting.
* If you haven't init context, it will return {@value ERROR_VALUE}.
*
* @return The current operators parallel number setting.
@ -188,7 +188,7 @@ public class MSContext {
}
/**
* set the thread affinity to CPU cores. Only valid for Lite.
* set the thread affinity to CPU cores.
* If you haven't init context before, it will do nothing.
*
* @param mode: 0: no affinities, 1: big cores first, 2: little cores first
@ -203,7 +203,7 @@ public class MSContext {
/**
* get the thread affinity of CPU cores. Only valid for Lite.
* get the thread affinity of CPU cores.
* If you haven't init context, it will return {@value ERROR_VALUE}.
*
* @return Thread affinity to CPU cores. 0: no affinities, 1: big cores first, 2: little cores first
@ -219,7 +219,7 @@ public class MSContext {
}
/**
* set the thread lists to CPU cores. Only valid for Lite.
* set the thread lists to CPU cores.
* If coreList and mode are set by setThreadAffinity at the same time, the coreList is effective, but the
* mode is not effective.
* If you haven't init context before, it will do nothing.
@ -240,7 +240,7 @@ public class MSContext {
}
/**
* get the thread lists of CPU cores. Only valid for Lite.
* get the thread lists of CPU cores.
* If you haven't init context, it will return {@value ERROR_VALUE}.
*
* @return An {@code ArrayList<Integer>} of thread core lists.
@ -257,7 +257,7 @@ public class MSContext {
}
/**
* set the status whether to perform model inference or training in parallel. Only valid for Lite.
* set the status whether to perform model inference or training in parallel.
* If you haven't init context before, it will do nothing.
*
* @param isParallel: true, parallel; false, not in parallel.
@ -271,7 +271,7 @@ public class MSContext {
}
/**
* get the status whether to perform model inference or training in parallel. Only valid for Lite.
* get the status whether to perform model inference or training in parallel.
* If you haven't init context, it will also return false.
*
* @return boolean value that indicates whether in parallel.

View File

@ -38,25 +38,24 @@ if(MSLITE_ENABLE_CLOUD_FUSION_INFERENCE)
${CMAKE_CURRENT_SOURCE_DIR}/../litert/pack_weight_manager.cc
${CMAKE_CURRENT_SOURCE_DIR}/dynamic_mem_allocator.cc
${CMAKE_CURRENT_SOURCE_DIR}/dynamic_mem_manager.cc
${CMAKE_CURRENT_SOURCE_DIR}/subgraph_kernel.cc
${CMAKE_CURRENT_SOURCE_DIR}/numa_adapter.cc
${CMAKE_CURRENT_SOURCE_DIR}/kernel/cpu/less_test_kernel_mod.cc
${CMAKE_CURRENT_SOURCE_DIR}/kernel/cpu/transpose_kernel_mod.cc
${CMAKE_CURRENT_SOURCE_DIR}/infer_session.cc
${CMAKE_CURRENT_SOURCE_DIR}/single_op_session.cc
${CMAKE_CURRENT_SOURCE_DIR}/session/single_op_session.cc
${CMAKE_CURRENT_SOURCE_DIR}/infer_device_address.cc
${CMAKE_CURRENT_SOURCE_DIR}/utils/kernel_build_utils.cc
${CMAKE_CURRENT_SOURCE_DIR}/utils/kernel_graph_utils.cc
${CMAKE_CURRENT_SOURCE_DIR}/utils/tensor_utils.cc
${CMAKE_CURRENT_SOURCE_DIR}/utils/runtime_utils.cc
${CMAKE_CURRENT_SOURCE_DIR}/delegate/graph_executor/delegate.cc
${CMAKE_CURRENT_SOURCE_DIR}/session/delegate_session.cc
${CMAKE_CURRENT_SOURCE_DIR}/session/graph_executor_session.cc
${CMAKE_CURRENT_SOURCE_DIR}/session/factory.cc
${CMAKE_CURRENT_SOURCE_DIR}/delegate/factory.cc
${CMAKE_CURRENT_SOURCE_DIR}/delegate/graph_executor/factory.cc
${CMAKE_CURRENT_SOURCE_DIR}/delegate/plugin/tensorrt_executor_plugin.cc
${CMAKE_CURRENT_SOURCE_DIR}/delegate/tensorrt/distribution/distribution_base.cc
${CMAKE_CURRENT_SOURCE_DIR}/session/lite_infer_session.cc
${CMAKE_CURRENT_SOURCE_DIR}/delegate_graph_executor.cc
)
# when cpu kernel is needed
#if(NOT MSLITE_ENABLE_ACL)

View File

@ -1,60 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "extendrt/acl/acl_env_guard.h"
#include "utils/log_adapter.h"
#include "acl/acl.h"
namespace mindspore {
std::weak_ptr<AclEnvGuard> AclEnvGuard::global_acl_env_;
std::mutex AclEnvGuard::global_acl_env_mutex_;
AclEnvGuard::AclEnvGuard() {
errno_ = aclInit(nullptr);
if (errno_ != ACL_ERROR_NONE && errno_ != ACL_ERROR_REPEAT_INITIALIZE) {
MS_LOG(ERROR) << "Execute aclInit Failed";
return;
}
MS_LOG(INFO) << "Acl init success";
}
AclEnvGuard::~AclEnvGuard() {
errno_ = aclFinalize();
if (errno_ != ACL_ERROR_NONE && errno_ != ACL_ERROR_REPEAT_FINALIZE) {
MS_LOG(ERROR) << "Finalize acl failed";
}
MS_LOG(INFO) << "Acl finalize success";
}
std::shared_ptr<AclEnvGuard> AclEnvGuard::GetAclEnv() {
std::shared_ptr<AclEnvGuard> acl_env;
std::lock_guard<std::mutex> lock(global_acl_env_mutex_);
acl_env = global_acl_env_.lock();
if (acl_env != nullptr) {
MS_LOG(INFO) << "Acl has been initialized, skip.";
} else {
acl_env = std::make_shared<AclEnvGuard>();
aclError ret = acl_env->GetErrno();
if (ret != ACL_ERROR_NONE && ret != ACL_ERROR_REPEAT_INITIALIZE) {
MS_LOG(ERROR) << "Execute aclInit Failed";
return nullptr;
}
global_acl_env_ = acl_env;
MS_LOG(INFO) << "Acl init success";
}
return acl_env;
}
} // namespace mindspore

View File

@ -1,37 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_GRAPH_ACL_ACL_ENV_GUARD_H_
#define MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_GRAPH_ACL_ACL_ENV_GUARD_H_
#include <memory>
#include <mutex>
#include "acl/acl_base.h"
namespace mindspore {
class __attribute__((visibility("default"))) AclEnvGuard {
public:
explicit AclEnvGuard();
~AclEnvGuard();
aclError GetErrno() const { return errno_; }
static std::shared_ptr<AclEnvGuard> GetAclEnv();
private:
static std::weak_ptr<AclEnvGuard> global_acl_env_;
static std::mutex global_acl_env_mutex_;
aclError errno_;
};
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_GRAPH_ACL_ACL_ENV_GUARD_H_

View File

@ -1,323 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "extendrt/acl/acl_infer.h"
#include "include/api/context.h"
#include "extendrt/acl/model_converter.h"
#include "utils/log_adapter.h"
#include "mindspore/core/utils/convert_utils_base.h"
#include "extendrt/acl/acl_utils.h"
namespace mindspore {
AclInferExecutor::AclInferExecutor()
: init_flag_(false),
load_flag_(false),
device_type_("AscendCL"),
device_id_(0),
context_(nullptr),
acl_env_(nullptr) {}
AclInferExecutor::~AclInferExecutor() {
try {
(void)FinalizeEnv();
} catch (const std::exception &e) {
MS_LOG(ERROR) << "AclInferExecutor destructor run failed, error message : " << e.what();
} catch (...) {
MS_LOG(ERROR) << "AclInferExecutor destructor run failed, unknown error occurred.";
}
}
Status AclInferExecutor::Run(const std::vector<MSTensor> &inputs, std::vector<MSTensor> *outputs) {
MS_EXCEPTION_IF_NULL(outputs);
Status ret = Load(IntToUint(device_id_));
if (ret != kSuccess) {
MS_LOG(ERROR) << "Prepare model resource failed.";
return ret;
}
return model_process_.PredictFromHost(inputs, outputs);
}
std::vector<MSTensor> AclInferExecutor::GetInputs() {
Status ret = Load(IntToUint(device_id_));
if (ret != kSuccess) {
MS_LOG(ERROR) << "Prepare model resource failed.";
return {};
}
return model_process_.GetInputs();
}
std::vector<MSTensor> AclInferExecutor::GetOutputs() {
Status ret = Load(IntToUint(device_id_));
if (ret != kSuccess) {
MS_LOG(ERROR) << "Prepare model resource failed.";
return {};
}
return model_process_.GetOutputs();
}
Status AclInferExecutor::LoadAclModel(const Buffer om_data) {
MS_LOG(INFO) << "Start load acl model.";
// acl load model
uint32_t acl_model_id;
auto acl_ret = aclmdlLoadFromMem(om_data.Data(), om_data.DataSize(), &acl_model_id);
if (acl_ret != ACL_ERROR_NONE) {
MS_LOG(ERROR) << "Call aclmdlLoadFromMem failed.";
return kMCDeviceError;
}
// acl init model resource
model_process_.set_model_id(acl_model_id);
Status ret = model_process_.PreInitModelResource();
if (ret != kSuccess) {
(void)aclmdlUnload(acl_model_id);
MS_LOG(ERROR) << "Pre init model resource failed.";
return ret;
}
MS_LOG(INFO) << "Load acl model success.";
return kSuccess;
}
Status AclInferExecutor::InitEnv() {
if (init_flag_) {
return kSuccess;
}
acl_env_ = AclEnvGuard::GetAclEnv();
if (acl_env_ == nullptr) {
MS_LOG(ERROR) << "Acl init failed.";
return kMCDeviceError;
}
aclError ret = aclrtSetDevice(device_id_);
if (ret != ACL_ERROR_NONE) {
MS_LOG(EXCEPTION) << "Device " << device_id_ << " call aclrtSetDevice failed, ret[" << static_cast<int>(ret) << "]";
}
MS_LOG(INFO) << "Open device " << device_id_ << " success";
ret = aclrtCreateContext(&context_, device_id_);
if (ret != ACL_ERROR_NONE) {
MS_LOG(ERROR) << "Acl create context failed";
return kMCDeviceError;
}
MS_LOG(INFO) << "Create context success";
aclrtRunMode run_mode;
ret = aclrtGetRunMode(&run_mode);
if (ret != ACL_ERROR_NONE) {
MS_LOG(ERROR) << "Acl get run mode failed";
return kMCDeviceError;
}
bool is_device = (run_mode == ACL_DEVICE);
model_process_.SetIsDevice(is_device);
MS_LOG(INFO) << "Get run mode success is device input/output " << is_device;
MS_LOG(INFO) << "Init acl success, device id " << device_id_;
init_flag_ = true;
return kSuccess;
}
Status AclInferExecutor::FinalizeEnv() {
if (!init_flag_) {
return kSuccess;
}
aclError rt_ret = aclrtSetCurrentContext(context_);
if (rt_ret != ACL_ERROR_NONE) {
MS_LOG(ERROR) << "Set the ascend device context failed";
return kMCDeviceError;
}
Status ret = model_process_.UnLoad();
if (ret != kSuccess) {
MS_LOG(ERROR) << "Unload model inner failed.";
return ret;
}
if (context_ != nullptr) {
rt_ret = aclrtDestroyContext(context_);
if (rt_ret != ACL_ERROR_NONE) {
MS_LOG(ERROR) << "Destroy context failed";
}
context_ = nullptr;
}
MS_LOG(INFO) << "End to destroy context";
rt_ret = aclrtResetDevice(device_id_);
if (rt_ret != ACL_ERROR_NONE) {
MS_LOG(ERROR) << "Reset device " << device_id_ << " failed";
}
MS_LOG(INFO) << "End to reset device " << device_id_;
init_flag_ = false;
return kSuccess;
}
Status AclInferExecutor::Build() {
MS_LOG(INFO) << "Start build model.";
MS_EXCEPTION_IF_NULL(graph_);
if (graph_cell_ != nullptr) {
MS_LOG(INFO) << "This model has been built, skip.";
return kSuccess;
}
std::shared_ptr<AclModelOptions> options = std::make_shared<AclModelOptions>(model_context_);
MS_EXCEPTION_IF_NULL(options);
if (graph_cell_ == nullptr && graph_->ModelType() == ModelType::kOM) {
MS_LOG(INFO) << "Load om model and all build options will be ignored.";
graph_cell_ = std::make_shared<GraphCell>(graph_);
MS_EXCEPTION_IF_NULL(graph_cell_);
auto ret = graph_cell_->Load(options->GetDeviceID());
if (ret != kSuccess) {
MS_LOG(ERROR) << "Load failed.";
return ret;
}
options_ = std::move(options);
return kSuccess;
}
std::string options_key = options->GenAclOptionsKey();
std::shared_ptr<Graph> graph;
if (auto iter = dynamic_size_graph_map_.find(options_key); iter != dynamic_size_graph_map_.end()) {
MS_LOG(INFO) << "This options has been built, read cache.";
graph = iter->second;
} else {
auto func_graph = ModelImpl::GetFuncGraph();
MS_EXCEPTION_IF_NULL(func_graph);
auto inputs = func_graph->parameters();
std::vector<std::string> input_names;
for (auto node : inputs) {
auto para = node->cast<ParameterPtr>();
MS_EXCEPTION_IF_NULL(para);
std::string name = para->name();
for (auto pos = name.find(':'); pos != std::string::npos; pos = name.find(':')) {
name = name.substr(0, pos) + "_" + name.substr(pos + 1);
MS_LOG(INFO) << name;
}
para->set_name(name);
input_names.push_back(name);
}
options->RenameInput(input_names);
MS_EXCEPTION_IF_NULL(func_graph);
model_converter_.set_options(options);
auto om_data = model_converter_.LoadMindIR(func_graph);
if (om_data.Data() == nullptr || om_data.DataSize() == 0) {
MS_LOG(ERROR) << "Load MindIR failed.";
return kMCFailed;
}
graph = std::make_shared<Graph>(std::make_shared<Graph::GraphData>(om_data, ModelType::kOM));
dynamic_size_graph_map_[options_key] = graph;
}
MS_EXCEPTION_IF_NULL(graph);
auto graph_cell = std::make_shared<GraphCell>(graph);
MS_EXCEPTION_IF_NULL(graph_cell);
auto ret = graph_cell->Load(options->GetDeviceID());
if (ret != kSuccess) {
MS_LOG(ERROR) << "Load failed.";
return ret;
}
// save result
graph_cell_ = graph_cell;
options_ = std::move(options);
MS_LOG(INFO) << "Build model success.";
return kSuccess;
}
Status AclInferExecutor::Load(uint32_t device_id) {
// check graph type
if (graph_->ModelType() != ModelType::kOM) {
Status ret = ConvertToOM();
if (ret != kSuccess) {
MS_LOG(ERROR) << "Load Failed.";
return ret;
}
}
const auto &graph_data = GraphImpl::MutableGraphData();
MS_EXCEPTION_IF_NULL(graph_data);
auto om_data = graph_data->GetOMData();
// init
device_id_ = UintToInt(device_id);
Status ret = InitEnv();
if (ret != kSuccess) {
MS_LOG(ERROR) << "InitEnv failed.";
return ret;
}
// load model
if (!load_flag_) {
ret = LoadAclModel(om_data);
if (ret != kSuccess) {
MS_LOG(ERROR) << "Load acl model failed.";
return ret;
}
load_flag_ = true;
}
aclError rt_ret = aclrtSetCurrentContext(context_);
if (rt_ret != ACL_ERROR_NONE) {
MS_LOG(ERROR) << "Set the ascend device context failed";
return kMCDeviceError;
}
return kSuccess;
}
Status AclInferExecutor::ConvertToOM() {
MS_LOG(INFO) << "Start convert to om model.";
if (graph_ == nullptr) {
MS_LOG(ERROR) << "Invalid graph_ is null.";
return kMCFailed;
}
auto &graph_data = GraphImpl::MutableGraphData();
MS_EXCEPTION_IF_NULL(graph_data);
if (graph_->ModelType() == ModelType::kOM) {
MS_LOG(INFO) << "This model has been built, skip.";
return kSuccess;
} else if (graph_->ModelType() == ModelType::kMindIR) {
auto func_graph = graph_data->GetFuncGraph();
MS_EXCEPTION_IF_NULL(func_graph);
ModelConverter model_converter;
Buffer om_data = model_converter.LoadMindIR(func_graph);
if (om_data.Data() == nullptr || om_data.DataSize() == 0) {
MS_LOG(ERROR) << "Convert MindIR to OM failed.";
return kMCFailed;
}
graph_data = std::make_shared<Graph::GraphData>(om_data, ModelType::kOM);
MS_LOG(INFO) << "Convert MindIR to OM success.";
return kSuccess;
}
MS_LOG(ERROR) << "Unsupported ModelType " << graph_->ModelType();
return kMCFailed;
}
bool AclInferExecutor::CheckDeviceSupport(mindspore::DeviceType device_type) {
// for Ascend, only support kAscend and kAscend310
if (device_type != kAscend && device_type != kAscend310) {
return false;
}
return IsAscendNo910Soc();
}
} // namespace mindspore

View File

@ -1,56 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_GRAPH_ACL_ACL_GRAPH_IMPL_H_
#define MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_GRAPH_ACL_ACL_GRAPH_IMPL_H_
#include <functional>
#include <map>
#include <string>
#include <vector>
#include <memory>
#include <utility>
#include "include/api/graph.h"
#include "extendrt/acl/model_process.h"
#include "extendrt/acl/acl_env_guard.h"
#include "extendrt/cxx_api/graph/graph_impl.h"
#include "extendrt/graph_executor.h"
namespace mindspore {
class AclInferExecutor : public GraphExecutor {
public:
AclInferExecutor();
~AclInferExecutor() override;
Status Execute(const ExecutePlan &plan, const std::vector<MSTensor> &inputs, std::vector<MSTensor> *outputs) override;
protected:
bool CheckDeviceSupport(mindspore::DeviceType device_type) override;
Status Load(uint32_t device_id);
Status InitEnv();
Status FinalizeEnv();
Status CheckModelInputs(const std::vector<tensor::TensorPtr> &inputs) const;
bool init_flag_;
bool load_flag_;
std::string device_type_;
int32_t device_id_;
aclrtContext context_;
std::shared_ptr<AclEnvGuard> acl_env_;
ModelProcess model_process_;
};
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_GRAPH_ACL_ACL_GRAPH_IMPL_H_

View File

@ -1,267 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "extendrt/acl/acl_model_multi.h"
#include <vector>
#include <utility>
#include <map>
#include <string>
#include <algorithm>
#include <numeric>
#include <deque>
#include <functional>
#include "extendrt/factory.h"
#include "acl/acl_rt.h"
#include "mindspore/core/load_mindir/infer_mindir.h"
#include "extendrt/acl/acl_vm/ms_tensor_ref.h"
#include "extendrt/acl/acl_vm/acl_vm.h"
namespace mindspore {
API_FACTORY_REG(ModelImpl, AclModelMulti);
namespace {
std::map<DataType, size_t> kDtypeMap = {
{DataType::kNumberTypeBool, sizeof(bool)}, {DataType::kNumberTypeInt8, sizeof(int8_t)},
{DataType::kNumberTypeInt16, sizeof(int16_t)}, {DataType::kNumberTypeInt32, sizeof(int32_t)},
{DataType::kNumberTypeInt64, sizeof(int64_t)}, {DataType::kNumberTypeFloat16, sizeof(float16)},
{DataType::kNumberTypeFloat32, sizeof(float)}, {DataType::kNumberTypeFloat64, sizeof(double)},
{DataType::kNumberTypeUInt8, sizeof(uint8_t)}, {DataType::kNumberTypeUInt16, sizeof(uint16_t)},
{DataType::kNumberTypeUInt32, sizeof(uint32_t)}, {DataType::kNumberTypeUInt64, sizeof(uint64_t)}};
std::shared_ptr<compile::MsBackend> CreateBackend(const std::shared_ptr<AclModelOptions> &options) {
MS_EXCEPTION_IF_NULL(options);
return std::make_shared<AclBackend>(kMsConvert, kDavinciMultiGraphInferenceDevice, options);
}
bool HasMultiGraph(const FuncGraphPtr &fg) {
MS_EXCEPTION_IF_NULL(fg);
std::vector<AnfNodePtr> all_nodes = TopoSort(fg->get_return());
for (const auto &node : all_nodes) {
MS_EXCEPTION_IF_NULL(node);
if (IsValueNode<FuncGraph>(node)) {
MS_LOG(INFO) << fg->ToString() << " has FuncGraph node " << node->DebugString() << " is multi graph.";
return true;
}
}
return false;
}
} // namespace
Status AclModelMulti::Build() {
if (!is_multi_graph_.has_value()) {
is_multi_graph_ = ModelImpl::GetFuncGraph() == nullptr ? false : HasMultiGraph(ModelImpl::GetFuncGraph());
}
if (!is_multi_graph_.value()) {
return AclInferSession::Build();
}
if (vm_ != nullptr) {
MS_LOG(INFO) << "Multi graph model has been built, skip.";
return kSuccess;
}
MS_LOG(INFO) << "Start build multi graph model.";
// prepare func graph
auto manager = MakeManager();
manager->AddFuncGraph(ModelImpl::GetFuncGraph());
ModelImpl::GetFuncGraph()->set_manager(manager);
// set inputs
SetInputs();
// infer mindir
abstract::AbstractBasePtrList broaded_args;
auto fg = ModelImpl::GetFuncGraph();
MS_EXCEPTION_IF_NULL(fg);
const auto &inputs = fg->get_inputs();
(void)std::transform(inputs.begin(), inputs.end(), std::back_inserter(broaded_args),
[](const AnfNodePtr &n) -> AbstractBasePtr {
MS_EXCEPTION_IF_NULL(n);
auto abstract = n->abstract();
MS_EXCEPTION_IF_NULL(abstract);
if (abstract->GetValueTrack() != kAnyValue) {
return abstract->Broaden();
}
return abstract;
});
try {
(void)InferMindir(ModelImpl::GetFuncGraph(), broaded_args);
} catch (const std::runtime_error &e) {
MS_LOG(ERROR) << "Infer mindir for sub graph failed: " << e.what();
return kMCFailed;
}
// set output
SetOutput();
// create vm
auto backend = CreateBackend(std::make_shared<AclModelOptions>(model_context_));
auto context_ptr = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context_ptr);
backend->set_is_multi_graph_sink(false);
context_ptr->set_param<std::string>(MS_CTX_DEVICE_TARGET, kDavinciMultiGraphInferenceDevice);
context_ptr->set_param<bool>(MS_CTX_IS_MULTI_GRAPH_SINK, false);
context_ptr->set_param<bool>(MS_CTX_ENABLE_LOOP_SINK, false);
auto compile = std::make_shared<AclCompileGraphs>(backend, compile::GetMsNonlinearOps());
vm_ = compile->CompileAndLink(ModelImpl::GetFuncGraph());
backend_ = std::move(backend);
MS_LOG(INFO) << "Build multi graph model success.";
return kSuccess;
}
Status AclModelMulti::Predict(const std::vector<MSTensor> &inputs, std::vector<MSTensor> *outputs) {
if (!is_multi_graph_.has_value()) {
is_multi_graph_ = ModelImpl::GetFuncGraph() == nullptr ? false : HasMultiGraph(ModelImpl::GetFuncGraph());
}
if (!is_multi_graph_.value()) {
return AclInferSession::Predict(inputs, outputs);
}
auto ret = Build();
if (ret != kSuccess) {
MS_LOG(ERROR) << "Build multi-graph model as default options failed.";
return ret;
}
MS_LOG(INFO) << "Start predict multi graph model.";
MS_EXCEPTION_IF_NULL(vm_);
MS_EXCEPTION_IF_NULL(outputs);
try {
(*outputs) = MSTensorRef::Convert(vm_->Eval(MSTensorRef::Convert(inputs)));
} catch (const std::exception &ex) {
MS_LOG(ERROR) << "Predict Failed, error: " << ex.what();
return kMCFailed;
}
if (inputs_.empty()) {
inputs_ = inputs;
} else {
if (inputs.size() != inputs_.size()) {
MS_LOG(ERROR) << "Input Size is wrong.";
return kMCFailed;
}
for (size_t i = 0; i < inputs_.size(); ++i) {
auto input_tensor = MSTensor::CreateTensor(inputs_[i].Name(), inputs_[i].DataType(), inputs_[i].Shape(),
inputs[i].Data().get(), inputs[i].DataSize());
inputs_[i] = (*input_tensor);
MSTensor::DestroyTensorPtr(input_tensor);
}
}
outputs_ = *outputs;
MS_LOG(INFO) << "Predict multi graph model success.";
return kSuccess;
}
void AclModelMulti::SetInputs() {
if (inputs_.empty()) {
auto fg = ModelImpl::GetFuncGraph();
MS_EXCEPTION_IF_NULL(fg);
const auto &inputs = fg->get_inputs();
for (const auto &in : inputs) {
auto input_param = std::dynamic_pointer_cast<Parameter>(in);
MS_EXCEPTION_IF_NULL(input_param);
auto input_abs = input_param->abstract();
MS_EXCEPTION_IF_NULL(input_abs);
auto tensor_abs = input_abs->cast<abstract::AbstractTensorPtr>();
if (tensor_abs == nullptr) {
MS_LOG(EXCEPTION) << "The graph input type is not a tensor. input args info:" << input_abs->ToString();
}
auto shape_ptr = tensor_abs->BuildShape();
MS_EXCEPTION_IF_NULL(shape_ptr);
auto tensor_shape = shape_ptr->cast<abstract::ShapePtr>();
MS_EXCEPTION_IF_NULL(tensor_shape);
auto elem = tensor_abs->element();
MS_EXCEPTION_IF_NULL(elem);
auto type_id = elem->BuildType()->type_id();
auto tensor = std::make_shared<tensor::Tensor>(type_id, tensor_shape->shape());
std::vector<int64_t> shape = tensor->shape_c();
auto input_tensor = MSTensor::CreateTensor(input_param->name(), static_cast<DataType>(tensor->data_type_c()),
shape, nullptr, tensor->Size());
inputs_.emplace_back(*input_tensor);
MSTensor::DestroyTensorPtr(input_tensor);
}
} else {
MS_LOG(DEBUG) << "inputs_ has been set.";
}
}
void AclModelMulti::SetOutput() {
if (outputs_.empty()) {
auto fg = ModelImpl::GetFuncGraph();
MS_EXCEPTION_IF_NULL(fg);
const auto output = fg->output();
MS_EXCEPTION_IF_NULL(output);
auto abs = output->abstract();
MS_EXCEPTION_IF_NULL(abs);
// DataType
DataType type_id;
if (abs->isa<abstract::AbstractTensor>()) {
auto abs_tensor = abs->cast<abstract::AbstractTensorPtr>();
auto ele = abs_tensor->element();
MS_EXCEPTION_IF_NULL(ele);
MS_EXCEPTION_IF_NULL(ele->GetTypeTrack());
type_id = static_cast<DataType>(ele->GetTypeTrack()->type_id());
} else {
MS_EXCEPTION_IF_NULL(abs->GetTypeTrack());
type_id = static_cast<DataType>(abs->GetTypeTrack()->type_id());
}
// Shape
auto shape_track = abs->GetShapeTrack();
MS_EXCEPTION_IF_NULL(shape_track);
std::vector<int64_t> shape = {};
if (shape_track->isa<abstract::Shape>()) {
auto shapeptr = shape_track->cast<abstract::ShapePtr>();
shape = static_cast<std::vector<int64_t>>(shapeptr->shape());
}
// Size
size_t ato_size = 0;
if (kDtypeMap.find(type_id) != kDtypeMap.end()) {
ato_size = kDtypeMap[type_id];
}
int64_t ele_num = std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int64_t>());
size_t size = ato_size * LongToSize(ele_num);
// create tensor
auto output_tensor = MSTensor::CreateTensor("", type_id, shape, nullptr, size);
outputs_.emplace_back(*output_tensor);
MSTensor::DestroyTensorPtr(output_tensor);
} else {
MS_LOG(DEBUG) << "outputs_ has been set.";
}
}
std::vector<MSTensor> AclModelMulti::GetInputs() {
if (!is_multi_graph_.has_value()) {
is_multi_graph_ = ModelImpl::GetFuncGraph() == nullptr ? false : HasMultiGraph(ModelImpl::GetFuncGraph());
}
if (!is_multi_graph_.value()) {
return AclInferSession::GetInputs();
}
return inputs_;
}
std::vector<MSTensor> AclModelMulti::GetOutputs() {
if (!is_multi_graph_.has_value()) {
is_multi_graph_ = ModelImpl::GetFuncGraph() == nullptr ? false : HasMultiGraph(ModelImpl::GetFuncGraph());
}
if (!is_multi_graph_.value()) {
return AclInferSession::GetOutputs();
}
return outputs_;
}
} // namespace mindspore

View File

@ -1,54 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_MODEL_ACL_ACL_MODEL_MULTI_H_
#define MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_MODEL_ACL_ACL_MODEL_MULTI_H_
#include "extendrt/acl/acl_model.h"
#include <memory>
#include <optional>
#include <vector>
#include <string>
#include <map>
namespace mindspore {
namespace compile {
class MsBackend;
class FinalVM;
} // namespace compile
class AclModelMulti : public AclInferSession {
public:
AclModelMulti() : AclInferSession(), is_multi_graph_(std::nullopt) {}
~AclModelMulti() = default;
Status Build() override;
Status Predict(const std::vector<MSTensor> &inputs, std::vector<MSTensor> *outputs) override;
std::vector<MSTensor> GetInputs() override;
std::vector<MSTensor> GetOutputs() override;
private:
void SetInputs();
void SetOutput();
std::optional<bool> is_multi_graph_;
std::shared_ptr<compile::MsBackend> backend_;
std::shared_ptr<compile::FinalVM> vm_;
std::vector<MSTensor> inputs_ = {};
std::vector<MSTensor> outputs_ = {};
};
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_MODEL_ACL_ACL_MODEL_MULTI_H_
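For orientation, a minimal call sequence against this interface could look as follows. This is an illustrative sketch only: it assumes the header above is on the include path and does not show how the FuncGraph is attached through ModelImpl.

void RunAclModelMulti() {
  mindspore::AclModelMulti session;
  if (session.Build() != mindspore::kSuccess) {
    return;  // build failed, e.g. no FuncGraph attached or conversion error
  }
  std::vector<mindspore::MSTensor> inputs = session.GetInputs();
  // ... fill the input tensors with data here ...
  std::vector<mindspore::MSTensor> outputs;
  (void)session.Predict(inputs, &outputs);
}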

View File

@ -1,163 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "extendrt/acl/acl_model_options.h"
#include <set>
#include <memory>
#include "utils/log_adapter.h"
#include "external/ge/ge_api_types.h"
#include "acl/acl_base.h"
namespace mindspore {
static const std::map<DataType, std::string> kSupportedDtypeOptionMap = {{DataType::kNumberTypeFloat16, "FP16"},
{DataType::kNumberTypeFloat32, "FP32"},
{DataType::kNumberTypeUInt8, "UINT8"}};
AclModelOptions::AclModelOptions(const std::shared_ptr<Context> &context) {
if (context == nullptr) {
return;
}
auto &device_infos = context->MutableDeviceInfo();
if (device_infos.size() != 1) {
return;
}
auto ascend_info = device_infos[0]->Cast<AscendDeviceInfo>();
if (ascend_info == nullptr) {
return;
}
insert_op_cfg_path_ = ascend_info->GetInsertOpConfigPath();
input_format_ = ascend_info->GetInputFormat();
input_shape_map_ = ascend_info->GetInputShapeMap();
auto out_type = ascend_info->GetOutputType();
auto iter = kSupportedDtypeOptionMap.find(out_type);
if (out_type == DataType::kTypeUnknown) {
// do nothing
} else if (iter == kSupportedDtypeOptionMap.end()) {
MS_LOG(INFO) << "Unsupported output type " << out_type << ", use FP32 as default.";
} else {
output_type_ = iter->second;
}
dynamic_batch_size_ = ascend_info->GetDynamicBatchSize();
dynamic_image_size_ = ascend_info->GetDynamicImageSize();
precision_mode_ = ascend_info->GetPrecisionMode();
op_select_impl_mode_ = ascend_info->GetOpSelectImplMode();
fusion_switch_cfg_path_ = ascend_info->GetFusionSwitchConfigPath();
device_id_ = ascend_info->GetDeviceID();
buffer_optimize_mode_ = ascend_info->GetBufferOptimizeMode();
#ifndef SUPPORT_SD3403_DAVINCI
const char *soc_name = aclrtGetSocName();
if (soc_name == nullptr) {
MS_LOG(WARNING) << "Get soc version failed.";
return;
}
soc_version_ = soc_name;
#else
soc_version = "OPTG";
#endif
}
void AclModelOptions::RenameInput(const std::vector<std::string> &input_names) {
if (input_names.size() != input_shape_map_.size()) {
MS_LOG(INFO) << "Inputs count not match";
return;
}
input_shape_ = "";
for (size_t i = 0; i < input_shape_map_.size(); i++) {
if (input_shape_map_.find(i) == input_shape_map_.end()) {
MS_LOG(WARNING) << "Not find the key: " << i;
return;
}
std::string s;
for (size_t j = 0; j < input_shape_map_[i].size(); j++) {
s += std::to_string(input_shape_map_[i][j]) + ",";
}
input_shape_ += input_names[i] + ":" + s.substr(0, s.size() - 1) + ";";
}
input_shape_ = input_shape_.substr(0, input_shape_.size() - 1);
MS_LOG(INFO) << "input name is " << input_shape_;
}
std::tuple<std::map<std::string, std::string>, std::map<std::string, std::string>> AclModelOptions::GenAclOptions()
const {
const std::map<std::string const *, std::string> init_options_map = {
{&op_select_impl_mode_, ge::ir_option::OP_SELECT_IMPL_MODE},
{&soc_version_, ge::ir_option::SOC_VERSION},
{&fusion_switch_cfg_path_, ge::ir_option::FUSION_SWITCH_FILE},
{&buffer_optimize_mode_, ge::ir_option::BUFFER_OPTIMIZE}};
const std::map<std::string const *, std::string> build_options_map = {
{&insert_op_cfg_path_, ge::ir_option::INSERT_OP_FILE},
{&input_format_, ge::ir_option::INPUT_FORMAT},
{&input_shape_, ge::ir_option::INPUT_SHAPE},
{&output_type_, ge::ir_option::OUTPUT_TYPE},
{&precision_mode_, ge::ir_option::PRECISION_MODE},
{&dynamic_batch_size_, ge::ir_option::DYNAMIC_BATCH_SIZE},
{&dynamic_image_size_, ge::ir_option::DYNAMIC_IMAGE_SIZE}};
const std::set<std::string> first_graph_options = {
ge::ir_option::INSERT_OP_FILE,
ge::ir_option::INPUT_FORMAT,
ge::ir_option::INPUT_SHAPE,
};
const std::set<std::string> multi_graph_unsupported_options = {ge::ir_option::OUTPUT_TYPE};
std::map<std::string, std::string> init_options;
std::map<std::string, std::string> build_options;
for (auto [ms_option, acl_option_key] : init_options_map) {
if (ms_option == nullptr || ms_option->empty()) {
continue;
}
MS_LOG(INFO) << "Option " << acl_option_key << " : " << *ms_option;
init_options.emplace(acl_option_key, *ms_option);
}
for (auto [ms_option, acl_option_key] : build_options_map) {
if (ms_option == nullptr || ms_option->empty()) {
continue;
}
MS_LOG(INFO) << "Option " << acl_option_key << " : " << *ms_option;
build_options.emplace(acl_option_key, *ms_option);
}
// first_graph_flag_ having a value means multi-graph mode
if (first_graph_flag_.has_value()) {
for (const auto &option : multi_graph_unsupported_options) {
build_options.erase(option);
}
// non-input graph
if (!first_graph_flag_) {
for (const auto &option : first_graph_options) {
build_options.erase(option);
}
}
}
return {init_options, build_options};
}
std::string AclModelOptions::GenAclOptionsKey() const {
auto [init_options, build_options] = GenAclOptions();
std::string key_str;
for (auto &[key, value] : init_options) {
key_str += key + "^" + value + "^^";
}
for (auto &[key, value] : build_options) {
key_str += key + "^" + value + "^^";
}
return key_str;
}
} // namespace mindspore
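As a rough illustration of how these options are consumed (a sketch under assumptions, not code from this commit; the AscendDeviceInfo setters are assumed to mirror the getters used above):

void BuildAclOptionsExample() {
  auto context = std::make_shared<mindspore::Context>();
  auto ascend_info = std::make_shared<mindspore::AscendDeviceInfo>();
  ascend_info->SetDeviceID(0);
  context->MutableDeviceInfo().push_back(ascend_info);
  auto acl_options = std::make_shared<mindspore::AclModelOptions>(context);
  // Option maps fed to aclgrphBuildInitialize / aclgrphBuildModel respectively.
  auto [init_options, build_options] = acl_options->GenAclOptions();
  // Cache key: every non-empty option joined as "<key>^<value>^^...".
  std::string options_key = acl_options->GenAclOptionsKey();
}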

View File

@ -1,64 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_MODEL_ACL_ACL_MODEL_OPTIONS_H_
#define MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_MODEL_ACL_ACL_MODEL_OPTIONS_H_
#include <vector>
#include <string>
#include <map>
#include <tuple>
#include <memory>
#include <optional>
#include "include/api/types.h"
#include "include/api/status.h"
#include "include/api/context.h"
namespace mindspore {
class MS_API AclModelOptions {
public:
explicit AclModelOptions(const std::shared_ptr<Context> &context);
~AclModelOptions() = default;
std::string GenAclOptionsKey() const;
uint32_t GetDeviceID() const { return device_id_; }
void RenameInput(const std::vector<std::string> &);
// return tuple<init_options, build_options>
std::tuple<std::map<std::string, std::string>, std::map<std::string, std::string>> GenAclOptions() const;
void SetFirstGraph(bool is_first_graph) noexcept { first_graph_flag_ = is_first_graph; }
void SetOmFilePath(const std::string &file_path) noexcept { om_file_path_ = file_path; }
std::string GetOmFilePath() const { return om_file_path_; }
private:
std::string output_node_; // todo: at convert.cc::BuildGraph(), no atc options
// build options
std::string insert_op_cfg_path_;
std::string input_format_;
std::string input_shape_;
std::string output_type_;
std::string precision_mode_;
std::string op_select_impl_mode_;
std::string fusion_switch_cfg_path_;
std::string soc_version_ = "Ascend310";
std::string dynamic_batch_size_;
std::string dynamic_image_size_;
std::string buffer_optimize_mode_;
std::map<int, std::vector<int>> input_shape_map_;
// other options
uint32_t device_id_;
std::optional<bool> first_graph_flag_;
std::string om_file_path_;
};
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_MODEL_ACL_ACL_MODEL_OPTIONS_H_

View File

@ -1,45 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_ACL_UTILS_H_
#define MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_ACL_UTILS_H_
#include <string>
#include "acl/acl_base.h"
namespace mindspore {
static inline bool IsAscend910Soc() {
const char *soc_name_c = aclrtGetSocName();
if (soc_name_c == nullptr) {
return false;
}
std::string soc_name(soc_name_c);
if (soc_name.find("910") == std::string::npos) {
return false;
}
return true;
}
static inline bool IsAscendNo910Soc() {
const char *soc_name_c = aclrtGetSocName();
if (soc_name_c == nullptr) {
return false;
}
std::string soc_name(soc_name_c);
if (soc_name.find("910") != std::string::npos) {
return false;
}
return true;
}
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_ACL_UTILS_H_

View File

@ -1,154 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "extendrt/acl/acl_vm/acl_multi_graph_session.h"
#include <memory>
#include <deque>
#include <vector>
#include "backend/common/session/session_factory.h"
#include "backend/common/optimizer/optimizer.h"
#include "plugin/device/ascend/optimizer/enhancer/add_placeholder_for_dynamic_rnn.h"
#include "extendrt/acl/model_converter.h"
#include "extendrt/acl/acl_model_options.h"
#include "extendrt/acl/acl_vm/ms_tensor_ref.h"
#include "extendrt/cxx_api/graph/graph_data.h"
namespace mindspore::session {
void MultiGraphAclSession::Init(uint32_t device_id) { InitExecutor(kDavinciMultiGraphInferenceDevice, device_id); }
GraphId MultiGraphAclSession::CompileGraphImpl(const AnfNodePtrList &lst, const AnfNodePtrList &outputs) {
class FirstGraphModeGuard {
public:
explicit FirstGraphModeGuard(const std::shared_ptr<AclModelOptions> &options) : options_(options) {
if (options_ != nullptr) {
options_->SetFirstGraph(true);
}
}
~FirstGraphModeGuard() {
if (options_ != nullptr) {
options_->SetFirstGraph(false);
}
}
private:
std::shared_ptr<AclModelOptions> options_;
};
MS_LOG(INFO) << "Start MultiGraph Compile.";
// construct kernel graph
auto kernel_graph = SessionBasic::ConstructKernelGraph(lst, outputs, device::DeviceType::kUnknown, false);
MS_EXCEPTION_IF_NULL(kernel_graph);
auto optimizer = std::make_shared<opt::GraphOptimizer>();
auto pm = std::make_shared<opt::PassManager>("310_multi_graph_pm");
pm->AddPass(std::make_shared<opt::InsertPlaceholderForDynamicRNN>());
optimizer->AddPassManager(pm);
(void)optimizer->Optimize(kernel_graph);
kernel_graph->SetExecOrderByDefault();
// convert to om data
ModelConverter model_converter;
model_converter.set_options(options_);
FirstGraphModeGuard guard(options_);
auto om_data = model_converter.LoadMindIR(kernel_graph);
if (om_data.Data() == nullptr || om_data.DataSize() == 0) {
MS_LOG(ERROR) << "Load MindIR failed.";
return kMCFailed;
}
// load
std::shared_ptr<Graph> graph = std::make_shared<Graph>(std::make_shared<Graph::GraphData>(om_data, ModelType::kOM));
MS_EXCEPTION_IF_NULL(graph);
auto graph_cell = GraphCell(graph);
auto ret = graph_cell.Load(options_->GetDeviceID());
if (ret != kSuccess) {
MS_LOG(EXCEPTION) << "Load failed.";
}
graphs_[kernel_graph->graph_id()] = graph_cell;
kernel_graphs_[kernel_graph->graph_id()] = kernel_graph;
MS_LOG(INFO) << "Multi graph compile success, graph id " << kernel_graph->graph_id();
return kernel_graph->graph_id();
}
void MultiGraphAclSession::RunGraph(GraphId graph_id, const std::vector<MSTensor> &inputs, VectorRef *outputs) {
MS_EXCEPTION_IF_NULL(outputs);
MS_LOG(INFO) << "Start run graph " << graph_id;
auto iter = graphs_.find(graph_id);
if (iter == graphs_.end()) {
MS_LOG(EXCEPTION) << "Graph id " << graph_id << " not found.";
}
std::vector<MSTensor> out_tensors;
auto ret = iter->second.Run(inputs, &out_tensors);
if (ret != kSuccess) {
MS_LOG(EXCEPTION) << "Graph id " << graph_id << " run failed.";
}
std::deque<MSTensor> out_tensors_deque(out_tensors.begin(), out_tensors.end());
(*outputs) = ConstructOutputRef(graph_id, &out_tensors_deque);
}
VectorRef MultiGraphAclSession::ConstructOutputRef(GraphId graph_id, std::deque<MSTensor> *out_tensors) {
MS_EXCEPTION_IF_NULL(out_tensors);
VectorRef outs;
auto out_nodes = kernel_graphs_[graph_id]->outputs();
for (auto &out : out_nodes) {
auto item_with_index = common::AnfAlgo::VisitKernelWithReturnType(
out, 0, false, std::vector<PrimitivePtr>{prim::kPrimMakeTuple, prim::kPrimUpdateState, prim::kPrimStateSetItem});
auto &anf_node = item_with_index.first;
if (common::AnfAlgo::CheckPrimitiveType(anf_node, prim::kPrimMakeTuple)) {
auto cnode = anf_node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
outs.emplace_back(ConstructOutputRefByTupleNode(cnode, out_tensors));
} else if (AnfUtils::IsRealKernel(anf_node)) {
if (out_tensors->empty()) {
MS_LOG(EXCEPTION) << "Can not find MSTensor for output node " << out->DebugString()
<< ", visited: " << anf_node->DebugString();
}
outs.emplace_back(MSTensorRef(out_tensors->front()));
out_tensors->pop_front();
}
}
if (!out_tensors->empty()) {
MS_LOG(EXCEPTION) << "Number of output size " << outs.size() << " but " << out_tensors->size()
<< " MSTensor remained.";
}
return outs;
}
VectorRef MultiGraphAclSession::ConstructOutputRefByTupleNode(const CNodePtr &tuple_node,
std::deque<MSTensor> *out_tensors) {
MS_EXCEPTION_IF_NULL(out_tensors);
VectorRef outs;
for (size_t i = 1; i < tuple_node->inputs().size(); ++i) {
auto item_with_index = common::AnfAlgo::VisitKernelWithReturnType(
tuple_node->input(i), 0, false,
std::vector<PrimitivePtr>{prim::kPrimMakeTuple, prim::kPrimUpdateState, prim::kPrimStateSetItem});
auto &anf_node = item_with_index.first;
if (common::AnfAlgo::CheckPrimitiveType(anf_node, prim::kPrimMakeTuple)) {
auto cnode = anf_node->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(cnode);
outs.emplace_back(ConstructOutputRefByTupleNode(cnode, out_tensors));
} else if (AnfUtils::IsRealKernel(anf_node)) {
if (out_tensors->empty()) {
MS_LOG(EXCEPTION) << "Can not find MSTensor for output node " << tuple_node->input(i)->DebugString()
<< ", visited: " << anf_node->DebugString();
}
outs.emplace_back(MSTensorRef(out_tensors->front()));
out_tensors->pop_front();
}
}
return outs;
}
MS_REG_SESSION(kDavinciMultiGraphInferenceDevice, MultiGraphAclSession);
} // namespace mindspore::session

View File

@ -1,49 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_MODEL_ACL_ACL_VM_ACL_MULTI_GRAPH_SESSION_H_
#define MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_MODEL_ACL_ACL_VM_ACL_MULTI_GRAPH_SESSION_H_
#include <deque>
#include <vector>
#include <map>
#include <memory>
#include "include/api/types.h"
#include "include/api/cell.h"
#include "backend/common/session/session_basic.h"
namespace mindspore {
class AclModelOptions;
namespace session {
class MultiGraphAclSession : public session::SessionBasic {
public:
MultiGraphAclSession() = default;
~MultiGraphAclSession() override = default;
void Init(uint32_t device_id) override;
GraphId CompileGraphImpl(const AnfNodePtrList &lst, const AnfNodePtrList &outputs) override;
void RunGraph(GraphId graph_id, const std::vector<MSTensor> &inputs, VectorRef *outputs);
void SetOptions(const std::shared_ptr<AclModelOptions> &options) { options_ = options; }
private:
VectorRef ConstructOutputRef(GraphId graph_id, std::deque<MSTensor> *out_tensors);
VectorRef ConstructOutputRefByTupleNode(const CNodePtr &tuple_node, std::deque<MSTensor> *out_tensors);
std::map<GraphId, GraphCell> graphs_ = {};
std::map<GraphId, KernelGraphPtr> kernel_graphs_ = {};
std::shared_ptr<AclModelOptions> options_ = nullptr;
};
} // namespace session
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_MODEL_ACL_ACL_VM_ACL_MULTI_GRAPH_SESSION_H_

View File

@ -1,295 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "extendrt/acl/acl_vm/acl_vm.h"
#include <memory>
#include <string>
#include <vector>
#include "extendrt/acl/acl_model_options.h"
#include "extendrt/acl/acl_vm/acl_multi_graph_session.h"
#include "utils/trace_base.h"
namespace mindspore {
namespace {
inline bool IsMonadNode(const AnfNodePtr &node) {
if (IsPrimitiveCNode(node, prim::kPrimStateSetItem) || IsPrimitiveCNode(node, prim::kPrimUpdateState)) {
return true;
}
if (HasAbstractMonad(node)) {
return true;
}
return false;
}
} // namespace
AclBackend::AclBackend(const std::string &name, const std::string &target,
const std::shared_ptr<AclModelOptions> &options)
: MsBackend(name, target, options->GetDeviceID()) {
auto session = std::dynamic_pointer_cast<session::MultiGraphAclSession>(MsBackend::target_sess_);
MS_EXCEPTION_IF_NULL(session);
session->SetOptions(options);
}
VectorRef AclBackend::MsRunGraph(const GraphId &g, const VectorRef &args, const std::string &target) {
std::vector<MSTensor> inputs;
for (const auto &arg : args) {
if (!utils::isa<MSTensorRef>(arg)) {
MS_LOG(EXCEPTION) << "Invalid item " << arg.ToString();
}
auto wrapper = utils::cast<MSTensorRef>(arg);
inputs.emplace_back(wrapper.GetTensor());
}
VectorRef outputs;
MS_EXCEPTION_IF_NULL(target_sess_);
auto exec_sess = std::dynamic_pointer_cast<session::MultiGraphAclSession>(target_sess_);
MS_EXCEPTION_IF_NULL(exec_sess);
exec_sess->RunGraph(g, inputs, &outputs);
return outputs;
}
bool AclBackend::GetCond(const BaseRef &c, bool *value) {
MS_EXCEPTION_IF_NULL(value);
if (!utils::isa<MSTensorRef>(c)) {
MS_LOG(ERROR) << "Invalid item " << c.ToString() << " must be a MSTensorRef.";
return false;
}
auto wrapper = utils::cast<MSTensorRef>(c);
if (wrapper.GetTensor().DataType() != DataType::kNumberTypeBool) {
MS_LOG(ERROR) << "Invalid data type " << wrapper.GetTensor().DataType() << " must be bool.";
return false;
}
auto data = wrapper.GetTensor().Data();
if (data == nullptr) {
return false;
}
(*value) = *reinterpret_cast<const bool *>(data.get());
return true;
}
bool AclBackend::GetIndex(const BaseRef &c, int64_t *value) {
MS_EXCEPTION_IF_NULL(value);
if (!utils::isa<MSTensorRef>(c)) {
MS_LOG(ERROR) << "Invalid item " << c.ToString() << " must be a MSTensorRef.";
return false;
}
auto wrapper = utils::cast<MSTensorRef>(c);
if (wrapper.GetTensor().DataType() == DataType::kNumberTypeInt32) {
auto data = wrapper.GetTensor().Data();
if (data == nullptr) {
return false;
}
auto value_int32 = *reinterpret_cast<const int32_t *>(data.get());
(*value) = static_cast<int64_t>(value_int32);
return true;
} else if (wrapper.GetTensor().DataType() == DataType::kNumberTypeInt64) {
auto data = wrapper.GetTensor().Data();
if (data == nullptr) {
return false;
}
(*value) = *reinterpret_cast<const int64_t *>(data.get());
return true;
} else {
MS_LOG(ERROR) << "Index must be Int type.";
return false;
}
}
AclCompileGraph::AclCompileGraph(const std::shared_ptr<compile::MsBackend> &backend,
const std::vector<PrimitivePtr> &cut_list)
: CompileGraph(backend, cut_list) {}
void AclCompileGraph::AddInst(const compile::Instruction &inst, const MSTensorRef &arg) {
VectorRef args;
args.push_back(arg);
compile::CompileGraph::AddInst(inst, args);
}
int64_t AclCompileGraph::Ref(const AnfNodePtr &node) {
MS_EXCEPTION_IF_NULL(node);
MS_LOG(DEBUG) << "Start Ref node " << node->DebugString(true) << " height_: " << height_;
if (slots_.count(node) == 0 && node->isa<ValueNode>()) {
if (IsValueNode<FuncGraph>(node)) {
MS_LOG(DEBUG) << "Push graph.";
compile::CompileGraph::AddInst(compile::Instruction::kGraph, GetValueNode(node));
} else {
MS_LOG(DEBUG) << "Push.";
if (IsValueNode<Primitive>(node)) {
MS_LOG(EXCEPTION) << "must not be primitive in here NodeInfo: " << trace::GetDebugInfo(node->debug_info());
} else if (IsValueNode<tensor::Tensor>(node)) {
auto tensor_node = std::dynamic_pointer_cast<tensor::Tensor>(node->cast<ValueNodePtr>()->value());
MS_EXCEPTION_IF_NULL(tensor_node);
std::string name = "";
std::vector<int64_t> shape = tensor_node->shape_c();
DataType type = static_cast<DataType>(tensor_node->data_type_c());
auto mstensor_node = MSTensor::CreateRefTensor(name, type, shape, tensor_node->data_c(), tensor_node->Size());
MSTensorRef mstensor_ref(*mstensor_node);
AddInst(compile::Instruction::kPush, mstensor_ref);
MSTensor::DestroyTensorPtr(mstensor_node);
} else {
compile::CompileGraph::AddInst(compile::Instruction::kPush, GetValueNode(node));
}
}
Push(node);
} else if (auto const_parameter = dyn_cast<Parameter>(node);
slots_.count(node) == 0 && const_parameter != nullptr && const_parameter->has_default()) {
auto value = const_parameter->default_param();
MS_EXCEPTION_IF_NULL(value);
if (value->isa<tensor::Tensor>()) {
auto tensor_node = std::dynamic_pointer_cast<tensor::Tensor>(value);
MS_EXCEPTION_IF_NULL(tensor_node);
std::vector<int64_t> shape = tensor_node->shape_c();
DataType type = static_cast<DataType>(tensor_node->data_type_c());
auto mstensor_node =
MSTensor::CreateRefTensor(const_parameter->name(), type, shape, tensor_node->data_c(), tensor_node->Size());
MSTensorRef mstensor_ref(*mstensor_node);
AddInst(compile::Instruction::kPush, mstensor_ref);
MSTensor::DestroyTensorPtr(mstensor_node);
} else {
compile::CompileGraph::AddInst(compile::Instruction::kPush, value);
}
Push(node);
}
MS_LOG(DEBUG) << "End Ref node end height_: " << height_ << ", slots: " << slots_[node]
<< ", return: " << slots_[node] - height_;
return slots_[node] - height_;
}
void AclCompileGraph::AddExternal(const compile::LinConvertResult &result) {
VectorRef args;
args.push_back(result.run);
args.push_back(result.simu_run);
size_t size = result.inputs.size();
for (size_t i = 0; i < size; ++i) {
const auto &input = result.inputs[i];
MS_EXCEPTION_IF_NULL(input);
if (auto parameter = dyn_cast<Parameter>(input); parameter != nullptr && parameter->has_default()) {
MS_LOG(DEBUG) << parameter->DebugString() << " has default value, will not be pushed as inputs.";
continue;
}
if (IsMonadNode(input)) {
MS_LOG(DEBUG) << input->DebugString() << " is monad node, will not be pushed as inputs.";
continue;
}
args.emplace_back(Ref(input));
}
compile::CompileGraph::AddInst(compile::Instruction::kExternal, args);
size_t out_count = 0;
for (auto &out : result.outputs) {
if (IsMonadNode(out)) {
continue;
}
++out_count;
Push(out);
}
MS_LOG(DEBUG) << "Args size " << args.size() << " out size " << out_count;
}
void AclCompileGraph::AddInput(const AnfNodePtr &node) {
MS_EXCEPTION_IF_NULL(node);
if (IsMonadNode(node)) {
return;
}
if (slots_.count(node) == 0) {
MS_LOG(DEBUG) << "Input node is null " << node->DebugString(true);
(void)Ref(node);
return;
}
compile::CompileGraph::AddInst(compile::Instruction::kInput, Ref(node));
set_height(height_ + 1);
}
void AclCompileGraph::AddPartial(const CNodePtr &node) {
MS_EXCEPTION_IF_NULL(node);
auto inputs = node->inputs();
VectorRef args;
if (inputs.size() <= 1) {
MS_LOG(EXCEPTION) << "The node:" << node->DebugString() << "do not have two input.";
}
auto fn = inputs[1];
if (!IsValueNode<FuncGraph>(fn)) {
MS_LOG(EXCEPTION) << "The type of 1st input of node must be FuncGraph";
}
for (size_t i = 1; i < inputs.size(); i++) {
if (IsMonadNode(inputs[i])) {
continue;
}
args.emplace_back(Ref(inputs[i]));
}
compile::CompileGraph::AddInst(compile::Instruction::kPartial, args);
}
int64_t AclCompileGraph::AddCall(const FuncGraphPtr &graph, const CNodePtr &node) {
MS_EXCEPTION_IF_NULL(graph);
MS_EXCEPTION_IF_NULL(node);
auto inputs = node->inputs();
AnfNodePtr fn = inputs[0];
(void)Ref(fn);
size_t size = inputs.size();
size_t non_monad_size = size;
for (size_t i = size - 1; i > 0; --i) {
if (IsMonadNode(inputs[i])) {
--non_monad_size;
continue;
}
AddInput(inputs[i]);
}
if (node == graph->output()) {
AddTailCall(fn, non_monad_size);
return RET_BREAK;
}
MS_LOG(DEBUG) << "Call:" << Ref(fn) << ", " << height_ << ", " << (non_monad_size - 1);
compile::CompileGraph::AddInst(compile::Instruction::kCall, Ref(fn));
Ret(static_cast<int64_t>(non_monad_size - 1));
for (size_t i = size - 1; i > 0; i--) {
const auto iter = slots_.find(inputs[i]);
if (iter != slots_.end() && iter->second >= height_) {
slots_.erase(inputs[i]);
}
}
return RET_SUCCESS;
}
void AclCompileGraph::PushParameters(const FuncGraphPtr &func_graph) {
MS_EXCEPTION_IF_NULL(func_graph);
std::vector<AnfNodePtr> parameters = func_graph->parameters();
for (size_t i = parameters.size(); i != 0; i--) {
MS_EXCEPTION_IF_NULL(parameters[i - 1]);
auto param = parameters[i - 1]->cast<ParameterPtr>();
MS_EXCEPTION_IF_NULL(param);
if (param->has_default()) {
MS_LOG(DEBUG) << "Parameter " << (i - 1) << ": " << param->DebugString() << " has default value, skip.";
continue;
}
if (IsMonadNode(param)) {
MS_LOG(DEBUG) << "Parameter " << (i - 1) << ": " << param->DebugString() << " has monad type, skip.";
continue;
}
Push(param);
MS_LOG(DEBUG) << "Push parameter " << (i - 1) << ": " << param->DebugString();
}
}
AclCompileGraphs::AclCompileGraphs(const std::shared_ptr<compile::MsBackend> &backend,
const std::vector<PrimitivePtr> &cut_list)
: CompileGraphs(backend, cut_list) {
MS_EXCEPTION_IF_NULL(backend);
MS_LOG(DEBUG) << "Start vm: " << backend->name();
transform_ = std::make_shared<AclCompileGraph>(backend, cut_list);
Reset();
}
} // namespace mindspore

View File

@ -1,62 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_MODEL_ACL_ACL_VM_ACL_VM_H_
#define MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_MODEL_ACL_ACL_VM_ACL_VM_H_
#include <vector>
#include <memory>
#include <string>
#include "backend/graph_compiler/transform.h"
#include "backend/graph_compiler/backend.h"
#include "extendrt/acl/acl_vm/ms_tensor_ref.h"
namespace mindspore {
class AclModelOptions;
class AclBackend : public compile::MsBackend {
public:
AclBackend(const std::string &name, const std::string &target, const std::shared_ptr<AclModelOptions> &options);
~AclBackend() override = default;
VectorRef MsRunGraph(const GraphId &g, const VectorRef &args, const std::string &target) override;
bool GetCond(const BaseRef &c, bool *value) override;
bool GetIndex(const BaseRef &c, int64_t *value) override;
};
class AclCompileGraph : public compile::CompileGraph {
public:
explicit AclCompileGraph(const std::shared_ptr<compile::MsBackend> &backend,
const std::vector<PrimitivePtr> &cut_list);
~AclCompileGraph() override = default;
int64_t Ref(const AnfNodePtr &node) override;
void AddExternal(const compile::LinConvertResult &result) override;
void AddInput(const AnfNodePtr &node) override;
void AddPartial(const CNodePtr &node) override;
int64_t AddCall(const FuncGraphPtr &graph, const CNodePtr &node) override;
void PushParameters(const FuncGraphPtr &func_graph) override;
private:
void AddInst(const compile::Instruction &inst, const MSTensorRef &arg);
};
class AclCompileGraphs : public compile::CompileGraphs {
public:
explicit AclCompileGraphs(const std::shared_ptr<compile::MsBackend> &backend,
const std::vector<PrimitivePtr> &cut_list);
~AclCompileGraphs() override = default;
};
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_MODEL_ACL_ACL_VM_ACL_VM_H_

View File

@ -1,78 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "extendrt/acl/acl_vm/ms_tensor_ref.h"
#include <algorithm>
namespace mindspore {
VectorRef MSTensorRef::Convert(const std::vector<MSTensor> &tensors) {
VectorRef res;
(void)std::transform(tensors.begin(), tensors.end(), std::back_inserter(res),
[](const MSTensor &t) { return MSTensorRef(t); });
return res;
}
std::vector<MSTensor> MSTensorRef::Convert(const BaseRef &args) {
std::vector<MSTensor> res;
if (utils::isa<VectorRef>(args)) {
VectorRef args_vec = utils::cast<VectorRef>(args);
res = ConvertTuple(args_vec);
} else if (utils::isa<MSTensorRef>(args)) {
auto wrapper = utils::cast<MSTensorRef>(args);
res.push_back(wrapper.ms_tensor_);
} else {
MS_LOG(EXCEPTION) << "Invalid BaseRef " << args.ToString() << " must be MSTensorRef or VectorRef{MSTensorRef...}";
}
return res;
}
std::shared_ptr<Base> MSTensorRef::copy() const {
MSTensor *tensor = ms_tensor_.Clone();
auto res = std::make_shared<MSTensorRef>(static_cast<const MSTensor &>(*tensor));
MSTensor::DestroyTensorPtr(tensor);
return res;
}
bool MSTensorRef::operator==(const BaseRef &other) const {
if (!utils::isa<MSTensorRef>(other)) {
return false;
}
auto other_ms_tensor = utils::cast<MSTensorRef>(other).ms_tensor_;
auto this_ms_tensor = ms_tensor_;
return (this_ms_tensor.Name() == other_ms_tensor.Name()) && (this_ms_tensor.Shape() == other_ms_tensor.Shape()) &&
(this_ms_tensor.MutableData() == other_ms_tensor.MutableData()) &&
(this_ms_tensor.DataSize() == other_ms_tensor.DataSize()) &&
(this_ms_tensor.DataType() == other_ms_tensor.DataType());
}
std::vector<MSTensor> MSTensorRef::ConvertTuple(const VectorRef &args) {
std::vector<MSTensor> outs;
for (size_t i = 0; i < args.size(); ++i) {
const auto &item = args[i];
if (utils::isa<VectorRef>(item)) {
VectorRef item_vec = utils::cast<VectorRef>(item);
auto ret = ConvertTuple(item_vec);
(void)outs.insert(outs.end(), ret.begin(), ret.end());
} else if (utils::isa<MSTensorRef>(item)) {
auto wrapper = utils::cast<MSTensorRef>(item);
outs.push_back(wrapper.ms_tensor_);
} else {
MS_LOG(EXCEPTION) << "Invalid BaseRef " << args.ToString() << " must be MSTensorRef or VectorRef{MSTensorRef...}";
}
}
return outs;
}
} // namespace mindspore
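A short sketch of the round trip these helpers provide (hypothetical usage; it reuses the CreateTensor/DestroyTensorPtr pattern seen elsewhere in this commit):

void PackAndUnpackExample() {
  auto t = mindspore::MSTensor::CreateTensor("out0", mindspore::DataType::kNumberTypeFloat32, {1, 3}, nullptr,
                                             3 * sizeof(float));
  std::vector<mindspore::MSTensor> tensors = {*t};
  mindspore::MSTensor::DestroyTensorPtr(t);
  // Wrap for the VM: each MSTensor becomes an MSTensorRef inside a VectorRef.
  mindspore::VectorRef packed = mindspore::MSTensorRef::Convert(tensors);
  // Unwrap: accepts a single MSTensorRef or a (possibly nested) VectorRef of them.
  std::vector<mindspore::MSTensor> unpacked = mindspore::MSTensorRef::Convert(packed);
}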

View File

@ -1,49 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_MODEL_ACL_ACL_VM_MS_TENSOR_REF_H_
#define MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_MODEL_ACL_ACL_VM_MS_TENSOR_REF_H_
#include <memory>
#include <string>
#include <vector>
#include "include/api/types.h"
#include "mindspore/core/base/base_ref.h"
namespace mindspore {
class MSTensorRef : public BaseRef {
public:
MS_DECLARE_PARENT(MSTensorRef, BaseRef);
static VectorRef Convert(const std::vector<MSTensor> &tensors);
static std::vector<MSTensor> Convert(const BaseRef &args);
explicit MSTensorRef(const MSTensor &tensor) : ms_tensor_(tensor) {}
~MSTensorRef() override = default;
const MSTensor &GetTensor() const { return ms_tensor_; }
std::shared_ptr<Base> copy() const override;
uint32_t type() const override { return tid(); }
std::string ToString() const override { return ms_tensor_.Name(); }
bool operator==(const BaseRef &other) const override;
private:
static std::vector<MSTensor> ConvertTuple(const VectorRef &args);
MSTensor ms_tensor_;
};
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_MODEL_ACL_ACL_VM_MS_TENSOR_REF_H_

View File

@ -1,256 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "extendrt/acl/model_converter.h"
#include <memory>
#include "include/transform/graph_ir/convert.h"
#include "include/transform/graph_ir/graph_runner.h"
#include "mindspore/core/utils/ms_context.h"
#include "include/api/serialization.h"
#include "graph/model.h"
#include "extendrt/acl/model_converter_utils/multi_process.h"
namespace mindspore {
namespace {
transform::TensorOrderMap GetParams(const FuncGraphPtr &anf_graph) {
transform::TensorOrderMap res;
for (auto &anf_node : anf_graph->parameters()) {
MS_EXCEPTION_IF_NULL(anf_node);
auto para = anf_node->cast<ParameterPtr>();
MS_EXCEPTION_IF_NULL(para);
if (para->has_default()) {
auto value = para->default_param();
MS_EXCEPTION_IF_NULL(value);
auto tensor = value->cast<std::shared_ptr<tensor::Tensor>>();
res.emplace(para->name(), tensor);
MS_LOG(INFO) << "Parameter " << para->name() << " has default value.";
}
}
return res;
}
bool CreateSessionAndGraphRunner() {
std::shared_ptr<ge::Session> sess = transform::DfGraphManager::GetInstance().GetGeSession();
if (sess == nullptr) {
transform::SessionOptions options;
options["ge.trainFlag"] = "0";
options["ge.enablePrintOpPass"] = "0";
sess = transform::GraphRunner::NewSession(options);
transform::DfGraphManager::GetInstance().SetGeSession(sess);
}
transform::GraphRunnerOptions options;
options.sess_ptr = sess;
auto graph_runner = std::make_shared<transform::GraphRunner>(options);
if (graph_runner == nullptr) {
MS_LOG(ERROR) << "Create new graph runner failed";
return false;
} else {
transform::DfGraphManager::GetInstance().SetGraphRunner(graph_runner);
}
return true;
}
} // namespace
transform::DfGraphPtr ModelConverter::ConvertFuncGraphToAIR(const FuncGraphPtr &anf_graph) {
MS_EXCEPTION_IF_NULL(anf_graph);
transform::DfGraphConvertor converter(anf_graph);
std::string net_id = "0";
std::string init_graph = "init_subgraph." + net_id;
std::string checkpoint_name = "save." + net_id;
converter.set_training(false);
(void)converter.ConvertAllNode().InitParam(GetParams(anf_graph)).BuildGraph();
(void)converter.GenerateCheckpointGraph();
if (converter.ErrCode() != 0) {
transform::DfGraphManager::GetInstance().ClearGraph();
MS_LOG(ERROR) << "Convert df graph failed, err:" << converter.ErrCode();
return nullptr;
}
(void)transform::DfGraphManager::GetInstance().AddGraph(anf_graph->ToString(), converter.GetComputeGraph());
(void)transform::DfGraphManager::GetInstance().AddGraph(init_graph, converter.GetInitGraph());
(void)transform::DfGraphManager::GetInstance().AddGraph(BROADCAST_GRAPH_NAME, converter.GetBroadcastGraph());
transform::Status ret =
transform::DfGraphManager::GetInstance().AddGraph(checkpoint_name, converter.GetSaveCheckpointGraph());
if (ret == transform::Status::SUCCESS) {
transform::DfGraphManager::GetInstance().SetAnfGraph(checkpoint_name, anf_graph);
}
(void)setenv("GE_TRAIN", "0", 1);
if (!CreateSessionAndGraphRunner()) {
MS_LOG(ERROR) << "Create GE Session or GraphRunner failed.";
return nullptr;
}
auto wrap_ptr = transform::DfGraphManager::GetInstance().GetGraphByName(anf_graph->ToString());
if (wrap_ptr == nullptr) {
MS_LOG(ERROR) << "Get graph form DfGraphManager failed!";
return nullptr;
}
transform::DfGraphPtr &ge_graph = wrap_ptr->graph_ptr_;
if (ge_graph == nullptr) {
MS_LOG(ERROR) << "The export graph is null";
return nullptr;
}
return ge_graph;
}
Buffer ModelConverter::BuildAirModel(const transform::DfGraphPtr &graph,
const std::map<std::string, std::string> &init_options,
const std::map<std::string, std::string> &build_options) {
ge::ModelBufferData model;
auto ret = ge::aclgrphBuildInitialize(init_options);
if (ret != ge::SUCCESS) {
MS_LOG(ERROR) << "Call aclgrphBuildInitialize fail.";
return Buffer();
}
ret = ge::aclgrphBuildModel(*graph, build_options, model);
if (ret != ge::SUCCESS) {
MS_LOG(ERROR) << "Call aclgrphBuildModel fail.";
return Buffer();
}
if (SaveModel(model) != kSuccess) {
MS_LOG(ERROR) << "Save model failed.";
return Buffer();
}
ge::aclgrphBuildFinalize();
return Buffer(model.data.get(), model.length);
}
Status ModelConverter::SaveModel(const ge::ModelBufferData &model) {
#ifdef BUILD_LITE
std::string file_path;
auto option = options_.lock();
if (option != nullptr) {
file_path = option->GetOmFilePath();
}
if (file_path.empty()) {
MS_LOG(INFO) << "File path is empty, there is no need to save model";
return kSuccess;
}
MS_LOG(INFO) << "Om file path: " << file_path;
auto ret = ge::aclgrphSaveModel(file_path, model);
if (ret != ge::SUCCESS) {
MS_LOG(ERROR) << "Call aclgrphSaveModel fail.";
return kMCFailed;
}
#endif
return kSuccess;
}
Buffer ModelConverter::LoadMindIR(const FuncGraphPtr &func_graph) {
MultiProcess multi_process;
Buffer buffer_ret;
auto parent_process = [&func_graph, &buffer_ret, this](MultiProcess *multi_process) -> Status {
MS_EXCEPTION_IF_NULL(multi_process);
auto df_graph = ConvertFuncGraphToAIR(func_graph);
if (df_graph == nullptr) {
MS_LOG(ERROR) << "Convert FuncGraph to AscendIR failed.";
return kMCFailed;
}
ge::Model model;
ge::Buffer model_data;
model.SetGraph(*df_graph);
auto ge_ret = model.Save(model_data);
if (ge_ret != ge::SUCCESS) {
MS_LOG(ERROR) << "Save ge model to buffer failed.";
return kMCFailed;
}
// send original model to child
auto status = multi_process->SendMsg(model_data.data(), model_data.size());
if (status != kSuccess) {
MS_LOG_ERROR << "Send original model to child process failed";
return status;
}
// receive convert model result from child
CreateBufferCall call = [&buffer_ret](size_t msg_len) -> uint8_t * {
(void)buffer_ret.ResizeData(msg_len);
return reinterpret_cast<uint8_t *>(buffer_ret.MutableData());
};
status = multi_process->ReceiveMsg(call);
if (status != kSuccess) {
MS_LOG_ERROR << "Receive result model from child process failed";
return status;
}
return kSuccess;
};
auto child_process = [this](MultiProcess *multi_process) -> Status {
MS_EXCEPTION_IF_NULL(multi_process);
// receive original model from parent
Buffer model;
CreateBufferCall call = [&model](size_t msg_len) -> uint8_t * {
(void)model.ResizeData(msg_len);
return reinterpret_cast<uint8_t *>(model.MutableData());
};
auto status = multi_process->ReceiveMsg(call);
if (status != kSuccess) {
MS_LOG_ERROR << "Receive original model from parent process failed";
return status;
}
Buffer model_result = LoadAscendIRInner(model);
if (model_result.DataSize() == 0) {
MS_LOG_ERROR << "Convert model from MindIR to OM failed";
return kMCFailed;
}
// send result model to parent
status = multi_process->SendMsg(model_result.Data(), model_result.DataSize());
if (status != kSuccess) {
MS_LOG_ERROR << "Send result model to parent process failed";
return status;
}
return kSuccess;
};
auto status = multi_process.MainProcess(parent_process, child_process);
if (status != kSuccess) {
MS_LOG_ERROR << "Convert MindIR model to OM model failed";
} else {
MS_LOG_INFO << "Convert MindIR model to OM model success";
}
return buffer_ret;
}
Buffer ModelConverter::LoadAscendIRInner(const Buffer &model_data) {
ge::Model load_model = ge::Model("loadmodel", "version2");
ge::Status ret =
ge::Model::Load(reinterpret_cast<const uint8_t *>(model_data.Data()), model_data.DataSize(), load_model);
if (ret != ge::GRAPH_SUCCESS) {
MS_LOG(ERROR) << "Load AscendIR failed, ret = " << ret;
return Buffer();
}
transform::DfGraphPtr df_graph = std::make_shared<transform::DfGraph>(load_model.GetGraph());
if (df_graph == nullptr) {
MS_LOG(ERROR) << "Convert FuncGraph to AscendIR failed.";
return Buffer();
}
std::map<std::string, std::string> init_options;
std::map<std::string, std::string> build_options;
auto option = options_.lock();
if (option != nullptr) {
std::tie(init_options, build_options) = option->GenAclOptions();
}
return BuildAirModel(df_graph, init_options, build_options);
}
} // namespace mindspore

View File

@ -1,50 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_MODEL_ACL_MODEL_CONVERTER_H_
#define MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_MODEL_ACL_MODEL_CONVERTER_H_
#include <vector>
#include <string>
#include <map>
#include <memory>
#include "include/api/types.h"
#include "include/api/status.h"
#include "mindspore/core/ir/func_graph.h"
#include "include/transform/graph_ir/types.h"
#include "external/ge/ge_ir_build.h"
#include "extendrt/acl/acl_model_options.h"
namespace mindspore {
class MS_API ModelConverter {
public:
ModelConverter() : options_() {}
~ModelConverter() = default;
Buffer LoadMindIR(const FuncGraphPtr &func_graph);
void set_options(const std::weak_ptr<AclModelOptions> &options) { options_ = options; }
Status SaveModel(const ge::ModelBufferData &model);
private:
transform::DfGraphPtr ConvertFuncGraphToAIR(const FuncGraphPtr &anf_graph);
Buffer BuildAirModel(const transform::DfGraphPtr &graph, const std::map<std::string, std::string> &init_options,
const std::map<std::string, std::string> &build_options);
Buffer LoadAscendIRInner(const Buffer &model_data);
std::weak_ptr<AclModelOptions> options_;
};
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_MODEL_ACL_MODEL_CONVERTER_H_
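Condensed from MultiGraphAclSession::CompileGraphImpl above, the converter's expected call pattern is roughly the following (a sketch; acl_options and func_graph are caller-provided placeholders):

mindspore::Buffer ConvertToOm(const mindspore::FuncGraphPtr &func_graph,
                              const std::shared_ptr<mindspore::AclModelOptions> &acl_options) {
  mindspore::ModelConverter converter;
  converter.set_options(acl_options);  // held internally as a weak_ptr
  mindspore::Buffer om_data = converter.LoadMindIR(func_graph);
  if (om_data.Data() == nullptr || om_data.DataSize() == 0) {
    // conversion from MindIR to OM failed
  }
  return om_data;
}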

View File

@ -1,246 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "extendrt/acl/model_converter_utils/multi_process.h"
#include <unistd.h>
#include <sys/wait.h>
#include <algorithm>
#include <vector>
#include <thread>
#include "mindspore/core/utils/log_adapter.h"
#include "extendrt/acl/model_converter_utils/shared_memory.h"
namespace mindspore {
namespace {
constexpr uint64_t kSharedMemorySize = 100ull << 20; // 100 MB
constexpr timespec kOneMillisecond = {
0, // 0 seconds
1 * 1000L * 1000L, // And 1 ms
};
constexpr timespec kOneHundredMilliseconds = {
0, // 0 seconds
100 * 1000L * 1000L, // And 100 ms
};
} // namespace
MultiProcess::MultiProcess() = default;
MultiProcess::~MultiProcess() = default;
Status MultiProcess::MainProcess(const ProcessFuncCall &parent_process, const ProcessFuncCall &child_process) {
MS_EXCEPTION_IF_NULL(parent_process);
MS_EXCEPTION_IF_NULL(child_process);
Status ret;
memory_size_ = kSharedMemorySize; // 100 MB
SharedMemory shared_memory;
ret = shared_memory.Create(memory_size_);
if (ret != kSuccess) {
MS_LOG_ERROR << "Create shared memory failed";
return ret;
}
pid_t pid = fork();
if (pid < 0) {
shared_memory.Destroy();
MS_LOG_ERROR << "Fork process to convert model failed";
return kMEFailed;
}
ret = shared_memory.Attach();
if (ret != kSuccess) {
MS_LOG_ERROR << "Process attach shared memory failed, pid " << pid;
return ret;
}
shmat_addr_ = shared_memory.GetSharedMemoryAddr();
if (shmat_addr_ == nullptr) {
MS_LOG_ERROR << "Get shared memory failed";
return ret;
}
constexpr size_t kMsgStructNum = 2;
shmat_data_addr_ = shmat_addr_ + sizeof(MessageFlag) * kMsgStructNum;
shmat_data_max_size_ =
memory_size_ - (reinterpret_cast<uintptr_t>(shmat_data_addr_) - reinterpret_cast<uintptr_t>(shmat_addr_));
MS_LOG_INFO << "Shm addr " << (uintptr_t)shmat_addr_;
if (pid == 0) {
ChildProcess(child_process);
shared_memory.Detach();
MS_LOG_INFO << "Model converter: child process sleep waiting for exit signal.";
while (1) {
// waiting for signal
}
} else { // parent process
ret = ParentProcess(parent_process);
shared_memory.Detach();
MS_LOG_INFO << "Model converter: parent process kills child of fork.";
(void)kill(pid, SIGKILL);
constexpr uint32_t kMaxLoopCount = 5;
bool child_exited = false;
for (uint32_t i = 0; i < kMaxLoopCount; ++i) {
int status;
if (waitpid(pid, &status, WNOHANG) == pid) {
MS_LOG(INFO) << "Child process " << pid << " exits success.";
child_exited = true;
break;
}
(void)sleep(1);
}
if (!child_exited) {
MS_LOG(WARNING) << "Child process " << pid << " has been killed but waitpid failed.";
}
shared_memory.Destroy();
}
return ret;
}
Status MultiProcess::ParentProcess(const ProcessFuncCall &parent_process) {
auto parent_msg = reinterpret_cast<MessageFlag *>(shmat_addr_);
auto child_msg = reinterpret_cast<MessageFlag *>(shmat_addr_ + sizeof(MessageFlag));
send_msg_ = parent_msg;
receive_msg_ = child_msg;
std::thread heartbeat_thread(MultiProcess::HeartbeatThreadFunc, this);
Status ret;
try {
ret = parent_process(this);
if (ret != kSuccess) {
MS_LOG_ERROR << "Parent process process failed";
}
} catch (const std::runtime_error &ex) {
MS_LOG_ERROR << "Catch parent process runtime error: " << ex.what();
ret = kMEFailed;
}
stopped_ = true;
send_msg_->stop = 1;
heartbeat_thread.join();
return ret;
}
void MultiProcess::ChildProcess(const ProcessFuncCall &child_process) {
auto parent_msg = reinterpret_cast<MessageFlag *>(shmat_addr_);
auto child_msg = reinterpret_cast<MessageFlag *>(shmat_addr_ + sizeof(MessageFlag));
send_msg_ = child_msg;
receive_msg_ = parent_msg;
std::thread heartbeat_thread(MultiProcess::HeartbeatThreadFunc, this);
try {
MS_EXCEPTION_IF_NULL(child_process);
auto ret = child_process(this);
if (ret != kSuccess) {
MS_LOG_ERROR << "Child process process failed";
}
} catch (const std::runtime_error &ex) {
MS_LOG_ERROR << "Catch child process runtime error: " << ex.what();
}
stopped_ = true;
send_msg_->stop = 1;
heartbeat_thread.join();
}
Status MultiProcess::SendMsg(const void *buffer, uint64_t msg_len) {
MS_EXCEPTION_IF_NULL(buffer);
MS_LOG_INFO << "Start to send message to peer process, msg len " << msg_len;
send_msg_->msg_total_len = msg_len;
uint64_t cur_offset = 0;
while (msg_len > cur_offset) {
uint64_t sub_msg_len = std::min(msg_len - cur_offset, shmat_data_max_size_);
if (sub_msg_len == 0) {
MS_LOG(ERROR) << "Invalid message len " << sub_msg_len;
return kMEFailed;
}
auto ret =
memcpy_s(shmat_data_addr_, shmat_data_max_size_, static_cast<const uint8_t *>(buffer) + cur_offset, sub_msg_len);
if (ret != EOK) {
MS_LOG(ERROR) << "memcpy_s failed, ret = " << ret;
return kMEFailed;
}
cur_offset += sub_msg_len;
send_msg_->msg_len = sub_msg_len;
send_msg_->read_finish_flag = 0;
send_msg_->read_ready_flag = 1;
MS_LOG_INFO << "Send start " << cur_offset << ", msg len " << sub_msg_len << ", total len " << msg_len;
while (!send_msg_->read_finish_flag && !peer_stopped_) {
(void)nanosleep(&kOneMillisecond, nullptr); // 1ms
}
if (peer_stopped_) {
if (!send_msg_->read_finish_flag) {
return kMEFailed;
}
break;
}
MS_LOG_INFO << "Send end " << cur_offset << ", msg len " << sub_msg_len << ", total len " << msg_len;
}
MS_LOG_INFO << "End to send message to peer process, msg len " << msg_len;
return kSuccess;
}
Status MultiProcess::ReceiveMsg(const CreateBufferCall &create_buffer_call) {
uint64_t cur_offset = 0;
uint8_t *msg_buffer = nullptr;
uint64_t msg_len = 0;
do {
MS_LOG_INFO << "Receive start from " << cur_offset;
while (!receive_msg_->read_ready_flag && !peer_stopped_) {
(void)nanosleep(&kOneMillisecond, nullptr); // 1ms
}
if (peer_stopped_) {
return kMEFailed;
}
if (msg_buffer == nullptr) {
msg_len = receive_msg_->msg_total_len;
msg_buffer = create_buffer_call(msg_len);
}
MS_EXCEPTION_IF_NULL(msg_buffer);
auto ret = memcpy_s(msg_buffer + cur_offset, msg_len - cur_offset, shmat_data_addr_, receive_msg_->msg_len);
if (ret != EOK) {
MS_LOG(INFO) << "memcpy_s failed, ret = " << ret;
return kMEFailed;
}
cur_offset += receive_msg_->msg_len;
receive_msg_->read_ready_flag = 0;
receive_msg_->read_finish_flag = 1;
MS_LOG_INFO << "Receive end, current length " << cur_offset << ", total length " << msg_len << std::endl;
} while (msg_len > cur_offset);
return kSuccess;
}
void MultiProcess::HeartbeatThreadFunc(MultiProcess *multi_process) { multi_process->HeartbeatThreadFuncInner(); }
void MultiProcess::HeartbeatThreadFuncInner() {
constexpr uint64_t kOvertime = 1024;
uint64_t last_beat_cnt = 0;
uint64_t repeat_cnt = 0;
while (!stopped_) {
if (receive_msg_->stop) {
peer_stopped_ = true;
MS_LOG_WARNING << "Peer stopped";
break;
}
uint64_t heartbeat_gap = receive_msg_->heartbeat - last_beat_cnt;
if (heartbeat_gap > 0 && heartbeat_gap < kOvertime) {
last_beat_cnt = receive_msg_->heartbeat;
repeat_cnt = 0;
} else {
repeat_cnt++;
if (repeat_cnt > 30) { // 30*100ms = 3s no reply
peer_stopped_ = true;
MS_LOG_WARNING << "Peer stopped";
break;
}
}
send_msg_->heartbeat += 1;
(void)nanosleep(&kOneHundredMilliseconds, nullptr); // sleep 100 ms
}
}
} // namespace mindspore

View File

@ -1,62 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_MODEL_MODEL_CONVERTER_UTILS_MULTI_PROCESS_H_
#define MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_MODEL_MODEL_CONVERTER_UTILS_MULTI_PROCESS_H_
#include <iostream>
#include <functional>
#include "include/api/status.h"
namespace mindspore {
struct MessageFlag {
uint64_t heartbeat = 0;
uint64_t stop = false;
uint64_t msg_len = 0;
uint64_t msg_total_len = 0;
uint64_t read_ready_flag = false;
uint64_t read_finish_flag = false;
};
class MultiProcess;
using ProcessFuncCall = std::function<Status(MultiProcess *multi_process)>;
using CreateBufferCall = std::function<uint8_t *(size_t msg_len)>;
class MultiProcess {
public:
MultiProcess();
~MultiProcess();
Status MainProcess(const ProcessFuncCall &parent_process, const ProcessFuncCall &child_process);
Status SendMsg(const void *buffer, uint64_t msg_len);
Status ReceiveMsg(const CreateBufferCall &create_buffer_call);
private:
uint8_t *shmat_addr_ = nullptr;
uint8_t *shmat_data_addr_ = nullptr;
uint64_t shmat_data_max_size_ = 0;
uint64_t memory_size_ = 0;
bool peer_stopped_ = false;
bool stopped_ = false;
MessageFlag *send_msg_ = nullptr;
MessageFlag *receive_msg_ = nullptr;
static void HeartbeatThreadFunc(MultiProcess *multi_process);
void HeartbeatThreadFuncInner();
Status ParentProcess(const ProcessFuncCall &parent_process);
void ChildProcess(const ProcessFuncCall &child_process);
};
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_MODEL_MODEL_CONVERTER_UTILS_MULTI_PROCESS_H_
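A condensed sketch of how MainProcess is typically driven (it mirrors ModelConverter::LoadMindIR above; the payload and names here are purely illustrative):

mindspore::Status RunInForkedChild() {
  mindspore::MultiProcess mp;
  auto parent = [](mindspore::MultiProcess *proc) -> mindspore::Status {
    const char request[] = "request";
    auto st = proc->SendMsg(request, sizeof(request));
    if (st != mindspore::kSuccess) {
      return st;
    }
    mindspore::Buffer reply;
    // The callback hands ReceiveMsg a buffer of the announced total length.
    return proc->ReceiveMsg([&reply](size_t len) -> uint8_t * {
      (void)reply.ResizeData(len);
      return reinterpret_cast<uint8_t *>(reply.MutableData());
    });
  };
  auto child = [](mindspore::MultiProcess *proc) -> mindspore::Status {
    mindspore::Buffer payload;
    auto st = proc->ReceiveMsg([&payload](size_t len) -> uint8_t * {
      (void)payload.ResizeData(len);
      return reinterpret_cast<uint8_t *>(payload.MutableData());
    });
    if (st != mindspore::kSuccess) {
      return st;
    }
    // The heavy work runs in the forked child; here the payload is echoed back.
    return proc->SendMsg(payload.Data(), payload.DataSize());
  };
  return mp.MainProcess(parent, child);
}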

View File

@ -1,65 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "extendrt/acl/model_converter_utils/shared_memory.h"
#include <sys/shm.h>
#include <sys/stat.h>
#include <string>
#include "mindspore/core/utils/log_adapter.h"
namespace mindspore {
Status SharedMemory::Create(uint64_t memory_size) {
auto access_mode = S_IRUSR | S_IWUSR | S_IROTH | S_IWOTH | S_IRGRP | S_IWGRP;
shm_id_ = shmget(IPC_PRIVATE, memory_size, IPC_CREAT | IPC_EXCL | access_mode);
if (shm_id_ == -1) {
MS_LOG_ERROR << "Shared memory creation failed. Errno " + std::to_string(errno);
return kMCFailed;
}
MS_LOG_INFO << "shmget success, shm id " << shm_id_;
return kSuccess;
}
Status SharedMemory::Attach() {
void *shmat_addr = shmat(shm_id_, nullptr, 0);
if (shmat_addr == reinterpret_cast<void *>(-1)) {
MS_LOG_ERROR << "Shared memory attach failed. Errno " + std::to_string(errno);
return kMCFailed;
}
shmat_addr_ = reinterpret_cast<uint8_t *>(shmat_addr);
return kSuccess;
}
void SharedMemory::Detach() {
if (shmat_addr_ != nullptr) {
auto err = shmdt(shmat_addr_);
if (err == -1) {
MS_LOG_ERROR << "Shared memory detach failed. Errno " + std::to_string(errno);
return;
}
}
shmat_addr_ = nullptr;
}
void SharedMemory::Destroy() {
// Remove the shared memory and never mind about the return code.
auto err = shmctl(shm_id_, IPC_RMID, nullptr);
if (err == -1) {
std::string errMsg = "Unable to remove shared memory with id " + std::to_string(shm_id_);
errMsg += ". Errno :" + std::to_string(errno);
errMsg += "\nPlesae remove it manually using ipcrm -m command";
MS_LOG_ERROR << errMsg;
}
}
} // namespace mindspore

View File

@ -1,37 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_MODEL_MODEL_CONVERTER_UTILS_SHARED_MEMORY_H_
#define MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_MODEL_MODEL_CONVERTER_UTILS_SHARED_MEMORY_H_
#include <iostream>
#include "include/api/status.h"
namespace mindspore {
class SharedMemory {
public:
Status Create(uint64_t memory_size);
Status Attach();
void Detach();
void Destroy();
private:
friend class MultiProcess;
uint8_t *GetSharedMemoryAddr() { return shmat_addr_; }
int shm_id_ = -1;
uint8_t *shmat_addr_ = nullptr;
};
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_MODEL_MODEL_CONVERTER_UTILS_SHARED_MEMORY_H_
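
For reference, the wrapper above follows the standard System V lifecycle: the owning process calls Create() (shmget) before forking, both sides Attach() (shmat) and later Detach() (shmdt), and only the owner removes the segment with Destroy() (shmctl IPC_RMID). A minimal illustrative sketch of that order; the fork-based caller here is an assumption, since in this code base only MultiProcess drives SharedMemory and can read the mapped address:
// Illustrative only: shows the intended Create/Attach/Detach/Destroy order.
#include <unistd.h>
#include "extendrt/acl/model_converter_utils/shared_memory.h"
int ShareBufferWithChild(uint64_t size) {
  mindspore::SharedMemory shm;
  if (shm.Create(size) != mindspore::kSuccess) {  // shmget(IPC_PRIVATE, ...)
    return -1;
  }
  pid_t pid = fork();
  if (pid == 0) {        // child: attach, use, detach
    (void)shm.Attach();  // shmat()
    // ... the mapped region is used via MultiProcess in the real code ...
    shm.Detach();        // shmdt()
    _exit(0);
  }
  (void)shm.Attach();    // parent side
  shm.Detach();
  shm.Destroy();         // remove the segment once, in the owning process
  return 0;
}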

View File

@ -1,531 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "extendrt/acl/model_process.h"
#include <sys/time.h>
#include <algorithm>
#include <map>
#include "include/common/utils/utils.h"
#include "mindspore/core/utils/convert_utils_base.h"
namespace mindspore {
static DataType TransToApiType(aclDataType data_type) {
static const std::map<aclDataType, enum DataType> data_type_map = {
{ACL_FLOAT16, DataType::kNumberTypeFloat16}, {ACL_FLOAT, DataType::kNumberTypeFloat32},
{ACL_DOUBLE, DataType::kNumberTypeFloat64}, {ACL_INT8, DataType::kNumberTypeInt8},
{ACL_INT16, DataType::kNumberTypeInt16}, {ACL_INT32, DataType::kNumberTypeInt32},
{ACL_INT64, DataType::kNumberTypeInt64}, {ACL_UINT8, DataType::kNumberTypeUInt8},
{ACL_UINT16, DataType::kNumberTypeUInt16}, {ACL_UINT32, DataType::kNumberTypeUInt32},
{ACL_UINT64, DataType::kNumberTypeUInt64}, {ACL_BOOL, DataType::kNumberTypeBool},
};
auto it = data_type_map.find(data_type);
if (it == data_type_map.end()) {
return DataType::kTypeUnknown;
} else {
return it->second;
}
}
template <class T>
inline static void ClearIfNotNull(T *vec) {
if (vec != nullptr) {
vec->clear();
}
}
template <class T, class U = std::vector<T>>
inline static void PushbackIfNotNull(U *vec, T &&item) {
if (vec != nullptr) {
vec->emplace_back(item);
}
}
static void ConstructTensorDesc(const std::vector<AclTensorInfo> &acl_tensor_list, std::vector<std::string> *names,
std::vector<std::vector<int64_t>> *shapes, std::vector<enum DataType> *data_types,
std::vector<size_t> *mem_sizes) {
ClearIfNotNull(names);
ClearIfNotNull(shapes);
ClearIfNotNull(data_types);
ClearIfNotNull(mem_sizes);
for (size_t i = 0; i < acl_tensor_list.size(); ++i) {
const auto &info = acl_tensor_list[i];
PushbackIfNotNull(names, info.name);
PushbackIfNotNull(shapes, info.dims);
PushbackIfNotNull(data_types, TransToApiType(info.data_type));
PushbackIfNotNull(mem_sizes, info.buffer_size);
}
}
static std::string ShapeToString(const std::vector<int64_t> &shape) {
std::string result = "[";
for (size_t i = 0; i < shape.size(); ++i) {
result += std::to_string(shape[i]);
if (i + 1 < shape.size()) {
result += ", ";
}
}
result += "]";
return result;
}
Status ModelProcess::ConstructTensors(const std::vector<AclTensorInfo> &acl_tensor_list,
std::vector<MSTensor> *tensor_list) {
MS_EXCEPTION_IF_NULL(tensor_list);
std::vector<std::string> names;
std::vector<std::vector<int64_t>> shapes;
std::vector<enum DataType> data_types;
std::vector<size_t> mem_sizes;
ConstructTensorDesc(acl_tensor_list, &names, &shapes, &data_types, &mem_sizes);
tensor_list->clear();
if (names.size() != acl_tensor_list.size() || shapes.size() != acl_tensor_list.size() ||
data_types.size() != acl_tensor_list.size() || mem_sizes.size() != acl_tensor_list.size()) {
MS_LOG(ERROR) << "Inner error, size do not match: names size " << names.size() << " shapes size " << shapes.size()
<< " data types size " << data_types.size() << " mem sizes size " << mem_sizes.size()
<< " acl_tensor_list size " << acl_tensor_list.size();
return kMCFailed;
}
aclrtMemcpyKind kind = is_run_on_device_ ? ACL_MEMCPY_HOST_TO_HOST : ACL_MEMCPY_DEVICE_TO_HOST;
for (size_t i = 0; i < acl_tensor_list.size(); ++i) {
tensor_list->emplace_back(names[i], data_types[i], shapes[i], nullptr, mem_sizes[i]);
if (acl_tensor_list[i].cur_device_data == nullptr) {
// when run on device, cur_device_data is nullptr before first execute
continue;
}
auto ret = aclrtMemcpy((*tensor_list)[i].MutableData(), (*tensor_list)[i].DataSize(),
acl_tensor_list[i].cur_device_data, acl_tensor_list[i].buffer_size, kind);
if (ret != ACL_ERROR_NONE) {
MS_LOG(ERROR) << "Memcpy input " << i << " from " << (is_run_on_device_ ? "host" : "device")
<< " to host failed, memory size " << acl_tensor_list[i].buffer_size;
return kMCFailed;
}
}
return kSuccess;
}
Status ModelProcess::PreInitModelResource() {
model_desc_ = aclmdlCreateDesc();
aclError acl_ret = aclmdlGetDesc(model_desc_, model_id_);
if (acl_ret != ACL_ERROR_NONE) {
MS_LOG(ERROR) << "Read model desc failed";
return kMCDeviceError;
}
Status ret = InitInputsBuffer();
if (ret != kSuccess) {
MS_LOG(ERROR) << "Create input buffer failed";
return ret;
}
ret = InitOutputsBuffer();
if (ret != kSuccess) {
MS_LOG(ERROR) << "Create output buffer failed";
return ret;
}
return kSuccess;
}
Status ModelProcess::InitInputsBuffer() {
aclError ret;
size_t input_size = aclmdlGetNumInputs(model_desc_);
MS_LOG(INFO) << "input_size = " << input_size;
for (size_t i = 0; i < input_size; ++i) {
auto buffer_size = aclmdlGetInputSizeByIndex(model_desc_, i);
void *data_mem_buffer = nullptr;
if (!is_run_on_device_) { // need to copy input/output to/from device
ret = aclrtMalloc(&data_mem_buffer, buffer_size, ACL_MEM_MALLOC_NORMAL_ONLY);
if (ret != ACL_ERROR_NONE) {
MS_LOG(ERROR) << "Malloc device input buffer failed , input size " << buffer_size;
return kMCDeviceError;
}
}
aclmdlIODims dims;
ret = aclmdlGetInputDims(model_desc_, i, &dims);
if (ret != ACL_ERROR_NONE) {
MS_LOG(ERROR) << "Get input shape failed";
if (!is_run_on_device_) {
(void)aclrtFree(data_mem_buffer);
}
return kMCDeviceError;
}
aclDataType data_type = aclmdlGetInputDataType(model_desc_, i);
std::vector<int64_t> shape(dims.dims, dims.dims + dims.dimCount);
const char *input_name_char = aclmdlGetInputNameByIndex(model_desc_, i);
std::string input_name = (input_name_char != nullptr) ? input_name_char : std::string();
if (input_name.empty()) {
MS_LOG(WARNING) << "Get name of input " << i << " failed.";
}
MS_LOG(INFO) << "Name of input " << i << " is " << input_name;
input_infos_.emplace_back(
AclTensorInfo{data_mem_buffer, data_mem_buffer, buffer_size, data_type, shape, input_name});
}
MS_LOG(INFO) << "Create model inputs success";
return kSuccess;
}
Status ModelProcess::CreateDataBuffer(void **data_mem_buffer, size_t buffer_size, aclmdlDataset *dataset) const {
MS_EXCEPTION_IF_NULL(data_mem_buffer);
aclError ret;
auto free_data_buffer = [this](void *dataMemBuffer) {
if (!is_run_on_device_) {
(void)aclrtFree(dataMemBuffer);
} else {
(void)aclrtFreeHost(dataMemBuffer);
}
};
if (!is_run_on_device_) {
ret = aclrtMalloc(data_mem_buffer, buffer_size, ACL_MEM_MALLOC_NORMAL_ONLY);
if (ret != ACL_ERROR_NONE) {
MS_LOG(ERROR) << "Malloc device buffer failed , buffer size " << buffer_size;
return kMCDeviceError;
}
} else {
ret = aclrtMallocHost(data_mem_buffer, buffer_size);
if (ret != ACL_ERROR_NONE) {
MS_LOG(ERROR) << "Malloc device buffer failed , buffer size " << buffer_size;
return kMCDeviceError;
}
}
auto data_buffer = aclCreateDataBuffer(*data_mem_buffer, buffer_size);
if (data_buffer == nullptr) {
MS_LOG(ERROR) << "Create Data Buffer failed";
free_data_buffer(*data_mem_buffer);
return kMCDeviceError;
}
ret = aclmdlAddDatasetBuffer(dataset, data_buffer);
if (ret != ACL_ERROR_NONE) {
MS_LOG(ERROR) << "add data buffer failed";
free_data_buffer(*data_mem_buffer);
(void)aclDestroyDataBuffer(data_buffer);
return kMCDeviceError;
}
return kSuccess;
}
Status ModelProcess::InitOutputsBuffer() {
aclError ret;
outputs_ = aclmdlCreateDataset();
if (outputs_ == nullptr) {
MS_LOG(ERROR) << "Create input dataset failed";
return kMCDeviceError;
}
size_t output_size = aclmdlGetNumOutputs(model_desc_);
MS_LOG(INFO) << "output_size = " << output_size;
for (size_t i = 0; i < output_size; ++i) {
auto buffer_size = aclmdlGetOutputSizeByIndex(model_desc_, i);
void *data_mem_buffer = nullptr;
if (CreateDataBuffer(&data_mem_buffer, buffer_size, outputs_) != kSuccess) {
MS_LOG(ERROR) << "add output data buffer failed, buffer size " << buffer_size;
return kMCDeviceError;
}
aclmdlIODims dims;
ret = aclmdlGetOutputDims(model_desc_, i, &dims);
if (ret != ACL_ERROR_NONE) {
MS_LOG(ERROR) << "Get input shape failed";
if (!is_run_on_device_) {
(void)aclrtFree(data_mem_buffer);
} else {
(void)aclrtFreeHost(data_mem_buffer);
}
return kMCDeviceError;
}
aclDataType data_type = aclmdlGetOutputDataType(model_desc_, i);
std::vector<int64_t> shape(dims.dims, dims.dims + dims.dimCount);
const char *output_name_char = aclmdlGetOutputNameByIndex(model_desc_, i);
std::string output_name = (output_name_char != nullptr) ? output_name_char : std::string();
if (output_name.empty()) {
MS_LOG(WARNING) << "Get name of output " << i << " failed.";
}
MS_LOG(INFO) << "Name of input " << i << " is " << output_name;
output_infos_.emplace_back(
AclTensorInfo{data_mem_buffer, data_mem_buffer, buffer_size, data_type, shape, output_name});
}
MS_LOG(INFO) << "Create model output success";
return kSuccess;
}
void ModelProcess::DestroyInputsDataset() {
if (inputs_ == nullptr) {
return;
}
for (size_t i = 0; i < aclmdlGetDatasetNumBuffers(inputs_); i++) {
auto dataBuffer = aclmdlGetDatasetBuffer(inputs_, i);
(void)aclDestroyDataBuffer(dataBuffer);
}
(void)aclmdlDestroyDataset(inputs_);
inputs_ = nullptr;
}
void ModelProcess::DestroyInputsDataMem() {
if (!is_run_on_device_) {
for (const auto &item : input_infos_) {
(void)aclrtFree(item.device_data);
}
}
input_infos_.clear();
}
void ModelProcess::DestroyInputsBuffer() {
DestroyInputsDataMem();
DestroyInputsDataset();
}
void ModelProcess::DestroyOutputsBuffer() {
for (const auto &item : output_infos_) {
if (!is_run_on_device_) {
(void)aclrtFree(item.device_data);
} else {
(void)aclrtFreeHost(item.device_data);
}
}
output_infos_.clear();
if (outputs_ == nullptr) {
return;
}
for (size_t i = 0; i < aclmdlGetDatasetNumBuffers(outputs_); i++) {
auto dataBuffer = aclmdlGetDatasetBuffer(outputs_, i);
(void)aclDestroyDataBuffer(dataBuffer);
}
(void)aclmdlDestroyDataset(outputs_);
outputs_ = nullptr;
}
Status ModelProcess::UnLoad() {
auto ret = aclmdlUnload(model_id_);
if (ret != ACL_ERROR_NONE) {
MS_LOG(ERROR) << "Unload model failed";
return kMCDeviceError;
}
if (model_desc_ != nullptr) {
ret = aclmdlDestroyDesc(model_desc_);
if (ret != ACL_ERROR_NONE) {
MS_LOG(ERROR) << "Unload model failed";
return kMCDeviceError;
}
model_desc_ = nullptr;
}
DestroyInputsBuffer();
DestroyOutputsBuffer();
MS_LOG(INFO) << "End unload model " << model_id_;
return kSuccess;
}
size_t ModelProcess::GetDynamicDims(const std::vector<AclTensorInfo> &inputs) {
size_t max_num = 0;
for (auto input : inputs) {
size_t cur_num = std::count(input.dims.begin(), input.dims.end(), -1);
if (cur_num > max_num) {
max_num = cur_num;
}
}
return max_num;
}
Status ModelProcess::SetBatchSize(const std::vector<MSTensor> &inputs) {
size_t index;
aclError ret;
for (size_t i = 0; i < inputs.size(); i++) {
input_infos_[i].buffer_size = inputs[i].DataSize();
}
auto *p = reinterpret_cast<const float *>(inputs[inputs.size() - 1].Data().get());
MS_EXCEPTION_IF_NULL(p);
size_t dynamicBatchSize = FloatToSize(p[0]);
ret = aclmdlGetInputIndexByName(model_desc_, ACL_DYNAMIC_TENSOR_NAME, &index);
if (ret != ACL_ERROR_NONE) {
MS_LOG(ERROR) << "get index failed";
return kMCDeviceError;
}
ret = aclmdlSetDynamicBatchSize(model_id_, inputs_, index, dynamicBatchSize);
if (ret != ACL_ERROR_NONE) {
MS_LOG(ERROR) << "dynamic batch set failed, modelId is " << model_id_;
return kMCDeviceError;
}
return kSuccess;
}
Status ModelProcess::CheckAndInitInput(const std::vector<MSTensor> &inputs) {
aclError ret;
inputs_ = aclmdlCreateDataset();
constexpr size_t dynamic_batch_size = 1;
constexpr size_t dynamic_image_size = 2;
size_t dynamic_nums = GetDynamicDims(input_infos_);
// check inputs
if (inputs.size() != input_infos_.size()) {
MS_LOG(ERROR) << "Inputs count not match, required count " << input_infos_.size() << ", given count "
<< inputs.size();
return kMCInvalidInput;
}
if (dynamic_nums == 0) {
for (size_t i = 0; i < input_infos_.size(); ++i) {
if (inputs[i].Shape() != input_infos_[i].dims) {
MS_LOG(INFO) << "Note: input " << i << " shape not match, required " << ShapeToString(input_infos_[i].dims)
<< ", given " << ShapeToString(inputs[i].Shape());
}
if (inputs[i].DataType() != TransToApiType(input_infos_[i].data_type)) {
MS_LOG(INFO) << "Note: input " << i << " data type not match, required "
<< TransToApiType(input_infos_[i].data_type) << ", given " << inputs[i].DataType();
}
if (inputs[i].DataSize() != input_infos_[i].buffer_size) {
MS_LOG(ERROR) << "Input " << i << " data size not match, required size " << input_infos_[i].buffer_size
<< ", given count " << inputs[i].DataSize();
return kMCInvalidInput;
}
}
}
// copy inputs
for (size_t i = 0; i < input_infos_.size(); ++i) {
auto &info = input_infos_[i];
auto input = inputs[i];
void *data = input.MutableData();
void *input_buffer = nullptr;
if (!is_run_on_device_) {
if (input.IsDevice()) {
info.cur_device_data = data;
input_buffer = info.cur_device_data;
} else {
info.cur_device_data = info.device_data;
ret = aclrtMemcpy(info.cur_device_data, info.buffer_size, data, input.DataSize(), ACL_MEMCPY_HOST_TO_DEVICE);
if (ret != ACL_ERROR_NONE) {
MS_LOG(ERROR) << "Acl memcpy input " << i << " data to device failed, buffer size " << input.DataSize();
return kMCDeviceError;
}
input_buffer = info.cur_device_data;
}
} else {
input_buffer = data;
}
auto data_buffer = aclCreateDataBuffer(input_buffer, info.buffer_size);
if (data_buffer == nullptr) {
MS_LOG(ERROR) << "Create Data Buffer failed";
return kMCDeviceError;
}
ret = aclmdlAddDatasetBuffer(inputs_, data_buffer);
if (ret != ACL_ERROR_NONE) {
MS_LOG(ERROR) << "add data buffer failed";
(void)aclDestroyDataBuffer(data_buffer);
return kMCDeviceError;
}
}
if (dynamic_nums == dynamic_batch_size) {
if (SetBatchSize(inputs) != kSuccess) {
MS_LOG(ERROR) << "failed to convert dynamic batch size";
return kMCDeviceError;
}
if (ResetOutputSize() != kSuccess) {
MS_LOG(ERROR) << "reset output size failed";
return kMCDeviceError;
}
} else if (dynamic_nums == dynamic_image_size) {
MS_LOG(ERROR) << "only dynamic batch size is supported";
return kMCInvalidInput;
}
return kSuccess;
}
Status ModelProcess::ResetOutputSize() {
aclDataType output_type;
aclError ret;
size_t output_size = aclmdlGetNumOutputs(model_desc_);
for (size_t index = 0; index < output_size; index++) {
int64_t dims = 1;
struct aclmdlIODims output_dims;
ret = aclmdlGetCurOutputDims(model_desc_, index, &output_dims);
if (ret != ACL_ERROR_NONE) {
MS_LOG(ERROR) << "get output dim error.";
return kMCDeviceError;
}
for (size_t i = 0; i < output_dims.dimCount; i++) {
dims *= output_dims.dims[i];
}
output_type = aclmdlGetOutputDataType(model_desc_, index);
output_infos_[index].buffer_size = LongToSize(dims) * aclDataTypeSize(output_type);
}
return kSuccess;
}
Status ModelProcess::PredictFromHost(const std::vector<MSTensor> &inputs, std::vector<MSTensor> *outputs) {
MS_EXCEPTION_IF_NULL(outputs);
aclError acl_ret;
Status ret = CheckAndInitInput(inputs);
if (ret != kSuccess) {
MS_LOG(ERROR) << "check or init input failed";
DestroyInputsDataset();
return ret; // forward status error
}
struct timeval start_time;
struct timeval end_time;
(void)gettimeofday(&start_time, nullptr);
acl_ret = aclmdlExecute(model_id_, inputs_, outputs_);
(void)gettimeofday(&end_time, nullptr);
constexpr uint64_t kUSecondInSecond = 1000000;
uint64_t cost =
(kUSecondInSecond * static_cast<uint64_t>(end_time.tv_sec) + static_cast<uint64_t>(end_time.tv_usec)) -
(kUSecondInSecond * static_cast<uint64_t>(start_time.tv_sec) + static_cast<uint64_t>(start_time.tv_usec));
MS_LOG(INFO) << "Model execute in " << cost << " us";
DestroyInputsDataset();
if (acl_ret != ACL_ERROR_NONE) {
MS_LOG(ERROR) << "Execute Model Failed";
return kMCDeviceError;
}
ret = BuildOutputs(outputs);
if (ret != kSuccess) {
MS_LOG(ERROR) << "Build outputs failed";
return ret;
}
MS_LOG(INFO) << "Execute model success";
return kSuccess;
}
Status ModelProcess::BuildOutputs(std::vector<MSTensor> *outputs) {
MS_EXCEPTION_IF_NULL(outputs);
// copy outputs
outputs->clear();
auto inner_outputs = GetOutputs();
if (inner_outputs.size() != output_infos_.size()) {
MS_LOG(ERROR) << "Invalid inner outputs size " << inner_outputs.size() << " do not match device output infos size "
<< output_infos_.size();
return kMCFailed;
}
(*outputs) = inner_outputs;
return kSuccess;
}
std::vector<MSTensor> ModelProcess::GetInputs() {
Status ret = ConstructTensors(input_infos_, &input_tensors_);
if (ret != kSuccess) {
MS_LOG(ERROR) << "ConstructTensors failed.";
input_tensors_.clear();
}
return input_tensors_;
}
std::vector<MSTensor> ModelProcess::GetOutputs() {
Status ret = ConstructTensors(output_infos_, &output_tensors_);
if (ret != kSuccess) {
MS_LOG(ERROR) << "ConstructTensors failed.";
output_tensors_.clear();
}
return output_tensors_;
}
} // namespace mindspore
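
One non-obvious detail in CheckAndInitInput/SetBatchSize above: for dynamic-batch models the caller passes the batch value as a float in the last input tensor (the ACL_DYNAMIC_TENSOR_NAME slot), and output buffer sizes are recomputed afterwards in ResetOutputSize. A hedged sketch of how a caller might append that extra tensor; the tensor name used here is an assumption, since only the position (last input) and the float payload matter to the code above:
// Illustrative: appending the dynamic-batch tensor consumed by SetBatchSize().
#include <vector>
#include "include/api/types.h"
void AppendDynamicBatch(std::vector<mindspore::MSTensor> *inputs, const float *batch_value) {
  // batch_value must stay alive until PredictFromHost() returns.
  std::vector<int64_t> shape = {1};
  inputs->emplace_back("ascend_mbatch_shape_data", mindspore::DataType::kNumberTypeFloat32, shape, batch_value,
                       sizeof(float));
}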

View File

@ -1,89 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_GRAPH_ACL_MODEL_PROCESS_H_
#define MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_GRAPH_ACL_MODEL_PROCESS_H_
#include <vector>
#include <string>
#include <map>
#include "acl/acl.h"
#include "acl/acl_mdl.h"
#include "acl/acl_rt.h"
#include "include/api/status.h"
#include "include/api/types.h"
namespace mindspore {
struct AclTensorInfo {
void *cur_device_data;
void *device_data;
size_t buffer_size;
aclDataType data_type;
std::vector<int64_t> dims;
std::string name;
};
class ModelProcess {
public:
ModelProcess()
: model_id_(0xffffffff),
is_run_on_device_(false),
model_desc_(nullptr),
inputs_(nullptr),
outputs_(nullptr),
input_infos_(),
output_infos_() {}
~ModelProcess() {}
Status UnLoad();
Status PredictFromHost(const std::vector<MSTensor> &inputs, std::vector<MSTensor> *outputs);
Status PreInitModelResource();
std::vector<MSTensor> GetInputs();
std::vector<MSTensor> GetOutputs();
// override this method to avoid request/reply data copy
void SetIsDevice(bool is_device) { is_run_on_device_ = is_device; }
void set_model_id(uint32_t model_id) { model_id_ = model_id; }
uint32_t model_id() const { return model_id_; }
private:
Status CreateDataBuffer(void **data_mem_buffer, size_t buffer_size, aclmdlDataset *dataset) const;
Status CheckAndInitInput(const std::vector<MSTensor> &inputs);
Status ConstructTensors(const std::vector<AclTensorInfo> &acl_tensor_list, std::vector<MSTensor> *tensor_list);
Status BuildOutputs(std::vector<MSTensor> *outputs);
Status SetBatchSize(const std::vector<MSTensor> &inputs);
Status InitInputsBuffer();
Status InitOutputsBuffer();
Status ResetOutputSize();
void DestroyInputsDataset();
void DestroyInputsDataMem();
void DestroyInputsBuffer();
void DestroyOutputsBuffer();
uint32_t model_id_;
// if run one device(AICPU), there is no need to alloc device memory and copy inputs to(/outputs from) device
bool is_run_on_device_;
aclmdlDesc *model_desc_;
aclmdlDataset *inputs_;
aclmdlDataset *outputs_;
std::vector<AclTensorInfo> input_infos_;
std::vector<AclTensorInfo> output_infos_;
std::vector<MSTensor> input_tensors_;
std::vector<MSTensor> output_tensors_;
size_t GetDynamicDims(const std::vector<AclTensorInfo> &);
};
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_GRAPH_ACL_MODEL_PROCESS_H_
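
The public interface above implies a fixed call order once a model has been loaded through ACL: bind the model id, build the input/output descriptions and device buffers once, run predictions from host memory, then unload. A hedged sketch of that sequence; ACL initialization and the aclmdlLoadFromMem/aclmdlLoadFromFile call that produces model_id are assumed to happen elsewhere:
// Illustrative call order for ModelProcess; assumes ACL is initialized and
// model_id came from aclmdlLoadFromMem or aclmdlLoadFromFile beforehand.
#include <vector>
#include "extendrt/acl/model_process.h"
mindspore::Status RunOnce(uint32_t model_id, const std::vector<mindspore::MSTensor> &inputs,
                          std::vector<mindspore::MSTensor> *outputs) {
  mindspore::ModelProcess process;
  process.set_model_id(model_id);
  // Reads the model description and allocates device buffers for all inputs and outputs.
  auto ret = process.PreInitModelResource();
  if (ret != mindspore::kSuccess) {
    return ret;
  }
  // Copies host inputs to device, executes the model, and copies results back to host tensors.
  ret = process.PredictFromHost(inputs, outputs);
  (void)process.UnLoad();  // releases buffers and the model description
  return ret;
}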

View File

@ -1,455 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "extendrt/ascend/ascend_infer.h"
#include <algorithm>
#include "include/api/context.h"
#include "extendrt/factory.h"
#include "extendrt/acl/acl_utils.h"
#include "utils/log_adapter.h"
#include "runtime/device/context_extends.h"
#include "mindspore/core/base/base_ref_utils.h"
#include "backend/common/session/session_factory.h"
#include "backend/common/session/executor_manager.h"
#include "runtime/device/kernel_runtime_manager.h"
#include "runtime/dev.h"
#include "frontend/parallel/strategy_checkpoint/parallel_strategy_checkpoint.h"
#include "include/common/utils/python_adapter.h"
namespace mindspore {
namespace {
constexpr auto kHcclEnable = "MS_ENABLE_HCCL";
constexpr auto kHcclGroupFile = "PARA_GROUP_FILE";
void InitHccl() {
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
mindspore::python_adapter::set_python_env_flag(true);
uint32_t device_id = ms_context->get_param<uint32_t>(MS_CTX_DEVICE_ID);
if (ms_context->backend_policy() == "ms") {
auto runtime_instance = device::KernelRuntimeManager::Instance().GetKernelRuntime(kAscendDevice, device_id);
MS_EXCEPTION_IF_NULL(runtime_instance);
#ifndef ENABLE_SECURITY
runtime_instance->PreInit();
#endif
(void)context::OpenTsd(ms_context);
if (!runtime_instance->Init()) {
MS_LOG(EXCEPTION) << "Runtime init failed.";
}
} else {
(void)context::OpenTsd(ms_context);
}
}
bool CreateGroupsByCkptFile(const std::string &file) {
parallel::GroupInfoMap group_info_map;
if (parallel::StrategyCheckpoint::GetInstance().LoadGroupInfo(file, &group_info_map) != parallel::SUCCESS) {
return false;
}
for (const auto &[group_name, rank_ids] : group_info_map) {
if (!CommManager::GetInstance().CreateGroupSync(group_name, rank_ids)) {
MS_LOG(ERROR) << "Create group " << group_name << " rank ids " << rank_ids << " failed.";
return false;
}
}
MS_LOG(INFO) << "Create groups by checkpoint file success";
return true;
}
} // namespace
AscendInferExecutor::AscendInferExecutor()
: session_impl_(nullptr),
graph_id_(0),
device_type_("Ascend"),
device_id_(0),
context_(nullptr),
inputs_info_(),
outputs_info_(),
input_names_(),
output_names_(),
load_flag_(false) {}
AscendInferExecutor::~AscendInferExecutor() {}
Status AscendInferExecutor::InitEnv() {
MS_LOG(INFO) << "Start to init env.";
env_guard_ = MsEnvGuard::GetEnv(device_id_);
if (env_guard_ == nullptr) {
MS_LOG(ERROR) << "Env init failed.";
return kMCDeviceError;
}
session_impl_ = session::SessionFactory::Get().Create(kDavinciInferenceDevice);
if (session_impl_ == nullptr) {
MS_LOG(ERROR) << "Session create failed!, please make sure target device:" << kDavinciInferenceDevice
<< " is available.";
return kMCFailed;
}
session_impl_->Init(device_id_);
MS_LOG(INFO) << "InitEnv success.";
return kSuccess;
}
Status AscendInferExecutor::CompileGraph(const std::shared_ptr<FuncGraph> &funcGraphPtr) {
MS_ASSERT(session_impl_ != nullptr);
try {
graph_id_ = session_impl_->CompileGraph(NOT_NULL(funcGraphPtr));
return kSuccess;
} catch (std::exception &e) {
MS_LOG(ERROR) << "CompileGraph failed: " << e.what();
return kMCFailed;
}
}
std::vector<tensor::TensorPtr> AscendInferExecutor::RunGraph(const std::vector<tensor::TensorPtr> &inputs) {
try {
VectorRef outputs;
session_impl_->RunGraph(graph_id_, inputs, &outputs);
return TransformVectorRefToMultiTensor(outputs);
} catch (std::exception &e) {
MS_LOG(ERROR) << "RunGraph failed: " << e.what();
return std::vector<tensor::TensorPtr>();
}
}
Status AscendInferExecutor::CheckModelInputs(const std::vector<tensor::TensorPtr> &inputs) const {
MS_ASSERT(session_impl_ != nullptr);
std::string error_msg;
if (!session_impl_->CheckModelInputs(graph_id_, inputs, &error_msg)) {
return Status(kMCInvalidInput, error_msg);
}
return kSuccess;
}
Status AscendInferExecutor::ExecuteModel(const std::vector<MSTensor> &request, std::vector<MSTensor> *reply) {
MS_EXCEPTION_IF_NULL(reply);
if (context_ == nullptr) {
MS_LOG(ERROR) << "rtCtx is nullptr";
return kMCDeviceError;
}
rtError_t rt_ret = rtCtxSetCurrent(context_);
if (rt_ret != RT_ERROR_NONE) {
MS_LOG(ERROR) << "Set Ascend rtCtx failed";
return kMCDeviceError;
}
vector<tensor::TensorPtr> inputs;
for (size_t i = 0; i < request.size(); i++) {
auto item = request[i];
auto input = inputs_info_[i];
if (input->Size() != item.DataSize()) {
MS_LOG(ERROR) << "Input " << i << " data size " << item.DataSize() << " not match model input data size "
<< input->Size();
return kMCInvalidInput;
}
auto ret = memcpy_s(input->data_c(), input->Size(), item.MutableData(), item.DataSize());
if (ret != EOK) {
MS_LOG(ERROR) << "MSTensor copy failed";
return kMCFailed;
}
inputs.push_back(input);
}
last_inputs_ = inputs;
std::vector<tensor::TensorPtr> outputs = RunGraph(inputs);
if (outputs.empty()) {
MS_LOG(ERROR) << "Execute Model Failed";
return kMCFailed;
}
last_outputs_ = outputs;
reply->clear();
*reply = GetOutputs();
return kSuccess;
}
std::vector<MSTensor> AscendInferExecutor::GetInputs() {
if (!load_flag_) {
Status ret = Load(device_id_);
if (ret != kSuccess) {
MS_LOG(ERROR) << "PrepareModel failed.";
return {};
}
}
std::vector<MSTensor> result(inputs_info_.size());
for (size_t i = 0; i < inputs_info_.size(); ++i) {
auto &tensor = inputs_info_[i];
void *data = nullptr;
size_t data_size = tensor->Size();
if (i < last_inputs_.size()) {
data = last_inputs_[i]->data_c();
data_size = last_inputs_[i]->Size();
}
result[i] =
MSTensor(input_names_[i], static_cast<enum DataType>(tensor->data_type()), tensor->shape(), data, data_size);
}
return result;
}
std::vector<MSTensor> AscendInferExecutor::GetOutputs() {
if (!load_flag_) {
Status ret = Load(device_id_);
if (ret != kSuccess) {
MS_LOG(ERROR) << "PrepareModel failed.";
return {};
}
}
std::vector<MSTensor> result(outputs_info_.size());
for (size_t i = 0; i < outputs_info_.size(); ++i) {
auto &tensor = outputs_info_[i];
void *data = nullptr;
size_t data_size = tensor->Size();
if (i < last_outputs_.size()) {
data = last_outputs_[i]->data_c();
data_size = last_outputs_[i]->Size();
}
result[i] =
MSTensor(output_names_[i], static_cast<enum DataType>(tensor->data_type()), tensor->shape(), data, data_size);
}
return result;
}
Status AscendInferExecutor::Load(uint32_t device_id) {
// check graph type
if (graph_->ModelType() != ModelType::kMindIR) {
MS_LOG(ERROR) << "Unsupported model type " << graph_->ModelType();
return kMCInvalidInput;
}
const auto &graph_data = GraphImpl::MutableGraphData();
MS_EXCEPTION_IF_NULL(graph_data);
auto func_graph = graph_data->GetFuncGraph();
// init
device_id_ = device_id;
Status ret = InitEnv();
if (ret != kSuccess) {
MS_LOG(ERROR) << "InitEnv failed.";
return ret;
}
// load model
if (!load_flag_) {
ret = CompileGraph(func_graph);
if (ret != kSuccess) {
MS_LOG(ERROR) << "Compile graph model failed";
return ret;
}
MS_EXCEPTION_IF_NULL(session_impl_);
session_impl_->GetModelInputsInfo(graph_id_, &inputs_info_, &input_names_);
session_impl_->GetModelOutputsInfo(graph_id_, &outputs_info_, &output_names_);
if (inputs_info_.size() != input_names_.size()) {
MS_LOG_ERROR << "Get model inputs info failed";
return kMCInvalidInput;
}
if (outputs_info_.size() != output_names_.size()) {
MS_LOG_ERROR << "Get model outputs info failed";
return kMCInvalidInput;
}
// save d context
rtError_t rt_ret = rtCtxGetCurrent(&context_);
if (rt_ret != RT_ERROR_NONE || context_ == nullptr) {
MS_LOG(ERROR) << "the ascend device context is null";
return kMCDeviceError;
}
MS_LOG(INFO) << "Load model success";
load_flag_ = true;
}
rtError_t rt_ret = rtCtxSetCurrent(context_);
if (rt_ret != RT_ERROR_NONE) {
MS_LOG(ERROR) << "Set the ascend device context failed";
return kMCDeviceError;
}
return kSuccess;
}
Status AscendInferExecutor::Run(const std::vector<MSTensor> &inputs, std::vector<MSTensor> *outputs) {
MS_EXCEPTION_IF_NULL(outputs);
if (!load_flag_) {
Status ret = Load(device_id_);
if (ret != kSuccess) {
MS_LOG(ERROR) << "PrepareModel failed.";
return ret;
}
}
if (inputs.size() != inputs_info_.size()) {
MS_LOG(ERROR) << "inputs count not match, required count " << inputs_info_.size() << ", given count "
<< inputs.size();
return kMCInvalidInput;
}
for (size_t i = 0; i < inputs_info_.size(); ++i) {
if (inputs[i].DataSize() != inputs_info_[i]->Size()) {
MS_LOG(ERROR) << "input " << i << " data size not match, required size " << inputs_info_[i]->Size()
<< ", given count " << inputs[i].DataSize();
return kMCInvalidInput;
}
}
Status ret = ExecuteModel(inputs, outputs);
if (ret != kSuccess) {
MS_LOG(ERROR) << "Execute Model Failed";
return ret;
}
if (outputs_info_.size() != outputs->size()) {
MS_LOG(ERROR) << "Predict output size " << outputs->size() << " not match output size got from model info "
<< outputs_info_.size();
return kMCFailed;
}
return kSuccess;
}
AscendInferExecutor::MsEnvGuard::MsEnvGuard(uint32_t device_id) {
MS_LOG(INFO) << "Start to init device " << device_id;
device_id_ = device_id;
RegAllOp();
auto ms_context = MsContext::GetInstance();
if (ms_context == nullptr) {
MS_LOG(ERROR) << "Get Context failed!";
errno_ = kMCFailed;
return;
}
auto env_hccl_mode = common::GetEnv(kHcclEnable);
if (!env_hccl_mode.empty() && env_hccl_mode != std::to_string(0)) {
MS_LOG(INFO) << "Enable hccl parallel mode.";
ms_context->set_param<bool>(MS_CTX_ENABLE_HCCL, true);
}
ms_context->set_param<int>(MS_CTX_EXECUTION_MODE, kGraphMode);
ms_context->set_param<uint32_t>(MS_CTX_DEVICE_ID, device_id_);
ms_context->set_param<std::string>(MS_CTX_DEVICE_TARGET, kAscendDevice);
ms_context->set_param<bool>(MS_CTX_IS_MULTI_GRAPH_SINK, true);
if (ms_context->get_param<bool>(MS_CTX_ENABLE_HCCL)) {
InitHccl();
auto para_group_file = common::GetEnv(kHcclGroupFile);
if (para_group_file.empty()) {
MS_LOG(INFO) << "Cannot get Env " << kHcclGroupFile << ", skip.";
} else {
MS_LOG(INFO) << "Get env " << kHcclGroupFile << " success: " << para_group_file;
if (!CreateGroupsByCkptFile(para_group_file)) {
MS_LOG(ERROR) << "CreateGroupsByCkptFile failed.";
errno_ = kMCFailed;
return;
}
}
} else {
auto ret = rtSetDevice(static_cast<int32_t>(device_id_));
if (ret != RT_ERROR_NONE) {
MS_LOG(EXCEPTION) << "Device " << device_id_ << " call rtSetDevice failed, ret[" << static_cast<int>(ret) << "]";
}
}
MS_LOG(INFO) << "Device " << device_id << " init env success.";
errno_ = kSuccess;
}
AscendInferExecutor::MsEnvGuard::~MsEnvGuard() {
MS_LOG(INFO) << "Start finalize device " << device_id_;
session::ExecutorManager::Instance().Clear();
device::KernelRuntimeManager::Instance().ClearRuntimeResource();
auto ms_context = MsContext::GetInstance();
if (ms_context == nullptr) {
MS_LOG(ERROR) << "Get Context failed!";
return;
}
if (ms_context->get_param<bool>(MS_CTX_ENABLE_HCCL)) {
PythonEnvGuard guard;
if (!context::CloseTsd(ms_context)) {
MS_LOG(ERROR) << "CloseTsd failed!";
return;
}
} else {
auto ret = rtDeviceReset(static_cast<int32_t>(device_id_));
if (ret != RT_ERROR_NONE) {
MS_LOG(ERROR) << "Device " << device_id_ << " call rtDeviceReset failed, ret[" << static_cast<int>(ret) << "]";
return;
}
}
MS_LOG(INFO) << "End finalize device " << device_id_;
}
std::shared_ptr<AscendInferExecutor::MsEnvGuard> AscendInferExecutor::MsEnvGuard::GetEnv(uint32_t device_id) {
std::shared_ptr<MsEnvGuard> acl_env;
std::lock_guard<std::mutex> lock(global_ms_env_mutex_);
auto iter = global_ms_env_.find(device_id);
if (iter != global_ms_env_.end()) {
acl_env = iter->second.lock();
}
if (acl_env != nullptr) {
MS_LOG(INFO) << "Env has been initialized, skip.";
return acl_env;
}
acl_env = std::make_shared<MsEnvGuard>(device_id);
if (acl_env->GetErrno() != kSuccess) {
MS_LOG(ERROR) << "Init ascend env Failed";
return nullptr;
}
global_ms_env_.emplace(device_id, acl_env);
MS_LOG(INFO) << "Env init success";
return acl_env;
}
bool AscendInferExecutor::CheckDeviceSupport(mindspore::DeviceType device_type) {
// for Ascend, only support kAscend and kAscend910
if (device_type != kAscend && device_type != kAscend910) {
return false;
}
return IsAscend910Soc();
}
std::map<uint32_t, std::weak_ptr<AscendInferExecutor::MsEnvGuard>> AscendInferExecutor::MsEnvGuard::global_ms_env_;
std::mutex AscendInferExecutor::MsEnvGuard::global_ms_env_mutex_;
PythonEnvGuard::PythonEnvGuard() {
origin_init_status_ = PythonIsInited();
InitPython();
}
PythonEnvGuard::~PythonEnvGuard() {
// finalize when init by this
if (!origin_init_status_) {
FinalizePython();
}
}
bool PythonEnvGuard::PythonIsInited() const { return Py_IsInitialized() != 0; }
void PythonEnvGuard::InitPython() const {
if (!PythonIsInited()) {
Py_Initialize();
}
}
void PythonEnvGuard::FinalizePython() const {
if (PythonIsInited()) {
Py_Finalize();
}
}
} // namespace mindspore

View File

@ -1,87 +0,0 @@
/**
* Copyright 2020 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_ASCEND_INFER_EXECUTOR_H_
#define MINDSPORE_LITE_SRC_EXTENDRT_ASCEND_INFER_EXECUTOR_H_
#include <functional>
#include <map>
#include <string>
#include <vector>
#include <memory>
#include <utility>
#include "include/api/status.h"
#include "include/api/graph.h"
#include "extendrt/graph_executor.h"
#include "runtime/context.h"
namespace mindspore {
class AscendInferExecutor : public GraphExecutor {
public:
AscendInferExecutor();
~AscendInferExecutor() override;
Status Execute(const ExecutePlan &plan, const std::vector<MSTensor> &inputs, std::vector<MSTensor> *outputs) override;
protected:
bool CheckDeviceSupport(mindspore::DeviceType device_type) override;
Status Load(uint32_t device_id);
Status InitEnv();
Status FinalizeEnv();
Status CheckModelInputs(const std::vector<tensor::TensorPtr> &inputs) const;
private:
uint32_t graph_id_;
std::string device_type_;
uint32_t device_id_;
rtContext_t context_;
std::vector<tensor::TensorPtr> inputs_info_;
std::vector<tensor::TensorPtr> outputs_info_;
std::vector<tensor::TensorPtr> last_inputs_;
std::vector<tensor::TensorPtr> last_outputs_;
std::vector<std::string> input_names_;
std::vector<std::string> output_names_;
bool load_flag_;
std::shared_ptr<MsEnvGuard> env_guard_;
};
class AscendInferExecutor::MsEnvGuard {
public:
explicit MsEnvGuard(uint32_t device_id);
~MsEnvGuard();
Status GetErrno() const { return errno_; }
static std::shared_ptr<MsEnvGuard> GetEnv(uint32_t device_id);
private:
static std::map<uint32_t, std::weak_ptr<MsEnvGuard>> global_ms_env_;
static std::mutex global_ms_env_mutex_;
Status errno_;
uint32_t device_id_;
};
class PythonEnvGuard {
public:
PythonEnvGuard();
~PythonEnvGuard();
private:
bool PythonIsInited() const;
void InitPython() const;
void FinalizePython() const;
bool origin_init_status_;
};
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_EXTENDRT_ASCEND_INFER_EXECUTOR_H_

View File

@ -22,6 +22,7 @@
#include "src/litert/inner_allocator.h"
#include "src/common/log_adapter.h"
#include "src/extendrt/delegate/tensorrt/distribution/distribution_base.h"
#include "src/extendrt/delegate_graph_executor.h"
namespace mindspore {
constexpr auto kModelOptionCpuEnableFP16 = "mindspore.option.cpu.enable_fp16";
@ -171,15 +172,15 @@ std::vector<int32_t> Context::GetThreadAffinityCoreList() const {
return data_->affinity_core_list_;
}
void Context::SetDelegate(const std::shared_ptr<Delegate> &delegate) {
void Context::set_delegate(const std::shared_ptr<AbstractDelegate> &delegate) {
if (data_ == nullptr) {
MS_LOG(ERROR) << "Invalid context.";
return;
}
data_->delegate = delegate;
data_->delegate = std::dynamic_pointer_cast<GraphSinkDelegate>(delegate);
}
std::shared_ptr<Delegate> Context::GetDelegate() const {
std::shared_ptr<AbstractDelegate> Context::get_delegate() const {
if (data_ == nullptr) {
MS_LOG(ERROR) << "Invalid context.";
return nullptr;
@ -187,6 +188,12 @@ std::shared_ptr<Delegate> Context::GetDelegate() const {
return data_->delegate;
}
// deprecated
void Context::SetDelegate(const std::shared_ptr<Delegate> &delegate) { MS_LOG(ERROR) << "Invalid delegate."; }
// deprecated
std::shared_ptr<Delegate> Context::GetDelegate() const { return nullptr; }
void Context::SetMultiModalHW(bool float_mode) {
if (data_ == nullptr) {
MS_LOG(ERROR) << "Invalid context.";

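The replacement of SetDelegate() with set_delegate() above means only delegates that derive from GraphSinkDelegate survive the dynamic_pointer_cast; any other AbstractDelegate is stored as nullptr. A hedged sketch of the new call site, assuming a device::GraphExecutor obtained elsewhere (for example from DelegateRegistry) and using empty placeholder vectors for the graph inputs/outputs:
// Sketch of the new set_delegate() path; executor creation is assumed elsewhere.
#include <memory>
#include <vector>
#include "include/api/context.h"
#include "src/extendrt/delegate_graph_executor.h"
void AttachDelegate(const std::shared_ptr<mindspore::Context> &context,
                    const std::shared_ptr<mindspore::device::GraphExecutor> &executor) {
  std::vector<mindspore::MSTensor> inputs;   // placeholder graph inputs
  std::vector<mindspore::MSTensor> outputs;  // placeholder graph outputs
  // GraphExecutorDelegate derives from GraphSinkDelegate, so the
  // dynamic_pointer_cast inside set_delegate() keeps it.
  auto delegate = std::make_shared<mindspore::GraphExecutorDelegate>(inputs, outputs, executor);
  context->set_delegate(delegate);
}
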
View File

@ -27,6 +27,7 @@
#include <experimental/any>
#endif
#include "include/api/context.h"
#include "include/api/delegate_api.h"
namespace mindspore {
struct Context::Data {
@ -42,7 +43,7 @@ struct Context::Data {
int32_t thread_num = 0; // defaults are automatically adjusted based on computer performance
bool enable_parallel_ = false;
std::vector<int32_t> affinity_core_list_;
std::shared_ptr<Delegate> delegate = nullptr;
std::shared_ptr<AbstractDelegate> delegate = nullptr;
bool float_mode = false;
};

View File

@ -16,7 +16,6 @@
#ifndef MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_DELEGATE_UTILS_H_
#define MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_DELEGATE_UTILS_H_
#include <vector>
#include "include/api/delegate.h"
#include "src/common/log_adapter.h"
#include "include/errorcode.h"
#include "core/base/base.h"

View File

@ -20,14 +20,15 @@
#include <string>
#include <memory>
#include "include/api/delegate.h"
#include "utils/hash_map.h"
#include "extendrt/delegate/graph_executor/factory.h"
#include "extendrt/delegate/graph_executor/type.h"
#include "runtime/hardware/device_context.h"
#include "src/extendrt/delegate_graph_executor.h"
#include "include/api/context.h"
namespace mindspore {
typedef std::shared_ptr<Delegate> (*DelegateCreator)(const std::shared_ptr<mindspore::DelegateConfig> &config);
using mindspore::device::GraphExecutor;
// TODO(zhaizhiqiang): Wrap graph executor as delegate.
// typedef std::shared_ptr<GraphSinkDelegate> (*DelegateCreator)(const std::shared_ptr<Context> &);
typedef std::shared_ptr<GraphExecutor> (*DelegateCreator)(const std::shared_ptr<Context> &);
class MS_API DelegateRegistry {
public:
DelegateRegistry() = default;
@ -46,10 +47,10 @@ class MS_API DelegateRegistry {
it->second[provider] = creator;
}
std::shared_ptr<Delegate> GetDelegate(const mindspore::DeviceType &device_type, const std::string &provider,
const std::shared_ptr<mindspore::DelegateConfig> &config) {
std::shared_ptr<GraphExecutor> GetDelegate(const mindspore::DeviceType &device_type, const std::string &provider,
const std::shared_ptr<Context> &ctx) {
// first find graph executor delegate
auto graph_executor_delegate = GraphExecutorRegistry::GetInstance().GetDelegate(device_type, provider, config);
auto graph_executor_delegate = DelegateRegistry::GetInstance().GetDelegate(device_type, provider, ctx);
if (graph_executor_delegate != nullptr) {
return graph_executor_delegate;
}
@ -63,7 +64,7 @@ class MS_API DelegateRegistry {
if (creator_it == it->second.end()) {
return nullptr;
}
return creator_it->second(config);
return creator_it->second(ctx);
}
private:

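With the reworked registry above, a backend registers a single creator that builds a device::GraphExecutor directly from the user Context. A hedged registration sketch follows; my_provider and MyExecutorCreator are placeholder names, and REG_DELEGATE (used by the LiteRT and TensorRT providers later in this change, definition not shown in this hunk) is assumed to take the same (device type, provider, creator) arguments as the old REG_GRAPH_EXECUTOR macro:
// Illustrative registration against the reworked DelegateRegistry; names are placeholders.
#include <memory>
#include <string>
#include "extendrt/delegate/factory.h"
namespace mindspore {
static const std::string my_provider = "my_provider";  // placeholder provider token
static std::shared_ptr<device::GraphExecutor> MyExecutorCreator(const std::shared_ptr<Context> &ctx) {
  // Build and return a backend-specific GraphExecutor from the user Context here.
  return nullptr;  // placeholder body
}
REG_DELEGATE(kCPU, my_provider, MyExecutorCreator);
}  // namespace mindspore
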
View File

@ -1,24 +0,0 @@
/**
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <memory>
#include "extendrt/delegate/graph_executor/delegate.h"
namespace mindspore {
Status GraphExecutorDelegate::Init() { return kSuccess; }
Status GraphExecutorDelegate::Build(DelegateModel<schema::Primitive> *model) { return kSuccess; }
} // namespace mindspore

View File

@ -1,47 +0,0 @@
/**
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_GRAPH_EXECUTOR_DELEGATE_H_
#define MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_GRAPH_EXECUTOR_DELEGATE_H_
#include <memory>
#include "include/api/delegate.h"
#include "extendrt/session/lite_graph_executor.h"
namespace mindspore {
class GraphExecutorDelegate : public Delegate {
public:
GraphExecutorDelegate() = default;
explicit GraphExecutorDelegate(std::shared_ptr<mindspore::LiteGraphExecutor> graph_executor)
: graph_executor_(graph_executor) {}
virtual ~GraphExecutorDelegate() = default;
virtual Status Init();
virtual Status Build(DelegateModel<schema::Primitive> *model);
std::shared_ptr<mindspore::LiteGraphExecutor> GetGraphExecutor() { return graph_executor_; }
void SetGraphExecutor(std::shared_ptr<mindspore::LiteGraphExecutor> graph_executor) {
graph_executor_ = graph_executor;
}
private:
std::shared_ptr<mindspore::LiteGraphExecutor> graph_executor_;
};
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_GRAPH_EXECUTOR_DELEGATE_H_

View File

@ -1,23 +0,0 @@
/**
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "extendrt/delegate/graph_executor/factory.h"
namespace mindspore {
GraphExecutorRegistry &GraphExecutorRegistry::GetInstance() {
static GraphExecutorRegistry instance;
return instance;
}
} // namespace mindspore

View File

@ -1,93 +0,0 @@
/**
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_GRAPH_EXECUTOR_FACTORY_H_
#define MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_GRAPH_EXECUTOR_FACTORY_H_
#include <functional>
#include <string>
#include <memory>
#include "extendrt/delegate/graph_executor/delegate.h"
#include "extendrt/delegate/graph_executor/type.h"
#include "utils/hash_map.h"
namespace mindspore {
typedef std::shared_ptr<LiteGraphExecutor> (*GraphExecutorCreator)(
const std::shared_ptr<mindspore::DelegateConfig> &config);
class MS_API GraphExecutorRegistry {
public:
GraphExecutorRegistry() = default;
virtual ~GraphExecutorRegistry() = default;
static GraphExecutorRegistry &GetInstance();
void RegGraphExecutor(const mindspore::DeviceType &device_type, const std::string &provider,
GraphExecutorCreator creator) {
auto it = creator_map_.find(device_type);
if (it == creator_map_.end()) {
HashMap<std::string, GraphExecutorCreator> map;
map[provider] = creator;
creator_map_[device_type] = map;
return;
}
it->second[provider] = creator;
}
std::shared_ptr<LiteGraphExecutor> GetGraphExecutor(const mindspore::DeviceType &device_type,
const std::string &provider,
const std::shared_ptr<mindspore::DelegateConfig> &config) {
auto it = creator_map_.find(device_type);
if (it == creator_map_.end()) {
return nullptr;
}
auto creator_it = it->second.find(provider);
if (creator_it == it->second.end()) {
return nullptr;
}
return creator_it->second(config);
}
std::shared_ptr<GraphExecutorDelegate> GetDelegate(const mindspore::DeviceType &device_type,
const std::string &provider,
const std::shared_ptr<mindspore::DelegateConfig> &config) {
auto graph_executor = GetGraphExecutor(device_type, provider, config);
if (graph_executor == nullptr) {
return nullptr;
}
auto delegate = std::make_shared<mindspore::GraphExecutorDelegate>();
delegate->SetGraphExecutor(graph_executor);
return delegate;
}
private:
mindspore::HashMap<DeviceType, mindspore::HashMap<std::string, GraphExecutorCreator>> creator_map_;
};
class GraphExecutorRegistrar {
public:
GraphExecutorRegistrar(const mindspore::DeviceType &device_type, const std::string &provider,
GraphExecutorCreator creator) {
GraphExecutorRegistry::GetInstance().RegGraphExecutor(device_type, provider, creator);
}
~GraphExecutorRegistrar() = default;
};
#define REG_GRAPH_EXECUTOR(device_type, provider, creator) \
static GraphExecutorRegistrar g_##device_type##provider##GraphExecutor(device_type, provider, creator);
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_GRAPH_EXECUTOR_FACTORY_H_

View File

@ -28,7 +28,7 @@
#include "extendrt/delegate/graph_executor/litert/converters.h"
#include "extendrt/delegate/graph_executor/litert/graph_executor.h"
#include "extendrt/delegate/graph_executor/factory.h"
#include "extendrt/delegate/factory.h"
#include "tools/common/meta_graph_serializer.h"
#include "extendrt/utils/tensor_utils.h"
@ -285,11 +285,9 @@ std::shared_ptr<lite::LiteSession> LiteRTGraphExecutor::CreateLiteSession(lite::
return session;
}
static std::shared_ptr<LiteGraphExecutor> LiteRTGraphExecutorCreator(
const std::shared_ptr<mindspore::DelegateConfig> &config) {
MS_EXCEPTION_IF_NULL(config);
return std::make_shared<LiteRTGraphExecutor>(config->GetContext());
static std::shared_ptr<device::GraphExecutor> LiteRTGraphExecutorCreator(const std::shared_ptr<Context> &ctx) {
return std::make_shared<LiteRTGraphExecutor>(ctx);
}
REG_GRAPH_EXECUTOR(kCPU, litert_provider, LiteRTGraphExecutorCreator);
REG_DELEGATE(kCPU, litert_provider, LiteRTGraphExecutorCreator);
} // namespace mindspore

View File

@ -1,32 +0,0 @@
/**
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_EXTENDRT_DELEGATE_GRAPH_EXECUTOR_TYPE_H_
#define MINDSPORE_LITE_EXTENDRT_DELEGATE_GRAPH_EXECUTOR_TYPE_H_
#include <memory>
#include <vector>
#include "extendrt/delegate/type.h"
namespace mindspore {
class GraphExecutorConfig : public DelegateConfig {
public:
GraphExecutorConfig() = default;
explicit GraphExecutorConfig(const std::shared_ptr<Context> &context) : DelegateConfig(context) {}
~GraphExecutorConfig() = default;
};
} // namespace mindspore
#endif // MINDSPORE_LITE_EXTENDRT_DELEGATE_GRAPH_EXECUTOR_TYPE_H_

View File

@ -24,7 +24,6 @@
#include <utility>
#include "ccsrc/kernel/kernel.h"
#include "src/extendrt/delegate/delegate_utils.h"
#include "src/extendrt/delegate/graph_executor/factory.h"
#include "ccsrc/kernel/common_utils.h"
#include "ccsrc/backend/common/optimizer/helper.h"
#include "ccsrc/include/common/utils/convert_utils.h"
@ -705,13 +704,11 @@ std::vector<tensor::Tensor> TensorRTExecutor::GetOutputInfos(const FuncGraphPtr
return tensors;
}
static std::shared_ptr<LiteGraphExecutor> TensorRTGraphExecutorCreator(
const std::shared_ptr<mindspore::DelegateConfig> &config) {
MS_EXCEPTION_IF_NULL(config);
auto executor = std::make_shared<TensorRTExecutor>(config->GetContext());
static std::shared_ptr<device::GraphExecutor> TensorRTGraphExecutorCreator(const std::shared_ptr<Context> &ctx) {
auto executor = std::make_shared<TensorRTExecutor>(ctx);
executor->Init();
return executor;
}
REG_GRAPH_EXECUTOR(kGPU, tensorrt_provider, TensorRTGraphExecutorCreator);
REG_DELEGATE(kGPU, tensorrt_provider, TensorRTGraphExecutorCreator);
} // namespace mindspore::lite

View File

@ -22,7 +22,6 @@
#include <memory>
#include <set>
#include <map>
#include "include/api/delegate.h"
#include "src/extendrt/delegate/tensorrt/tensorrt_subgraph.h"
#include "src/extendrt/delegate/parameter_cache/embedding_cache_manager.h"
#include "include/api/kernel.h"
@ -30,7 +29,7 @@
#include "src/common/log_adapter.h"
#include "include/api/context.h"
#include "core/base/base.h"
#include "extendrt/delegate/factory.h"
#include "extendrt/session/lite_graph_executor.h"
#include "ccsrc/backend/common/session/kernel_graph.h"

View File

@ -1,39 +0,0 @@
/**
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_EXTENDRT_DELEGATE_TYPE_H_
#define MINDSPORE_LITE_EXTENDRT_DELEGATE_TYPE_H_
#include <memory>
#include <vector>
#include "include/api/context.h"
namespace mindspore {
class DelegateConfig {
public:
DelegateConfig() = default;
explicit DelegateConfig(const std::shared_ptr<Context> &context) : context_(context) {}
virtual ~DelegateConfig() = default;
// void SetContext(const std::shared_ptr<Context> &context) { context_ = context; }
const std::shared_ptr<Context> &GetContext() { return context_; }
protected:
const std::shared_ptr<Context> context_;
};
} // namespace mindspore
#endif // MINDSPORE_LITE_EXTENDRT_DELEGATE_TYPE_H_

View File

@ -0,0 +1,57 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/extendrt/delegate_graph_executor.h"
#include <set>
#include <memory>
#include "src/extendrt/subgraph_kernel.h"
#include "tools/common/func_graph_subgraph.h"
#include "ops/fusion/partial_fusion.h"
namespace mindspore {
// Graph sink delegate, the whole FuncGraph as a node to execute.
void GraphSinkDelegate::ReplaceNodes(const std::shared_ptr<FuncGraph> &graph) {
sink_graph_ = graph;
// replace the whole graph to a partial node.
lite::SubGraph helper(graph);
auto nodes = graph->order_list();
std::set<CNodePtr> cnodeset;
for (auto it = nodes.begin(); it != nodes.end(); it++) {
cnodeset.emplace(*it);
}
helper.Reset(cnodeset);
helper.ApplySubGraph();
return;
}
bool GraphSinkDelegate::IsDelegateNode(const std::shared_ptr<CNode> &node) {
auto partial_prim = std::make_shared<mindspore::ops::PartialFusion>();
if (!IsPrimitiveCNode(node, partial_prim->GetPrim())) {
return false;
}
auto graph = GetCNodeFuncGraph(node);
if (graph.get() == sink_graph_.get()) {
return true;
}
return false;
}
std::shared_ptr<kernel::KernelMod> GraphExecutorDelegate::CreateKernel(const std::shared_ptr<CNode> &node) {
if (!IsDelegateNode(node)) {
return nullptr;
}
auto kernel = std::make_shared<kernel::SubgraphKernel>(sink_graph_, executor_);
return kernel;
}
} // namespace mindspore

View File

@ -0,0 +1,55 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_EXTENDRT_DELEGATE_GRAPH_EXECUTOR_H_
#define MINDSPORE_LITE_EXTENDRT_DELEGATE_GRAPH_EXECUTOR_H_
#include <vector>
#include <memory>
#include "include/api/delegate_api.h"
#include "ir/func_graph.h"
#include "ir/anf.h"
#include "runtime/hardware/device_context.h"
#include "tools/common/func_graph_subgraph.h"
#include "kernel/kernel.h"
namespace mindspore {
// Graph sink delegate, the whole FuncGraph as a node to execute.
class GraphSinkDelegate : public IDelegate<FuncGraph, CNode, kernel::KernelMod> {
public:
GraphSinkDelegate(const std::vector<mindspore::MSTensor> &inputs, const std::vector<mindspore::MSTensor> &outputs)
: IDelegate<FuncGraph, CNode, kernel::KernelMod>(inputs, outputs) {}
virtual ~GraphSinkDelegate() = default;
void ReplaceNodes(const std::shared_ptr<FuncGraph> &graph) override;
bool IsDelegateNode(const std::shared_ptr<CNode> &node) override;
protected:
FuncGraphPtr sink_graph_;
};
// wrap graph executor as delegate
class GraphExecutorDelegate : public GraphSinkDelegate {
public:
explicit GraphExecutorDelegate(const std::vector<mindspore::MSTensor> &inputs,
const std::vector<mindspore::MSTensor> &outputs,
std::shared_ptr<device::GraphExecutor> executor)
: GraphSinkDelegate(inputs, outputs), executor_(executor) {}
virtual ~GraphExecutorDelegate() = default;
std::shared_ptr<kernel::KernelMod> CreateKernel(const std::shared_ptr<CNode> &node) override;
private:
const std::shared_ptr<device::GraphExecutor> executor_;
};
} // namespace mindspore
#endif // MINDSPORE_LITE_EXTENDRT_DELEGATE_GRAPH_EXECUTOR_H_
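
Taken with the .cc file above, the intended flow is: ReplaceNodes() folds the whole FuncGraph into a single partial-call node, IsDelegateNode() recognizes that node, and CreateKernel() wraps the sunk graph plus a device::GraphExecutor into a SubgraphKernel. A hedged sketch of a caller driving that flow; the session-side scheduling that would normally invoke these hooks is assumed:
// Illustrative driver for GraphExecutorDelegate; the inputs/outputs vectors and
// the surrounding session plumbing are placeholders.
#include <memory>
#include <vector>
#include "src/extendrt/delegate_graph_executor.h"
namespace mindspore {
std::shared_ptr<kernel::KernelMod> SinkWholeGraph(const FuncGraphPtr &graph,
                                                  const std::shared_ptr<device::GraphExecutor> &executor) {
  std::vector<MSTensor> inputs;   // placeholder graph inputs
  std::vector<MSTensor> outputs;  // placeholder graph outputs
  GraphExecutorDelegate delegate(inputs, outputs, executor);
  delegate.ReplaceNodes(graph);  // rewrites the graph into one partial-call node
  for (const auto &node : graph->order_list()) {
    if (delegate.IsDelegateNode(node)) {
      return delegate.CreateKernel(node);  // SubgraphKernel bound to the executor
    }
  }
  return nullptr;
}
}  // namespace mindspore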

View File

@ -1,269 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "extendrt/gpu/gpu_infer.h"
#include <algorithm>
#include "include/api/context.h"
#include "extendrt/factory.h"
#include "extendrt/cxx_api/akg_kernel_register.h"
#include "utils/log_adapter.h"
#include "mindspore/core/base/base_ref_utils.h"
#include "backend/common/session/session_factory.h"
#include "backend/common/session/executor_manager.h"
#include "runtime/device/kernel_runtime_manager.h"
#include "plugin/device/gpu/hal/device/cuda_driver.h"
namespace mindspore {
GPUInferExecutor::GPUInferExecutor()
: session_impl_(nullptr),
graph_id_(0),
device_id_(0),
inputs_info_(),
outputs_info_(),
input_names_(),
output_names_(),
init_flag_(false),
load_flag_(false),
set_device_id_flag_(false) {}
Status GPUInferExecutor::InitEnv() {
if (init_flag_) {
MS_LOG(WARNING) << "Initialized again, return success.";
return kSuccess;
}
auto ms_context = MsContext::GetInstance();
if (ms_context == nullptr) {
MS_LOG(ERROR) << "Get Context failed!";
return kMCFailed;
}
ms_context->set_param<int>(MS_CTX_EXECUTION_MODE, kGraphMode);
ms_context->set_param<uint32_t>(MS_CTX_DEVICE_ID, device_id_);
ms_context->set_param<std::string>(MS_CTX_DEVICE_TARGET, kGPUDevice);
// Set the device id for syncing data back to host, since cudaSetDevice is a thread-level config.
bool ret = device::gpu::CudaDriver::SetDevice(UintToInt(device_id_));
if (!ret) {
MS_LOG(ERROR) << "Failed to set device id:" << device_id_;
return kMCDeviceError;
}
auto &device_infos = graph_context_->MutableDeviceInfo();
if (device_infos.size() != 1) {
return kMCDeviceError;
}
auto gpu_info = device_infos[0]->Cast<GPUDeviceInfo>();
if (gpu_info == nullptr) {
return kMCDeviceError;
}
ms_context->set_param<bool>(MS_CTX_ENABLE_INFER_OPT, true);
ms_context->set_param<std::string>(MS_CTX_INFER_PRECISION_MODE, gpu_info->GetPrecisionMode());
session_impl_ = session::SessionFactory::Get().Create(kGpuInferenceDevice);
if (session_impl_ == nullptr) {
MS_LOG(ERROR) << "Session create failed!, please make sure target device:" << kGpuInferenceDevice
<< " is available.";
return kMCFailed;
}
session_impl_->Init(device_id_);
init_flag_ = true;
return kSuccess;
}
Status GPUInferExecutor::FinalizeEnv() {
if (!init_flag_) {
MS_LOG(WARNING) << "Never initialize before, return success";
return kSuccess;
}
MS_LOG_INFO << "Start finalize env";
session::ExecutorManager::Instance().Clear();
device::KernelRuntimeManager::Instance().ClearRuntimeResource();
init_flag_ = false;
MS_LOG(INFO) << "End finalize env";
return kSuccess;
}
Status GPUInferExecutor::Load(uint32_t device_id) {
// check graph type
if (graph_->ModelType() != ModelType::kMindIR) {
MS_LOG(ERROR) << "Unsupported model type " << graph_->ModelType();
return kMCInvalidInput;
}
const auto &graph_data = GraphImpl::MutableGraphData();
MS_EXCEPTION_IF_NULL(graph_data);
auto func_graph = graph_data->GetFuncGraph();
// init
device_id_ = device_id;
Status ret = InitEnv();
if (ret != kSuccess) {
MS_LOG(ERROR) << "InitEnv failed.";
return kMCDeviceError;
}
ret = CompileGraph(func_graph);
if (ret != kSuccess) {
MS_LOG(ERROR) << "Compile graph model failed";
return kMCFailed;
}
MS_EXCEPTION_IF_NULL(session_impl_);
session_impl_->GetModelInputsInfo(graph_id_, &inputs_info_, &input_names_);
session_impl_->GetModelOutputsInfo(graph_id_, &outputs_info_, &output_names_);
if (inputs_info_.empty() || inputs_info_.size() != input_names_.size()) {
MS_LOG_ERROR << "Get model inputs info failed";
return kMCInvalidInput;
}
if (outputs_info_.empty() || outputs_info_.size() != output_names_.size()) {
MS_LOG_ERROR << "Get model outputs info failed";
return kMCInvalidInput;
}
load_flag_ = true;
return kSuccess;
}
Status GPUInferExecutor::ExecuteModel(const std::vector<MSTensor> &request, std::vector<MSTensor> *reply) {
MS_EXCEPTION_IF_NULL(reply);
vector<tensor::TensorPtr> inputs;
for (size_t i = 0; i < request.size(); i++) {
auto &item = request[i];
auto input = inputs_info_[i];
if (input->Size() != item.DataSize()) {
MS_LOG(ERROR) << "Input " << i << " data size " << item.DataSize() << " not match model input data size "
<< input->Size();
return kMCInvalidInput;
}
auto ret = memcpy_s(input->data_c(), input->Size(), item.Data().get(), item.DataSize());
if (ret != kSuccess) {
MS_LOG(ERROR) << "Tensor copy failed";
return kMCFailed;
}
inputs.push_back(input);
}
last_inputs_ = inputs;
std::vector<tensor::TensorPtr> outputs = RunGraph(inputs);
if (outputs.empty()) {
MS_LOG(ERROR) << "Execute Model Failed";
return kMCFailed;
}
last_outputs_ = outputs;
reply->clear();
*reply = GetOutputs();
return kSuccess;
}
Status GPUInferExecutor::Run(const std::vector<MSTensor> &inputs, std::vector<MSTensor> *outputs) {
MS_EXCEPTION_IF_NULL(outputs);
if (!load_flag_) {
Status ret = Load(device_id_);
if (ret != kSuccess) {
MS_LOG(ERROR) << "PrepareModel failed.";
return ret;
}
}
// `Load()` and `Run()` may run in different threads, but `Run()` always runs in the same thread,
// so the device id only needs to be set once there.
if (!set_device_id_flag_) {
bool ret = device::gpu::CudaDriver::SetDevice(UintToInt(device_id_));
if (!ret) {
MS_LOG(ERROR) << "Failed to set device id:" << device_id_;
return kMCDeviceError;
}
set_device_id_flag_ = true;
}
if (inputs.size() != inputs_info_.size()) {
MS_LOG(ERROR) << "inputs count not match, required count " << inputs_info_.size() << ", given count "
<< inputs.size();
return kMCInvalidInput;
}
for (size_t i = 0; i < inputs_info_.size(); ++i) {
if (inputs[i].DataSize() != inputs_info_[i]->Size()) {
MS_LOG(ERROR) << "input " << i << " data size not match, required size " << inputs_info_[i]->Size()
<< ", given count " << inputs[i].DataSize();
return kMCInvalidInput;
}
}
if (ExecuteModel(inputs, outputs) != kSuccess) {
MS_LOG(ERROR) << "Execute Model Failed";
return kMCFailed;
}
if (outputs_info_.size() != outputs->size()) {
MS_LOG(ERROR) << "Predict output size " << outputs->size() << " not match output size got from model info "
<< outputs_info_.size();
return kMCFailed;
}
return kSuccess;
}
std::vector<MSTensor> GPUInferExecutor::GetInputs() {
if (!load_flag_) {
Status ret = Load(device_id_);
if (ret != kSuccess) {
MS_LOG(ERROR) << "PrepareModel failed.";
return {};
}
}
std::vector<MSTensor> result(inputs_info_.size());
for (size_t i = 0; i < inputs_info_.size(); ++i) {
auto &tensor = inputs_info_[i];
void *data = nullptr;
size_t data_size = tensor->Size();
if (i < last_inputs_.size()) {
data = last_inputs_[i]->data_c();
data_size = last_inputs_[i]->Size();
}
result[i] =
MSTensor(input_names_[i], static_cast<enum DataType>(tensor->data_type()), tensor->shape(), data, data_size);
}
return result;
}
std::vector<MSTensor> GPUInferExecutor::GetOutputs() {
if (!load_flag_) {
Status ret = Load(device_id_);
if (ret != kSuccess) {
MS_LOG(ERROR) << "PrepareModel failed.";
return {};
}
}
std::vector<MSTensor> result(outputs_info_.size());
for (size_t i = 0; i < outputs_info_.size(); ++i) {
auto &tensor = outputs_info_[i];
void *data = nullptr;
size_t data_size = tensor->Size();
if (i < last_outputs_.size()) {
if (last_outputs_[i]->NeedSyncDeviceToHost()) {
last_outputs_[i]->data_sync(false);
}
data = last_outputs_[i]->data_c();
data_size = last_outputs_[i]->Size();
}
result[i] =
MSTensor(output_names_[i], static_cast<enum DataType>(tensor->data_type()), tensor->shape(), data, data_size);
}
return result;
}
bool GPUInferExecutor::CheckDeviceSupport(mindspore::DeviceType device_type) { return device_type == kGPU; }
} // namespace mindspore
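
The removed Run() above relies on one detail that is easy to miss: cudaSetDevice() is a per-thread setting, so the thread that executes inference has to bind the device once even if Load() already did so on another thread. A standalone sketch of that pattern using only the plain CUDA runtime API (BindDeviceOnce is an illustrative helper, not extendrt code):

#include <cuda_runtime_api.h>

// Binds the given device once per calling thread, mirroring set_device_id_flag_.
bool BindDeviceOnce(uint32_t device_id) {
  thread_local bool bound = false;
  if (!bound) {
    if (cudaSetDevice(static_cast<int>(device_id)) != cudaSuccess) {
      return false;
    }
    bound = true;
  }
  return true;
}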

View File

@ -1,59 +0,0 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_GPU_INFER_EXECUTOR_H_
#define MINDSPORE_LITE_SRC_EXTENDRT_GPU_INFER_EXECUTOR_H_
#include <string>
#include <vector>
#include <utility>
#include <memory>
#include "include/api/status.h"
#include "include/api/graph.h"
#include "ir/anf.h"
#include "extendrt/graph_executor.h"
namespace mindspore {
class GPUInferExecutor : public GraphExecutor {
public:
GPUInferExecutor();
~GPUInferExecutor() override = default;
Status Execute(const ExecutePlan &plan, const std::vector<MSTensor> &inputs, std::vector<MSTensor> *outputs) override;
protected:
bool CheckDeviceSupport(mindspore::DeviceType device_type) override;
Status Load(uint32_t device_id);
Status InitEnv();
Status FinalizeEnv();
Status CheckModelInputs(const std::vector<tensor::TensorPtr> &inputs) const;
private:
uint32_t graph_id_;
std::string device_type_;
uint32_t device_id_;
std::vector<tensor::TensorPtr> inputs_info_;
std::vector<tensor::TensorPtr> outputs_info_;
std::vector<tensor::TensorPtr> last_inputs_;
std::vector<tensor::TensorPtr> last_outputs_;
std::vector<std::string> input_names_;
std::vector<std::string> output_names_;
bool init_flag_;
bool load_flag_;
bool set_device_id_flag_;
// tensor-rt
uint32_t batch_size_;
uint32_t workspace_size_;
};
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_EXTENDRT_GPU_INFER_EXECUTOR_H_

View File

@ -28,7 +28,7 @@ ExcutionPlan GraphCompiler::Compile(FuncGraphPtr func_graph) {
}
CompileResult result = LinkSegments();
return scheduler.Schedule(result);
return Schedule(result);
}
GraphId GraphCompiler::CompileSegment(const GraphSegmentPtr &segment) {
// Generate kernel graph.

View File

@ -13,8 +13,8 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_EXTENDRT_GRAPH_COMPILER_H
#define MINDSPORE_LITE_EXTENDRT_GRAPH_COMPILER_H
#ifndef MINDSPORE_LITE_EXTENDRT_GRAPH_COMPILER_H_
#define MINDSPORE_LITE_EXTENDRT_GRAPH_COMPILER_H_
#include <string>
#include <memory>
#include <map>
@ -34,10 +34,10 @@ struct CompileResult {
GraphId root_;
std::vector<GraphId> control_nodes_;
std::vector<KernelGraphPtr> graphs_;
// dependency ?
};
struct KernelInfo {
KernelPtr kernel_;
bool isSubgraphKernel;
std::vector<int64_t> inputs_;
std::vector<int64_t> outputs_;
};

View File

@ -1,39 +0,0 @@
/**
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_EXTENDRT_GRAPH_EXECUTOR_H
#define MINDSPORE_LITE_EXTENDRT_GRAPH_EXECUTOR_H
#include <string>
#include <memory>
#include <map>
#include <vector>
#include "include/api/context.h"
#include "include/api/model.h"
#include "include/api/graph.h"
#include "include/api/status.h"
#include "include/common/utils/utils.h"
#include "ir/func_graph.h"
namespace mindspore {
namespace infer {
class GraphExecutor : public std::enable_shared_from_this<GraphExecutor> {
public:
GraphExecutor() = default;
GraphExecutor(const std::string &device_name, uint32_t device_id);
Status Execute(const ExecutePlan &plan, const std::vector<MSTensor> &inputs, std::vector<MSTensor> *outputs);
};
} // namespace infer
} // namespace mindspore
#endif

View File

@ -15,7 +15,7 @@
*/
#include "extendrt/infer_session.h"
#include "extendrt/single_op_session.h"
#include "extendrt/session/single_op_session.h"
#include "plugin/factory/ms_factory.h"
#include "kernel/common_utils.h"
// #include "backend/common/session/session_basic.h"
@ -25,7 +25,6 @@
#include "include/common/utils/anfalgo.h"
#include "backend/common/session/anf_runtime_algorithm.h"
#include "extendrt/delegate/factory.h"
#include "extendrt/delegate/graph_executor/factory.h"
#include "extendrt/session/factory.h"
#include "extendrt/delegate/plugin/tensorrt_executor_plugin.h"
@ -33,19 +32,17 @@ namespace mindspore {
static const std::vector<PrimitivePtr> ms_infer_cut_list = {prim::kPrimReturn, prim::kPrimPartial,
prim::kPrimSwitch, prim::kPrimMakeTuple,
prim::kPrimBpropCut, prim::kPrimSwitchLayer};
static bool is_infer_single_op = false;
static bool is_use_lite_session = true;
static bool is_infer_single_op = true;
static bool is_use_lite_session = false;
// static bool is_use_tensorrt_delegate = true;
class DefaultInferSession : public InferSession {
public:
DefaultInferSession() = default;
explicit DefaultInferSession(const std::shared_ptr<Context> &context) {}
virtual ~DefaultInferSession() = default;
Status Init(const std::shared_ptr<Context> context) override;
Status Init(const std::shared_ptr<Context> &context) override;
Status CompileGraph(FuncGraphPtr graph, const void *data = nullptr, size_t size = 0) override;
Status RunGraph() override;
Status RunGraph(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs) override;
Status Resize(const std::vector<tensor::Tensor> &inputs, const std::vector<std::vector<int64_t>> &dims) override;
std::vector<MutableTensorImplPtr> GetOutputs() override;
std::vector<MutableTensorImplPtr> GetInputs() override;
std::vector<std::string> GetOutputNames() override;
@ -59,7 +56,7 @@ class DefaultInferSession : public InferSession {
std::vector<KernelGraphPtr> kernel_graphs_;
};
Status DefaultInferSession::Init(const std::shared_ptr<Context> context) {
Status DefaultInferSession::Init(const std::shared_ptr<Context> &context) {
MS_LOG(INFO) << "DefaultInferSession::Init";
kernel_graph_utils_ = std::make_shared<mindspore::KernelGraphUtils>();
partition_ = std::make_shared<compile::GraphPartition>(ms_infer_cut_list, "ms");
@ -70,25 +67,20 @@ Status DefaultInferSession::CompileGraph(FuncGraphPtr graph, const void *data, s
return kSuccess;
}
Status DefaultInferSession::RunGraph() { return kSuccess; }
Status DefaultInferSession::RunGraph(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs) {
return kSuccess;
}
Status DefaultInferSession::Resize(const std::vector<tensor::Tensor> &inputs,
const std::vector<std::vector<int64_t>> &dims) {
return kSuccess;
}
std::vector<MutableTensorImplPtr> DefaultInferSession::GetOutputs() { return {}; }
std::vector<MutableTensorImplPtr> DefaultInferSession::GetInputs() { return {}; }
std::vector<std::string> DefaultInferSession::GetOutputNames() { return std::vector<std::string>(); }
std::vector<std::string> DefaultInferSession::GetInputNames() { return std::vector<std::string>(); }
MutableTensorImplPtr DefaultInferSession::GetOutputByTensorName(const std::string &tensorName) { return nullptr; }
MutableTensorImplPtr DefaultInferSession::GetInputByTensorName(const std::string &name) { return nullptr; }
std::shared_ptr<InferSession> InferSession::CreateSession(const std::shared_ptr<Context> context) {
std::shared_ptr<InferSession> InferSession::CreateSession(const std::shared_ptr<Context> &context) {
HandleGPUContext(context);
auto config = SelectSessionArg(context);
MS_LOG(DEBUG) << "Session type " << static_cast<int64_t>(config.type_);
return SessionRegistry::GetInstance().GetSession(config.type_, config);
auto session_type = SelectSession(context);
MS_LOG(DEBUG) << "Session type " << static_cast<int64_t>(session_type);
return SessionRegistry::GetInstance().GetSession(session_type, context);
}
void InferSession::HandleGPUContext(const std::shared_ptr<Context> &context) {
@ -116,50 +108,34 @@ void InferSession::HandleGPUContext(const std::shared_ptr<Context> &context) {
}
}
SessionConfig InferSession::SelectSessionArg(const std::shared_ptr<Context> &context) {
SessionConfig config;
config.context_ = context;
SessionType InferSession::SelectSession(const std::shared_ptr<Context> &context) {
if (context != nullptr) {
if (context->GetDelegate() != nullptr) {
config.delegates_.emplace_back(context->GetDelegate());
}
auto delegate_config = std::make_shared<mindspore::DelegateConfig>(context);
auto &device_contexts = context->MutableDeviceInfo();
for (auto device_context : device_contexts) {
MS_EXCEPTION_IF_NULL(device_context);
if (device_context->GetDeviceType() == kAscend) {
config.type_ = kSingleOpSession;
return config;
return kSingleOpSession;
}
// get graph executor delegate
auto delegate = mindspore::DelegateRegistry::GetInstance().GetDelegate(
device_context->GetDeviceType(), device_context->GetProvider(), delegate_config);
if (delegate == nullptr) {
continue;
// if (device_context->GetDeviceType() == kGPU && is_use_tensorrt_delegate) {
if (device_context->GetDeviceType() == kGPU) {
return kDelegateSession;
}
config.delegates_.emplace_back(delegate);
}
}
if (!config.delegates_.empty()) {
// create delegate session object
config.type_ = kDelegateSession;
return config;
}
if (is_infer_single_op) {
config.type_ = kSingleOpSession;
return config;
return kSingleOpSession;
}
if (is_use_lite_session) {
config.type_ = kLiteInferSession;
return config;
return kLiteInferSession;
}
config.type_ = kDefaultSession;
return config;
return kDefaultSession;
}
static std::shared_ptr<InferSession> DefaultSessionCreator(const SessionConfig &config) {
return std::make_shared<DefaultInferSession>();
static std::shared_ptr<InferSession> DefaultSessionCreator(const std::shared_ptr<Context> &ctx) {
auto session = std::make_shared<DefaultInferSession>(ctx);
session->Init(ctx);
return session;
}
REG_SESSION(kDefaultSession, DefaultSessionCreator);
} // namespace mindspore
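
With the defaults flipped to is_infer_single_op = true, session selection now reduces to: an Ascend device (or no matching device) yields kSingleOpSession, a GPU device yields kDelegateSession. A minimal sketch of the caller side, using only the public Context API and the CreateSession() entry point above; MakeGpuSession is an illustrative helper and error handling is omitted:

#include <memory>
#include "include/api/context.h"
#include "extendrt/infer_session.h"

std::shared_ptr<mindspore::InferSession> MakeGpuSession() {
  auto context = std::make_shared<mindspore::Context>();
  auto gpu_info = std::make_shared<mindspore::GPUDeviceInfo>();
  context->MutableDeviceInfo().push_back(gpu_info);
  // With a GPU device in the context, SelectSession() resolves to
  // kDelegateSession and the registry builds a GraphSinkSession.
  return mindspore::InferSession::CreateSession(context);
}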

View File

@ -34,13 +34,14 @@ namespace mindspore {
class InferSession : public std::enable_shared_from_this<InferSession> {
public:
virtual ~InferSession() = default;
static std::shared_ptr<InferSession> CreateSession(const std::shared_ptr<Context> context);
static SessionConfig SelectSessionArg(const std::shared_ptr<Context> &context);
virtual Status Init(const std::shared_ptr<Context> context) = 0;
static std::shared_ptr<InferSession> CreateSession(const std::shared_ptr<Context> &context);
static SessionType SelectSession(const std::shared_ptr<Context> &context);
virtual Status Init(const std::shared_ptr<Context> &context) = 0;
virtual Status CompileGraph(FuncGraphPtr graph, const void *data = nullptr, size_t size = 0) = 0;
virtual Status RunGraph() = 0;
virtual Status RunGraph(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs) = 0;
virtual Status Resize(const std::vector<tensor::Tensor> &inputs, const std::vector<std::vector<int64_t>> &dims) = 0;
virtual Status Resize(const std::vector<tensor::Tensor> &inputs, const std::vector<std::vector<int64_t>> &dims) {
return kSuccess;
}
virtual std::vector<MutableTensorImplPtr> GetOutputs() = 0;
virtual std::vector<MutableTensorImplPtr> GetInputs() = 0;
@ -53,6 +54,6 @@ class InferSession : public std::enable_shared_from_this<InferSession> {
FuncGraphPtr graph_;
compile::GraphPartitionPtr partition_;
static void HandleGPUContext(const std::shared_ptr<Context> &context);
};
}; // namespace mindspore
} // namespace mindspore
#endif

View File

@ -14,49 +14,58 @@
* limitations under the License.
*/
#include "extendrt/session/delegate_session.h"
#include <vector>
#include <string>
#include <memory>
#include "extendrt/session/delegate_session.h"
#include "extendrt/session/graph_executor_session.h"
#include "extendrt/utils/tensor_utils.h"
#include "src/extendrt/utils/kernel_build_utils.h"
#include "extendrt/delegate/factory.h"
#include "extendrt/session/factory.h"
#include "extendrt/delegate/graph_executor/delegate.h"
namespace mindspore {
Status DelegateSession::Init(const std::shared_ptr<Context> context) { return kSuccess; }
Status DelegateSession::CompileGraph(FuncGraphPtr graph, const void *data, size_t size) { return kSuccess; }
Status DelegateSession::RunGraph() { return kSuccess; }
Status DelegateSession::RunGraph(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs) {
Status GraphSinkSession::Init(const std::shared_ptr<Context> &context) {
MS_LOG(INFO) << "GraphSinkSession::Init";
kernel_graph_utils_ = std::make_shared<mindspore::KernelGraphUtils>();
return kSuccess;
}
Status DelegateSession::Resize(const std::vector<tensor::Tensor> &inputs,
const std::vector<std::vector<int64_t>> &dims) {
Status GraphSinkSession::CompileGraph(FuncGraphPtr graph, const void *data, size_t size) {
MS_LOG(INFO) << "GraphSinkSession::CompileGraph";
std::vector<KernelGraphPtr> all_out_graph;
kernel_graph_ = kernel_graph_utils_->ConstructKernelGraph(graph, &all_out_graph, mindspore::device::DeviceType::kCPU);
MS_EXCEPTION_IF_NULL(kernel_graph_);
auto &kernel_nodes = kernel_graph_->execution_order();
for (const auto &kernel_node : kernel_nodes) {
mindspore::infer::SetKernelInfo(kernel_node);
}
if (graph_executor_->CompileGraph(kernel_graph_, options_)) {
kernel_graph_utils_->GetModelInputsInfo(kernel_graph_->graph_id(), &inputs_, &input_names_);
kernel_graph_utils_->GetModelOutputsInfo(kernel_graph_->graph_id(), &outputs_, &output_names_);
return kSuccess;
}
return kCoreFailed;
}
Status GraphSinkSession::RunGraph(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs) {
return kSuccess;
}
std::vector<MutableTensorImplPtr> DelegateSession::GetOutputs() { return {}; }
std::vector<MutableTensorImplPtr> DelegateSession::GetInputs() { return {}; }
std::vector<std::string> DelegateSession::GetOutputNames() { return std::vector<std::string>(); }
std::vector<std::string> DelegateSession::GetInputNames() { return std::vector<std::string>(); }
MutableTensorImplPtr DelegateSession::GetOutputByTensorName(const std::string &tensorName) { return nullptr; }
MutableTensorImplPtr DelegateSession::GetInputByTensorName(const std::string &name) { return nullptr; }
static std::shared_ptr<InferSession> DelegateSessionCreator(const SessionConfig &config) {
auto delegates = config.delegates_;
if (delegates.size() > 1) {
MS_LOG(ERROR) << "Not support multi delegates context";
std::vector<MutableTensorImplPtr> GraphSinkSession::GetOutputs() { return {}; }
std::vector<MutableTensorImplPtr> GraphSinkSession::GetInputs() { return {}; }
std::vector<std::string> GraphSinkSession::GetOutputNames() { return std::vector<std::string>(); }
std::vector<std::string> GraphSinkSession::GetInputNames() { return std::vector<std::string>(); }
MutableTensorImplPtr GraphSinkSession::GetOutputByTensorName(const std::string &tensorName) { return nullptr; }
MutableTensorImplPtr GraphSinkSession::GetInputByTensorName(const std::string &name) { return nullptr; }
static std::shared_ptr<InferSession> DelegateSessionCreator(const std::shared_ptr<Context> &ctx) {
auto &device_contexts = ctx->MutableDeviceInfo();
if (device_contexts.empty()) {
return nullptr;
}
auto delegate = delegates.front();
auto device_type = device_contexts.at(0)->GetDeviceType();
auto provider = device_contexts.at(0)->GetProvider();
auto graph_executor_delegate = std::reinterpret_pointer_cast<GraphExecutorDelegate>(delegate);
if (graph_executor_delegate != nullptr) {
return std::make_shared<GraphExecutorSession>(graph_executor_delegate->GetGraphExecutor());
}
return std::make_shared<DelegateSession>(delegate);
auto delegate = DelegateRegistry::GetInstance().GetDelegate(device_type, provider, ctx);
auto session = std::make_shared<GraphSinkSession>(delegate);
session->Init(ctx);
return session;
}
REG_SESSION(kDelegateSession, DelegateSessionCreator);
} // namespace mindspore
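
In the usual flow the creator above is invoked through the session registry, but the same steps can be written out directly. A hedged sketch using only the GraphSinkSession interface declared in the accompanying header; CompileAndRun is illustrative, and the executor and FuncGraph are assumed to come from a device backend and a loaded MindIR model respectively:

#include <memory>
#include <vector>
#include "include/api/context.h"
#include "extendrt/session/delegate_session.h"

mindspore::Status CompileAndRun(const std::shared_ptr<mindspore::device::GraphExecutor> &executor,
                                const mindspore::FuncGraphPtr &graph,
                                const std::vector<mindspore::tensor::Tensor> &inputs,
                                std::vector<mindspore::tensor::Tensor> *outputs) {
  auto session = std::make_shared<mindspore::GraphSinkSession>(executor);
  auto ret = session->Init(std::make_shared<mindspore::Context>());
  if (ret != mindspore::kSuccess) {
    return ret;
  }
  // Builds the KernelGraph, sets kernel info, and compiles it on the executor.
  ret = session->CompileGraph(graph);
  if (ret != mindspore::kSuccess) {
    return ret;
  }
  return session->RunGraph(inputs, outputs);
}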

View File

@ -17,24 +17,25 @@
#define MINDSPORE_LITE_EXTENDRT_SESSION_DELEGATE_SESSION_H_
#include <vector>
#include <memory>
#include <string>
#include <memory>
#include <map>
#include "extendrt/infer_session.h"
#include "runtime/hardware/device_context.h"
#include "extendrt/utils/kernel_graph_utils.h"
namespace mindspore {
class DelegateSession : public InferSession {
// TODO(zhaizhiqiang): use GraphSinkDelegateSession instead of GraphSinkSession in future.
// class GraphSinkDelegateSession
class GraphSinkSession : public InferSession {
public:
DelegateSession() = default;
explicit DelegateSession(std::shared_ptr<mindspore::Delegate> delegate) : delegate_(delegate) {}
virtual ~DelegateSession() = default;
GraphSinkSession() = default;
explicit GraphSinkSession(const std::shared_ptr<device::GraphExecutor> &executor) : graph_executor_(executor) {}
virtual ~GraphSinkSession() = default;
Status Init(const std::shared_ptr<Context> context) override;
Status Init(const std::shared_ptr<Context> &context) override;
Status CompileGraph(FuncGraphPtr graph, const void *data = nullptr, size_t size = 0) override;
Status RunGraph() override;
Status RunGraph(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs) override;
Status Resize(const std::vector<tensor::Tensor> &inputs, const std::vector<std::vector<int64_t>> &dims) override;
std::vector<MutableTensorImplPtr> GetOutputs() override;
std::vector<MutableTensorImplPtr> GetInputs() override;
std::vector<std::string> GetOutputNames() override;
@ -43,7 +44,14 @@ class DelegateSession : public InferSession {
MutableTensorImplPtr GetInputByTensorName(const std::string &name) override;
private:
std::shared_ptr<mindspore::Delegate> delegate_;
std::shared_ptr<device::GraphExecutor> graph_executor_;
std::map<string, string> options_;
KernelGraphUtilsPtr kernel_graph_utils_;
KernelGraphPtr kernel_graph_;
std::vector<tensor::TensorPtr> inputs_;
std::vector<std::string> input_names_;
std::vector<tensor::TensorPtr> outputs_;
std::vector<std::string> output_names_;
};
} // namespace mindspore

View File

@ -26,17 +26,18 @@ SessionRegistry &SessionRegistry::GetInstance() {
return instance;
}
void SessionRegistry::RegSession(const mindspore::SessionType &session_type,
std::function<std::shared_ptr<InferSession>(const SessionConfig &)> creator) {
void SessionRegistry::RegSession(
const mindspore::SessionType &session_type,
const std::function<std::shared_ptr<InferSession>(const std::shared_ptr<Context> &)> &creator) {
session_map_[session_type] = creator;
}
std::shared_ptr<InferSession> SessionRegistry::GetSession(const mindspore::SessionType &session_type,
const SessionConfig &config) {
const std::shared_ptr<Context> &ctx) {
auto it = session_map_.find(session_type);
if (it == session_map_.end()) {
return nullptr;
}
return it->second(config);
return it->second(ctx);
}
} // namespace mindspore
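
The registry is a plain map from SessionType to creator; REG_SESSION, as used by the sessions in this change, presumably instantiates a SessionRegistrar with the given creator at static-initialization time. Resolving a session by hand then looks like the sketch below; LookupSingleOpSession is an illustrative helper and the include path is the one used elsewhere in the change:

#include <memory>
#include "include/api/context.h"
#include "extendrt/session/factory.h"

std::shared_ptr<mindspore::InferSession> LookupSingleOpSession(
    const std::shared_ptr<mindspore::Context> &ctx) {
  // Returns nullptr when no creator was registered for the requested type.
  return mindspore::SessionRegistry::GetInstance().GetSession(mindspore::kSingleOpSession, ctx);
}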

View File

@ -21,7 +21,7 @@
#include "extendrt/session/type.h"
#include "extendrt/infer_session.h"
#include "include/api/context.h"
namespace mindspore {
class SessionRegistry {
public:
@ -31,18 +31,20 @@ class SessionRegistry {
static SessionRegistry &GetInstance();
void RegSession(const mindspore::SessionType &session_type,
std::function<std::shared_ptr<InferSession>(const SessionConfig &)> creator);
const std::function<std::shared_ptr<InferSession>(const std::shared_ptr<Context> &)> &creator);
std::shared_ptr<InferSession> GetSession(const mindspore::SessionType &session_type, const SessionConfig &config);
std::shared_ptr<InferSession> GetSession(const mindspore::SessionType &session_type,
const std::shared_ptr<Context> &);
private:
mindspore::HashMap<SessionType, std::function<std::shared_ptr<InferSession>(const SessionConfig &)>> session_map_;
mindspore::HashMap<SessionType, std::function<std::shared_ptr<InferSession>(const std::shared_ptr<Context> &)>>
session_map_;
};
class SessionRegistrar {
public:
SessionRegistrar(const mindspore::SessionType &session_type,
std::function<std::shared_ptr<InferSession>(const SessionConfig &)> creator) {
const std::function<std::shared_ptr<InferSession>(const std::shared_ptr<Context> &)> &creator) {
SessionRegistry::GetInstance().RegSession(session_type, creator);
}
~SessionRegistrar() = default;

View File

@ -1,187 +0,0 @@
/**
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include <vector>
#include <string>
#include <memory>
#include "extendrt/session/graph_executor_session.h"
#include "src/extendrt/utils/kernel_build_utils.h"
#include "extendrt/utils/tensor_default_impl.h"
#include "extendrt/utils/tensor_utils.h"
#include "extendrt/delegate/graph_executor/litert/graph_executor.h"
namespace mindspore {
Status GraphExecutorSession::Init(const std::shared_ptr<Context> context) {
MS_LOG(INFO) << "GraphExecutorSession::Init";
kernel_graph_utils_ = std::make_shared<mindspore::KernelGraphUtils>();
return kSuccess;
}
Status GraphExecutorSession::CompileGraph(FuncGraphPtr graph, const void *data, size_t size) {
MS_LOG(INFO) << "GraphExecutorSession::CompileGraph";
func_graph_ = graph;
std::vector<KernelGraphPtr> all_out_graph;
kernel_graph_ = kernel_graph_utils_->ConstructKernelGraph(graph, &all_out_graph, mindspore::device::DeviceType::kCPU);
MS_EXCEPTION_IF_NULL(kernel_graph_);
auto &kernel_nodes = kernel_graph_->execution_order();
for (const auto &kernel_node : kernel_nodes) {
mindspore::infer::SetKernelInfo(kernel_node);
}
bool ret = true;
if (is_use_kernel_graph_) {
if (!graph_executor_->CompileGraph(kernel_graph_, options_)) {
is_use_kernel_graph_ = false;
ret = graph_executor_->CompileGraph(func_graph_, options_);
}
} else {
ret = graph_executor_->CompileGraph(func_graph_, options_);
}
if (!ret) {
MS_LOG(ERROR) << "GraphExecutorSession::CompileGraph compile graph failed";
return kCoreFailed;
}
return InitGraphInputsOutputs();
}
Status GraphExecutorSession::InitGraphInputsOutputs() {
std::vector<tensor::TensorPtr> graph_inputs, graph_outputs;
kernel_graph_utils_->GetModelInputsInfo(kernel_graph_->graph_id(), &graph_inputs, &input_names_);
kernel_graph_utils_->GetModelOutputsInfo(kernel_graph_->graph_id(), &graph_outputs, &output_names_);
if (graph_inputs.size() != input_names_.size()) {
MS_LOG(ERROR) << "Graph input size " << graph_inputs.size() << " != input names size " << input_names_.size();
return kCoreFailed;
}
if (graph_outputs.size() != output_names_.size()) {
MS_LOG(ERROR) << "Graph output size " << graph_outputs.size() << " != output names size " << output_names_.size();
return kCoreFailed;
}
inputs_.clear();
auto new_inputs = graph_executor_->GetInputInfos(kernel_graph_);
if (new_inputs.empty()) {
for (size_t i = 0; i < input_names_.size(); i++) {
auto &input = graph_inputs[i];
auto data_type = static_cast<enum DataType>(input->data_type());
auto impl = std::make_shared<TensorDefaultImpl>(input_names_[i], data_type, input->shape_c());
inputs_.push_back(impl);
}
} else {
if (new_inputs.size() != input_names_.size()) {
MS_LOG(ERROR) << "Input count " << new_inputs.size() << " get from executor != input names count "
<< input_names_.size();
return kCoreFailed;
}
for (size_t i = 0; i < input_names_.size(); i++) {
auto &input = new_inputs[i];
auto data_type = static_cast<enum DataType>(input.data_type());
auto impl = std::make_shared<TensorDefaultImpl>(input_names_[i], data_type, input.shape_c());
inputs_.push_back(impl);
}
}
outputs_.clear();
auto new_outputs = graph_executor_->GetOutputInfos(kernel_graph_);
if (new_outputs.empty()) {
for (size_t i = 0; i < output_names_.size(); i++) {
auto &output = graph_outputs[i];
auto data_type = static_cast<enum DataType>(output->data_type());
auto impl = std::make_shared<TensorDefaultImpl>(output_names_[i], data_type, output->shape_c());
outputs_.push_back(impl);
}
} else {
if (new_outputs.size() != output_names_.size()) {
MS_LOG(ERROR) << "Output count " << new_outputs.size() << " get from executor != output names count "
<< output_names_.size();
return kCoreFailed;
}
for (size_t i = 0; i < output_names_.size(); i++) {
auto &output = new_outputs[i];
auto data_type = static_cast<enum DataType>(output.data_type());
auto impl = std::make_shared<TensorDefaultImpl>(output_names_[i], data_type, output.shape_c());
outputs_.push_back(impl);
}
}
return kSuccess;
}
Status GraphExecutorSession::RunGraph() { return kSuccess; }
Status GraphExecutorSession::RunGraph(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs) {
MS_LOG(INFO) << "GraphExecutorSession::RunGraph";
MS_EXCEPTION_IF_NULL(graph_executor_);
MS_EXCEPTION_IF_NULL(outputs);
bool ret = true;
if (is_use_kernel_graph_) {
ret = graph_executor_->RunGraph(kernel_graph_, inputs, outputs, options_);
} else {
ret = graph_executor_->RunGraph(func_graph_, inputs, outputs, options_);
}
if (!ret) {
MS_LOG(ERROR) << "GraphExecutorSession::RunGraph run graph failed";
return kCoreFailed;
}
return kSuccess;
}
Status GraphExecutorSession::Resize(const std::vector<tensor::Tensor> &inputs,
const std::vector<std::vector<int64_t>> &new_shapes) {
MS_LOG(INFO) << "GraphExecutorSession::Resize";
MS_EXCEPTION_IF_NULL(graph_executor_);
auto ret = graph_executor_->Resize(kernel_graph_, inputs, new_shapes);
if (!ret) {
return kCoreFailed;
}
auto new_outputs = graph_executor_->GetOutputInfos(kernel_graph_);
if (new_outputs.empty()) {
return kSuccess;
}
if (new_outputs.size() != outputs_.size()) {
MS_LOG(ERROR) << "Output count " << new_outputs.size() << " get from executor != last output count "
<< outputs_.size();
return kCoreFailed;
}
for (size_t i = 0; i < new_shapes.size(); i++) {
auto &input_shape = new_shapes[i];
inputs_[i]->SetShape(input_shape);
inputs_[i]->SetData(nullptr, false); // reset data
}
for (size_t i = 0; i < outputs_.size(); i++) {
auto &output = new_outputs[i];
outputs_[i]->SetShape(output.shape_c());
outputs_[i]->SetData(nullptr, false); // reset data
}
return kSuccess;
}
std::vector<MutableTensorImplPtr> GraphExecutorSession::GetOutputs() { return outputs_; }
std::vector<MutableTensorImplPtr> GraphExecutorSession::GetInputs() { return inputs_; }
std::vector<std::string> GraphExecutorSession::GetOutputNames() { return output_names_; }
std::vector<std::string> GraphExecutorSession::GetInputNames() { return input_names_; }
MutableTensorImplPtr GraphExecutorSession::GetOutputByTensorName(const std::string &tensorName) {
for (size_t i = 0; i < output_names_.size(); i++) {
if (output_names_[i] == tensorName) {
return outputs_[i];
}
}
return nullptr;
}
MutableTensorImplPtr GraphExecutorSession::GetInputByTensorName(const std::string &name) {
for (size_t i = 0; i < input_names_.size(); i++) {
if (input_names_[i] == name) {
return inputs_[i];
}
}
return nullptr;
}
} // namespace mindspore

View File

@ -1,65 +0,0 @@
/**
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_EXTENDRT_SESSION_GRAPH_EXECUTOR_SESSION_H_
#define MINDSPORE_LITE_EXTENDRT_SESSION_GRAPH_EXECUTOR_SESSION_H_
#include <vector>
#include <string>
#include <memory>
#include <map>
#include "extendrt/session/delegate_session.h"
#include "extendrt/session/lite_graph_executor.h"
#include "extendrt/utils/kernel_graph_utils.h"
namespace mindspore {
class GraphExecutorSession : public DelegateSession {
public:
GraphExecutorSession() = default;
explicit GraphExecutorSession(std::shared_ptr<mindspore::LiteGraphExecutor> graph_executor)
: graph_executor_(graph_executor) {}
virtual ~GraphExecutorSession() = default;
Status Init(const std::shared_ptr<Context> context) override;
Status CompileGraph(FuncGraphPtr graph, const void *data = nullptr, size_t size = 0) override;
Status RunGraph() override;
Status RunGraph(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs) override;
Status Resize(const std::vector<tensor::Tensor> &inputs, const std::vector<std::vector<int64_t>> &dims) override;
std::vector<MutableTensorImplPtr> GetOutputs() override;
std::vector<MutableTensorImplPtr> GetInputs() override;
std::vector<std::string> GetOutputNames() override;
std::vector<std::string> GetInputNames() override;
MutableTensorImplPtr GetOutputByTensorName(const std::string &tensorName) override;
MutableTensorImplPtr GetInputByTensorName(const std::string &name) override;
private:
std::shared_ptr<mindspore::LiteGraphExecutor> graph_executor_;
std::map<std::string, std::string> options_;
bool is_use_kernel_graph_ = true;
KernelGraphUtilsPtr kernel_graph_utils_;
KernelGraphPtr kernel_graph_;
FuncGraphPtr func_graph_;
std::vector<MutableTensorImplPtr> inputs_;
std::vector<std::string> input_names_;
std::vector<MutableTensorImplPtr> outputs_;
std::vector<std::string> output_names_;
Status InitGraphInputsOutputs();
};
} // namespace mindspore
#endif // MINDSPORE_LITE_EXTENDRT_SESSION_GRAPH_EXECUTOR_SESSION_H_

View File

@ -29,7 +29,7 @@
namespace mindspore {
const size_t tensor_max_size = 0x1000000;
Status LiteInferSession::Init(const std::shared_ptr<Context> context) {
Status LiteInferSession::Init(const std::shared_ptr<Context> &context) {
MS_LOG(INFO) << "SingleOpInferSession::Init";
context_ = context;
lite_session_ = CreateLiteSession(ContextUtils::Convert(context_.get()));
@ -125,10 +125,6 @@ std::vector<int32_t> LiteInferSession::TruncateShape(const std::vector<int64_t>
return truncated_shape;
}
Status LiteInferSession::RunGraph() {
auto ret = lite_session_->RunGraph();
return static_cast<StatusCode>(ret);
}
Status LiteInferSession::RunGraph(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs) {
MS_LOG(INFO) << "SingleOpInferSession::RunGraph with input and outputs";
MS_EXCEPTION_IF_NULL(outputs);
@ -186,7 +182,7 @@ Status LiteInferSession::RunGraph(const std::vector<tensor::Tensor> &inputs, std
}
}
}
auto ret = RunGraph();
auto ret = static_cast<StatusCode>(lite_session_->RunGraph());
ResetTensorData(old_data, input_tensors);
if (ret != kSuccess) {
MS_LOG(ERROR) << "Run graph failed.";
@ -202,10 +198,6 @@ Status LiteInferSession::RunGraph(const std::vector<tensor::Tensor> &inputs, std
*outputs = TensorUtils::MSTensorToTensor(res);
return kSuccess;
}
Status LiteInferSession::Resize(const std::vector<tensor::Tensor> &inputs,
const std::vector<std::vector<int64_t>> &dims) {
return kSuccess;
}
std::vector<MutableTensorImplPtr> LiteInferSession::GetOutputs() {
auto outputs = lite_session_->GetOutputs();
@ -302,8 +294,10 @@ std::vector<tensor::TensorPtr> LiteInferSession::ConvertToTensors(
return tensors;
}
static std::shared_ptr<InferSession> LiteInferSessionCreator(const SessionConfig &config) {
return std::make_shared<LiteInferSession>(config.context_);
static std::shared_ptr<InferSession> LiteInferSessionCreator(const std::shared_ptr<Context> &ctx) {
auto session = std::make_shared<LiteInferSession>();
session->Init(ctx);
return session;
}
REG_SESSION(kLiteInferSession, LiteInferSessionCreator);
} // namespace mindspore

View File

@ -27,14 +27,11 @@ namespace mindspore {
class LiteInferSession : public InferSession {
public:
LiteInferSession() = default;
explicit LiteInferSession(const std::shared_ptr<Context> context) : context_(context) {}
explicit LiteInferSession(const std::shared_ptr<Context> &context) : context_(context) {}
virtual ~LiteInferSession() = default;
Status Init(const std::shared_ptr<Context> context) override;
Status Init(const std::shared_ptr<Context> &context) override;
Status CompileGraph(FuncGraphPtr graph, const void *data = nullptr, size_t size = 0) override;
Status RunGraph() override;
Status RunGraph(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs) override;
Status Resize(const std::vector<tensor::Tensor> &inputs, const std::vector<std::vector<int64_t>> &dims) override;
std::vector<MutableTensorImplPtr> GetOutputs() override;
std::vector<MutableTensorImplPtr> GetInputs() override;
std::vector<std::string> GetOutputNames() override;

View File

@ -1,5 +1,5 @@
/**
* Copyright 2019-2021 Huawei Technologies Co., Ltd
* Copyright 2019-2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -19,7 +19,7 @@
#include <string>
#include <vector>
#include "src/extendrt/single_op_session.h"
#include "src/extendrt/session/single_op_session.h"
#include "src/extendrt/infer_device_address.h"
#include "plugin/factory/ms_factory.h"
@ -53,7 +53,7 @@ Status SingleOpInferSession::AscendInit(const std::shared_ptr<Context> &context)
return kSuccess;
}
Status SingleOpInferSession::Init(const std::shared_ptr<Context> context) {
Status SingleOpInferSession::Init(const std::shared_ptr<Context> &context) {
MS_LOG(INFO) << "SingleOpInferSession::Init";
MS_EXCEPTION_IF_NULL(context);
kernel_graph_utils_ = std::make_shared<mindspore::KernelGraphUtils>();
@ -158,7 +158,6 @@ Status SingleOpInferSession::CompileGraph(FuncGraphPtr graph, const void *data,
return kSuccess;
}
Status SingleOpInferSession::RunGraph() { return kSuccess; }
Status SingleOpInferSession::RunGraph(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs) {
MS_LOG(INFO) << "SingleOpInferSession::RunGraph with input and outputs";
MS_EXCEPTION_IF_NULL(kernel_graph_);
@ -244,27 +243,6 @@ Status SingleOpInferSession::ResizeGraphInputs(const std::vector<tensor::Tensor>
graph_input->set_abstract(abstract);
}
return kSuccess;
}
Status SingleOpInferSession::Resize(const std::vector<tensor::Tensor> &inputs,
const std::vector<std::vector<int64_t>> &dims) {
if (ResizeGraphInputs(inputs, dims) != kSuccess) {
MS_LOG(EXCEPTION) << "Resize graph input error. ";
}
auto &kernel_nodes = kernel_graph_->execution_order();
for (const auto &kernel_node : kernel_nodes) {
std::string kernel_name = common::AnfAlgo::GetCNodeName(kernel_node);
MS_LOG(INFO) << "SingleOpInferSession::Resize " << kernel_name;
auto kernel_mod = AnfAlgo::GetKernelMod(kernel_node);
if (kernel_mod == nullptr) {
MS_LOG(EXCEPTION) << "Kernel mod is nullptr, kernel name: " << kernel_name;
}
auto args = kernel::AbstractArgsFromCNode(kernel_node);
if (kernel_mod->Resize(args.op, args.inputs, args.outputs) != kSuccess) {
MS_LOG(EXCEPTION) << "Kernel mod resize failed, kernel name: " << kernel_name;
}
}
return kSuccess;
}
std::vector<MutableTensorImplPtr> SingleOpInferSession::GetOutputs() { return outputs_; }
std::vector<MutableTensorImplPtr> SingleOpInferSession::GetInputs() { return inputs_; }
@ -295,8 +273,10 @@ MutableTensorImplPtr SingleOpInferSession::GetInputByTensorName(const std::strin
return nullptr;
}
static std::shared_ptr<InferSession> SingleOpSessionCreator(const SessionConfig &config) {
return std::make_shared<SingleOpInferSession>();
static std::shared_ptr<InferSession> SingleOpSessionCreator(const std::shared_ptr<Context> &ctx) {
auto session = std::make_shared<SingleOpInferSession>();
session->Init(ctx);
return session;
}
REG_SESSION(kSingleOpSession, SingleOpSessionCreator);
} // namespace mindspore

View File

@ -19,7 +19,6 @@
#include <string>
#include <memory>
#include <vector>
#include "src/extendrt/infer_session.h"
#include "extendrt/utils/kernel_graph_utils.h"
@ -28,13 +27,10 @@ class SingleOpInferSession : public InferSession {
public:
SingleOpInferSession() = default;
virtual ~SingleOpInferSession() = default;
Status Init(const std::shared_ptr<Context> context) override;
Status Init(const std::shared_ptr<Context> &context) override;
Status AscendInit(const std::shared_ptr<Context> &context);
Status CompileGraph(FuncGraphPtr graph, const void *data = nullptr, size_t size = 0) override;
Status RunGraph() override;
Status RunGraph(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs) override;
Status Resize(const std::vector<tensor::Tensor> &inputs, const std::vector<std::vector<int64_t>> &dims) override;
std::vector<MutableTensorImplPtr> GetOutputs() override;
std::vector<MutableTensorImplPtr> GetInputs() override;
std::vector<std::string> GetOutputNames() override;

View File

@ -19,15 +19,7 @@
#include <memory>
#include <vector>
#include "include/api/delegate.h"
namespace mindspore {
enum SessionType { kDefaultSession = 0, kSingleOpSession, kLiteInferSession, kDelegateSession, kNoneSession };
struct SessionConfig {
SessionType type_;
std::shared_ptr<Context> context_;
std::vector<std::shared_ptr<Delegate>> delegates_;
};
} // namespace mindspore
#endif // MINDSPORE_LITE_EXTENDRT_SESSION_TYPE_H_

View File

@ -1,5 +1,5 @@
/**
* Copyright 2019-2021 Huawei Technologies Co., Ltd
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -13,10 +13,25 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "extendrt/subgraph_kernel.h"
namespace mindspore::infer {
#include "src/extendrt/subgraph_kernel.h"
namespace mindspore::kernel {
bool SubgraphKernel::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
// TODO(zhaizhiqiang): Construct input/output tensor::Tensor for graph executor.
std::vector<tensor::Tensor> in;
std::vector<tensor::Tensor> out;
std::map<string, string> compile_options;
executor_->RunGraph(subgraph_, in, &out, compile_options);
return true;
}
} // namespace mindspore::infer
bool SubgraphKernel::Init(const BaseOperatorPtr &opdef, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs) {
std::map<string, string> compile_options;
return executor_->CompileGraph(subgraph_, compile_options);
}
int SubgraphKernel::Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs,
const std::map<uint32_t, tensor::TensorPtr> &inputsOnHost) {
return 0;
}
} // namespace mindspore::kernel
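
GraphExecutorDelegate::CreateKernel() is the intended producer of this kernel, but the lifecycle is easy to see in isolation: construct with the sink graph and executor, Init() compiles through the executor, Launch() runs it (input/output tensor wiring is still a TODO above, so the vectors below are empty placeholders). A sketch under those assumptions; CompileAndLaunch is illustrative only:

#include <memory>
#include <vector>
#include "src/extendrt/subgraph_kernel.h"

bool CompileAndLaunch(const mindspore::FuncGraphPtr &graph,
                      const std::shared_ptr<mindspore::device::GraphExecutor> &executor) {
  auto kernel = std::make_shared<mindspore::kernel::SubgraphKernel>(graph, executor);
  std::vector<mindspore::kernel::KernelTensorPtr> no_tensors;
  // Init() ignores its arguments in this change and just compiles the subgraph.
  if (!kernel->Init(nullptr, no_tensors, no_tensors)) {
    return false;
  }
  std::vector<mindspore::kernel::AddressPtr> no_addrs;
  // Launch() forwards to executor_->RunGraph(); tensor plumbing is still pending.
  return kernel->Launch(no_addrs, no_addrs, no_addrs, nullptr);
}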

View File

@ -1,5 +1,5 @@
/**
* Copyright 2019-2021 Huawei Technologies Co., Ltd
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -19,22 +19,28 @@
#include <memory>
#include <map>
#include <vector>
#include "include/api/context.h"
#include "include/api/model.h"
#include "include/api/graph.h"
#include "include/api/status.h"
#include "include/common/utils/utils.h"
#include "kernel/kernel.h"
#include "ir/func_graph.h"
#include "ccsrc/kernel/kernel.h"
struct KernelInfo;
class SubGraphKernel : public KernelMod {
#include "runtime/hardware/device_context.h"
namespace mindspore::kernel {
class SubgraphKernel : public KernelMod {
public:
SubGraphKernel() = default;
virtual ~SubGraphKernel() = default;
SubgraphKernel(FuncGraphPtr subgraph, std::shared_ptr<device::GraphExecutor> executor)
: subgraph_(subgraph), executor_(executor) {}
virtual ~SubgraphKernel() = default;
bool Init(const BaseOperatorPtr & /* base_operator */, const std::vector<KernelTensorPtr> & /* inputs */,
const std::vector<KernelTensorPtr> & /* outputs */) override;
int Resize(const BaseOperatorPtr &base_operator, const std::vector<KernelTensorPtr> &inputs,
const std::vector<KernelTensorPtr> &outputs,
const std::map<uint32_t, tensor::TensorPtr> &inputsOnHost) override;
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
protected:
std::vector<KernelInfo> kernels_;
FuncGraphPtr subgraph_;
std::shared_ptr<device::GraphExecutor> executor_;
};
} // namespace mindspore::kernel
#endif

View File

@ -158,6 +158,23 @@ std::vector<int32_t> Context::GetThreadAffinityCoreList() const {
return data_->affinity_core_list_;
}
void Context::set_delegate(const std::shared_ptr<AbstractDelegate> &delegate) {
// if (data_ == nullptr) {
// MS_LOG(ERROR) << "Invalid context.";
// return;
// }
// data_->delegate = std::dynamic_pointer_cast<Delegate>(delegate);
}
std::shared_ptr<AbstractDelegate> Context::get_delegate() const {
if (data_ == nullptr) {
MS_LOG(ERROR) << "Invalid context.";
return nullptr;
}
return data_->delegate;
}
// deprecated
void Context::SetDelegate(const std::shared_ptr<Delegate> &delegate) {
if (data_ == nullptr) {
MS_LOG(ERROR) << "Invalid context.";
@ -166,6 +183,7 @@ void Context::SetDelegate(const std::shared_ptr<Delegate> &delegate) {
data_->delegate = delegate;
}
// deprecated
std::shared_ptr<Delegate> Context::GetDelegate() const {
if (data_ == nullptr) {
MS_LOG(ERROR) << "Invalid context.";

View File

@ -27,6 +27,7 @@
#include <experimental/any>
#endif
#include "include/api/context.h"
#include "include/api/delegate.h"
namespace mindspore {
struct Context::Data {