forked from mindspore-Ecosystem/mindspore
Cloud inference tensor opt
This commit is contained in:
parent: aaeae5d3ae
commit: f33ea707cd
@@ -81,10 +81,11 @@ class MS_API MSTensor {
  /// \param[in] shape The shape of the MSTensor.
  /// \param[in] data The data pointer that points to allocated memory.
  /// \param[in] data_len The length of the memory, in bytes.
  /// \param[in] own_data Whether the data memory should be freed in MSTensor destruction.
  ///
  /// \return A pointer of MSTensor.
  static inline MSTensor *CreateRefTensor(const std::string &name, DataType type, const std::vector<int64_t> &shape,
                                          const void *data, size_t data_len) noexcept;
                                          const void *data, size_t data_len, bool own_data = true) noexcept;

  /// \brief Creates a MSTensor object, whose device data can be directly accessed by Model, must be used in pairs with
  /// DestroyTensorPtr.

@@ -96,8 +97,8 @@ class MS_API MSTensor {
  /// \param[in] data_len The length of the memory, in bytes.
  ///
  /// \return A pointer of MSTensor.
  static inline MSTensor *CreateDevTensor(const std::string &name, DataType type, const std::vector<int64_t> &shape,
                                          const void *data, size_t data_len) noexcept;
  static inline MSTensor CreateDeviceTensor(const std::string &name, DataType type, const std::vector<int64_t> &shape,
                                            void *data, size_t data_len) noexcept;

  /// \brief Creates a MSTensor object from local file, must be used in pairs with DestroyTensorPtr.
  ///

@@ -125,7 +126,7 @@ class MS_API MSTensor {
  /// \return A vector container containing several strings.
  static inline std::vector<std::string> TensorToStrings(const MSTensor &tensor);

  /// \brief Destroy an object created by Clone, StringsToTensor, CreateRefTensor, CreateDevTensor or CreateTensor. Do
  /// \brief Destroy an object created by Clone, StringsToTensor, CreateRefTensor or CreateTensor. Do
  /// not use it to destroy MSTensor from other sources.
  ///
  /// \param[in] tensor A MSTensor object.

@@ -207,6 +208,13 @@ class MS_API MSTensor {
  /// \return The boolean value that indicates whether the MSTensor equals tensor.
  bool operator==(const MSTensor &tensor) const;

  /// \brief Get the boolean value that indicates whether the MSTensor not equals tensor.
  ///
  /// \param[in] another MSTensor.
  ///
  /// \return The boolean value that indicates whether the MSTensor not equals tensor.
  bool operator!=(const MSTensor &tensor) const;

  /// \brief Set the shape of for the MSTensor. Only valid for Lite.
  ///
  /// \param[in] shape Shape of the MSTensor, a vector of int64_t.

@@ -251,7 +259,20 @@ class MS_API MSTensor {
  /// \note The memory pointed to origin data pointer of MSTensor needs to be managed by the user
  ///
  /// \param[in] data A pointer to the data of the MSTensor.
  void SetData(void *data);
  /// \param[in] own_data Whether the data memory should be freed in MSTensor destruction.
  void SetData(void *data, bool own_data = true);

  /// \brief Set the device data address for the MSTensor. Only valid for Lite.
  ///
  /// \note The memory pointed to origin data pointer of MSTensor needs to be managed by the user
  ///
  /// \param[in] data A pointer to the device data of the MSTensor.
  void SetDeviceData(void *data);

  /// \brief Get the device data address of the MSTensor set by SetDeviceData. Only valid for Lite.
  ///
  /// \return A pointer to the device data of the MSTensor.
  void *GetDeviceData();

  /// \brief Get the quantization parameters of the MSTensor. Only valid for Lite.
  ///
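The own_data flag and the new device-data accessors above change who is responsible for freeing the memory behind an MSTensor. A minimal usage sketch, assuming the public Lite C++ API and an application-managed device pointer (names below are illustrative, not part of this commit):

```cpp
// Sketch only: not part of this commit. Assumes the MindSpore Lite C++ API from include/api/types.h.
#include <vector>
#include "include/api/types.h"

void RefTensorUsageSketch(void *device_ptr /* pre-allocated device memory, hypothetical */) {
  std::vector<float> host_buf(2 * 3, 0.0f);  // caller-owned host memory
  auto *ref = mindspore::MSTensor::CreateRefTensor(
      "x", mindspore::DataType::kNumberTypeFloat32, {2, 3},
      host_buf.data(), host_buf.size() * sizeof(float), /* own_data = */ false);
  if (ref == nullptr) {
    return;
  }
  ref->SetDeviceData(device_ptr);          // attach device memory without copying
  void *same_ptr = ref->GetDeviceData();   // reads back the pointer set above
  (void)same_ptr;
  mindspore::MSTensor::DestroyTensorPtr(ref);  // destroys the wrapper; host_buf stays valid
}
```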
@@ -270,9 +291,9 @@ class MS_API MSTensor {
  static MSTensor *CreateTensor(const std::vector<char> &name, enum DataType type, const std::vector<int64_t> &shape,
                                const void *data, size_t data_len) noexcept;
  static MSTensor *CreateRefTensor(const std::vector<char> &name, enum DataType type, const std::vector<int64_t> &shape,
                                   const void *data, size_t data_len) noexcept;
  static MSTensor *CreateDevTensor(const std::vector<char> &name, enum DataType type, const std::vector<int64_t> &shape,
                                   const void *data, size_t data_len) noexcept;
                                   const void *data, size_t data_len, bool own_data) noexcept;
  static MSTensor CreateDeviceTensor(const std::vector<char> &name, enum DataType type,
                                     const std::vector<int64_t> &shape, void *data, size_t data_len) noexcept;
  static MSTensor *CreateTensorFromFile(const std::vector<char> &file, enum DataType type,
                                        const std::vector<int64_t> &shape) noexcept;
  static MSTensor *CharStringsToTensor(const std::vector<char> &name, const std::vector<std::vector<char>> &str);

@@ -313,13 +334,13 @@ MSTensor *MSTensor::CreateTensor(const std::string &name, enum DataType type, co
}

MSTensor *MSTensor::CreateRefTensor(const std::string &name, enum DataType type, const std::vector<int64_t> &shape,
                                    const void *data, size_t data_len) noexcept {
  return CreateRefTensor(StringToChar(name), type, shape, data, data_len);
                                    const void *data, size_t data_len, bool own_data) noexcept {
  return CreateRefTensor(StringToChar(name), type, shape, data, data_len, own_data);
}

MSTensor *MSTensor::CreateDevTensor(const std::string &name, enum DataType type, const std::vector<int64_t> &shape,
                                    const void *data, size_t data_len) noexcept {
  return CreateDevTensor(StringToChar(name), type, shape, data, data_len);
MSTensor MSTensor::CreateDeviceTensor(const std::string &name, enum DataType type, const std::vector<int64_t> &shape,
                                      void *data, size_t data_len) noexcept {
  return CreateDeviceTensor(StringToChar(name), type, shape, data, data_len);
}

MSTensor *MSTensor::CreateTensorFromFile(const std::string &file, enum DataType type,
@@ -155,7 +155,8 @@ MSTensor *MSTensor::CreateTensor(const std::vector<char> &name, enum DataType ty
}

MSTensor *MSTensor::CreateRefTensor(const std::vector<char> &name, enum DataType type,
                                    const std::vector<int64_t> &shape, const void *data, size_t data_len) noexcept {
                                    const std::vector<int64_t> &shape, const void *data, size_t data_len,
                                    bool) noexcept {
  std::string name_str = CharToString(name);
  try {
    std::shared_ptr<Impl> impl = std::make_shared<TensorReferenceImpl>(name_str, type, shape, data, data_len, false);

@@ -170,19 +171,18 @@ MSTensor *MSTensor::CreateRefTensor(const std::vector<char> &name, enum DataType
  }
}

MSTensor *MSTensor::CreateDevTensor(const std::vector<char> &name, enum DataType type,
                                    const std::vector<int64_t> &shape, const void *data, size_t data_len) noexcept {
MSTensor MSTensor::CreateDeviceTensor(const std::vector<char> &name, enum DataType type,
                                      const std::vector<int64_t> &shape, void *data, size_t data_len) noexcept {
  std::string name_str = CharToString(name);
  try {
    std::shared_ptr<Impl> impl = std::make_shared<TensorReferenceImpl>(name_str, type, shape, data, data_len, true);
    MSTensor *ret = new MSTensor(impl);
    return ret;
    return MSTensor(impl);
  } catch (const std::bad_alloc &) {
    MS_LOG(ERROR) << "Malloc memory failed.";
    return nullptr;
    return MSTensor(nullptr);
  } catch (...) {
    MS_LOG(ERROR) << "Unknown error occurred.";
    return nullptr;
    return MSTensor(nullptr);
  }
}

@@ -382,6 +382,10 @@ bool MSTensor::operator==(std::nullptr_t) const { return impl_ == nullptr; }

bool MSTensor::operator!=(std::nullptr_t) const { return impl_ != nullptr; }

bool MSTensor::operator==(const MSTensor &tensor) const { return impl_ == tensor.impl_; }

bool MSTensor::operator!=(const MSTensor &tensor) const { return impl_ != tensor.impl_; }

MSTensor *MSTensor::Clone() const {
  MS_EXCEPTION_IF_NULL(impl_);
  try {

@@ -456,7 +460,11 @@ void MSTensor::SetFormat(mindspore::Format) { MS_LOG_EXCEPTION << "Invalid imple

mindspore::Format MSTensor::format() const { MS_LOG_EXCEPTION << "Invalid implement."; }

void MSTensor::SetData(void *) { MS_LOG_EXCEPTION << "Invalid implement."; }
void MSTensor::SetData(void *, bool) { MS_LOG_EXCEPTION << "Invalid implement."; }

void MSTensor::SetDeviceData(void *) { MS_LOG_EXCEPTION << "Invalid implement."; }

void *MSTensor::GetDeviceData() { MS_LOG_EXCEPTION << "Invalid implement."; }

std::vector<QuantParam> MSTensor::QuantParams() const { MS_LOG_EXCEPTION << "Invalid implement."; }
@@ -628,7 +628,8 @@ Tensor::Tensor(const Tensor &tensor)
      device_event_(tensor.device_event_),
      lazy_callback_(tensor.lazy_callback_),
      user_data_(tensor.user_data_),
      compression_type_(tensor.compression_type_) {}
      compression_type_(tensor.compression_type_),
      tensor_name_(tensor.tensor_name_) {}

Tensor::Tensor(const Tensor &tensor, TypeId data_type)
    : MetaTensor(data_type, tensor.shape_),

@@ -649,7 +650,8 @@ Tensor::Tensor(const Tensor &tensor, TypeId data_type)
      device_event_(tensor.device_event_),
      lazy_callback_(tensor.lazy_callback_),
      user_data_(tensor.user_data_),
      compression_type_(tensor.compression_type_) {}
      compression_type_(tensor.compression_type_),
      tensor_name_(tensor.tensor_name_) {}

Tensor::Tensor(TypeId data_type, const ShapeVector &shape, TensorDataPtr data)
    : MetaTensor(data_type, shape), data_(std::move(data)), id_(MakeId()) {}

@@ -669,6 +669,16 @@ class MS_CORE_API Tensor final : public MetaTensor {
  /// \return tensor compression type.
  TensorCompressionType compression_type() const { return compression_type_; }

  /// \brief Set tensor name.
  ///
  /// \param[in] tensor_name The tensor name.
  void set_name(const std::string &tensor_name) { tensor_name_ = tensor_name; }

  /// \brief Get the tensor name.
  ///
  /// \return tensor name.
  const std::string &name() const { return tensor_name_; }

 private:
  void ExecuteLazyTask() const;

@@ -695,6 +705,8 @@ class MS_CORE_API Tensor final : public MetaTensor {
  std::function<void(void)> lazy_callback_{nullptr};
  UserData user_data_;
  TensorCompressionType compression_type_{kNoCompression};

  std::string tensor_name_;
};

// CSRTensor entity class
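The added tensor_name_ field simply carries a graph tensor's name through copies of Tensor. A small illustrative sketch of the accessor pair, assuming a tensor::TensorPtr created from this header (not part of the commit):

```cpp
// Illustrative only; assumes mindspore/core/ir/tensor.h as changed above.
#include "ir/tensor.h"

void NameRoundTrip(const mindspore::tensor::TensorPtr &t) {
  t->set_name("graph_input_0");       // store the graph tensor name on the runtime tensor
  const std::string &n = t->name();   // retrieved later, e.g. when wrapping it as an MSTensor
  (void)n;
}
```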
@@ -67,6 +67,10 @@ if(MACHINE_LINUX_ARM64)
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv8-a+fp16")
endif()

if(DEFINED ENV{MSLITE_ENABLE_CLOUD_FUSION_INFERENCE})
    set(MSLITE_ENABLE_CLOUD_FUSION_INFERENCE $ENV{MSLITE_ENABLE_CLOUD_FUSION_INFERENCE})
endif()

if(DEFINED ENV{MSLITE_ENABLE_EXPERIMENTAL_KERNEL})
    set(MSLITE_ENABLE_EXPERIMENTAL_KERNEL $ENV{MSLITE_ENABLE_EXPERIMENTAL_KERNEL})
endif()

@@ -90,6 +94,10 @@ if(DEFINED ENV{MSLITE_ENABLE_TRAIN})
    set(MSLITE_ENABLE_TRAIN $ENV{MSLITE_ENABLE_TRAIN})
endif()

if(MSLITE_ENABLE_CLOUD_FUSION_INFERENCE)
    set(MSLITE_ENABLE_TRAIN OFF)
endif()

if(DEFINED ENV{MSLITE_ENABLE_SERVER_INFERENCE})
    set(MSLITE_ENABLE_SERVER_INFERENCE $ENV{MSLITE_ENABLE_SERVER_INFERENCE})
endif()

@@ -187,6 +195,10 @@ if(DEFINED ENV{MSLITE_ENABLE_MODEL_ENCRYPTION})
    endif()
endif()

if(MSLITE_ENABLE_CLOUD_FUSION_INFERENCE)
    set(MSLITE_ENABLE_MODEL_ENCRYPTION ON)
endif()

if(DEFINED ENV{MSLITE_ENABLE_COVERAGE})
    set(MSLITE_ENABLE_COVERAGE $ENV{MSLITE_ENABLE_COVERAGE})
endif()

@@ -211,10 +223,6 @@ if(MSLITE_ENABLE_GITEE_MIRROR)
    set(ENABLE_GITEE ON)
endif()

if(DEFINED ENV{MSLITE_ENABLE_CLOUD_FUSION_INFERENCE})
    set(MSLITE_ENABLE_CLOUD_FUSION_INFERENCE $ENV{MSLITE_ENABLE_CLOUD_FUSION_INFERENCE})
endif()

if(DEFINED ENV{ENABLE_FAST_HASH_TABLE})
    add_compile_definitions(ENABLE_FAST_HASH_TABLE)
    include_directories(${TOP_DIR}/third_party/robin_hood/include)
@@ -0,0 +1,44 @@
/**
 * Copyright 2020 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_COMMON_MUTABLE_TESNOR_IMPL_H_
#define MINDSPORE_LITE_SRC_COMMON_MUTABLE_TESNOR_IMPL_H_

#include <string>
#include <memory>
#include <vector>
#include "ir/api_tensor_impl.h"

namespace mindspore {
class MutableTensorImpl : public MSTensor::Impl {
 public:
  virtual void SetName(const std::string &name) = 0;
  virtual void SetDataType(mindspore::DataType data_type) = 0;
  virtual void SetShape(const std::vector<int64_t> &shape) = 0;
  virtual mindspore::Format Format() const = 0;
  virtual void SetFormat(mindspore::Format format) = 0;
  virtual void SetData(void *data, bool own_data) = 0;
  virtual bool IsConst() const = 0;
  virtual void SetAllocator(const std::shared_ptr<Allocator> &allocator) = 0;
  virtual std::shared_ptr<Allocator> GetAllocator() const = 0;
  virtual std::vector<QuantParam> GetQuantParams() const = 0;
  virtual void SetQuantParams(const std::vector<QuantParam> &quant_param) = 0;
  virtual void SetDeviceData(void *data) = 0;
  virtual void *GetDeviceData() = 0;
};
using MutableTensorImplPtr = std::shared_ptr<MutableTensorImpl>;
}  // namespace mindspore
#endif  // MINDSPORE_LITE_SRC_COMMON_MUTABLE_TESNOR_IMPL_H_
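A rough sketch of how this interface is meant to be reached from the C++ API layer: the MSTensor front end downcasts its Impl to MutableTensorImpl and forwards the Lite-only setters. This is an assumption drawn from the SetData/SetDeviceData dispatch shown later in this commit, not code from the commit itself (the include path below is also an assumption):

```cpp
// Hedged sketch, not part of the commit.
#include <memory>
#include "src/common/mutable_tensor_impl.h"  // assumed location of the header added above

void AttachDeviceData(const std::shared_ptr<mindspore::MSTensor::Impl> &impl, void *device_ptr) {
  auto mutable_impl = std::static_pointer_cast<mindspore::MutableTensorImpl>(impl);
  mutable_impl->SetDeviceData(device_ptr);              // backend stores the device pointer
  mutable_impl->SetData(nullptr, /*own_data=*/false);   // host data not owned in this sketch
}
```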
@@ -89,21 +89,10 @@ std::vector<MSTensor> ModelImpl::GetInputs() {
  std::vector<MSTensor> inputs;

  auto graph_inputs = session_->GetInputs();
  auto graph_input_names = session_->GetInputNames();

  for (size_t i = 0; i < graph_inputs.size(); i++) {
    auto graph_input = graph_inputs[i];
    std::string graph_input_name = graph_input_names[i];
    auto type_id = graph_input->data_type_c();
    auto data_type = static_cast<mindspore::DataType>(type_id);
    auto ms_tensor_ptr = MSTensor::CreateRefTensor(graph_input_name, data_type, graph_input->shape_c(),
                                                   graph_input->data_c(), graph_input->Size());
    if (ms_tensor_ptr == nullptr) {
      MS_LOG_WARNING << "Failed to create input tensor ";
      return {};
    }
    inputs.push_back(*ms_tensor_ptr);
    delete ms_tensor_ptr;
    auto tensor_impl = graph_inputs[i];
    inputs.push_back(MSTensor(tensor_impl));
  }
  return inputs;
}

@@ -111,23 +100,10 @@ std::vector<MSTensor> ModelImpl::GetInputs() {
std::vector<MSTensor> ModelImpl::GetOutputs() {
  MS_EXCEPTION_IF_NULL(session_);
  std::vector<MSTensor> outputs;

  auto graph_outputs = session_->GetOutputs();
  auto graph_output_names = session_->GetOutputNames();

  for (size_t i = 0; i < graph_outputs.size(); i++) {
    auto graph_output = graph_outputs[i];
    std::string graph_output_name = graph_output_names[i];
    auto type_id = graph_output->data_type_c();
    auto data_type = static_cast<mindspore::DataType>(type_id);
    auto ms_tensor_ptr = MSTensor::CreateRefTensor(graph_output_name, data_type, graph_output->shape_c(),
                                                   graph_output->data_c(), graph_output->Size());
    if (ms_tensor_ptr == nullptr) {
      MS_LOG_WARNING << "Failed to create output tensor ";
      return {};
    }
    outputs.push_back(*ms_tensor_ptr);
    delete ms_tensor_ptr;
    auto tensor_impl = graph_outputs[i];
    outputs.push_back(MSTensor(tensor_impl));
  }
  return outputs;
}

@@ -137,17 +113,12 @@ MSTensor ModelImpl::GetInputByTensorName(const std::string &name) {
    MS_LOG(ERROR) << "Session is null.";
    return MSTensor(nullptr);
  }
  auto tensor_ptr = session_->GetInputByTensorName(name);
  if (tensor_ptr == nullptr) {
  auto tensor_impl = session_->GetInputByTensorName(name);
  if (tensor_impl == nullptr) {
    MS_LOG(ERROR) << "Model does not contains tensor " << name << " .";
    return MSTensor(nullptr);
  }
  auto ms_inputs = TensorUtils::TensorPtrToMSTensor({tensor_ptr}, {name});
  if (ms_inputs.empty()) {
    MS_LOG(ERROR) << "Tensor to ms tensor failed." << name << " .";
    return MSTensor(nullptr);
  }
  return ms_inputs[0];
  return MSTensor(tensor_impl);
}

std::vector<std::string> ModelImpl::GetOutputTensorNames() {

@@ -164,35 +135,58 @@ MSTensor ModelImpl::GetOutputByTensorName(const std::string &name) {
    MS_LOG(ERROR) << "Session is null.";
    return MSTensor(nullptr);
  }
  auto tensor_ptr = session_->GetOutputByTensorName(name);
  if (tensor_ptr == nullptr) {
  auto tensor_impl = session_->GetOutputByTensorName(name);
  if (tensor_impl == nullptr) {
    MS_LOG(ERROR) << "Model does not contains tensor " << name << " .";
    return MSTensor(nullptr);
  }
  auto ms_outputs = TensorUtils::TensorPtrToMSTensor({tensor_ptr}, {name});
  if (ms_outputs.empty()) {
    MS_LOG(ERROR) << "Tensor to ms tensor failed." << name << " .";
    return MSTensor(nullptr);
  }
  return ms_outputs[0];
  return MSTensor(tensor_impl);
}

Status ModelImpl::Predict(const std::vector<MSTensor> &inputs, std::vector<MSTensor> *outputs) {
  MS_EXCEPTION_IF_NULL(session_);
  MS_EXCEPTION_IF_NULL(outputs);
  outputs->clear();
  std::vector<mindspore::tensor::TensorPtr> graph_inputs = TensorUtils::MSTensorToTensorPtr(inputs);
  std::vector<mindspore::tensor::TensorPtr> graph_outputs;
  std::vector<mindspore::tensor::Tensor> graph_inputs = TensorUtils::MSTensorToTensor(inputs);
  std::vector<mindspore::tensor::Tensor> graph_outputs;
  std::vector<mindspore::tensor::Tensor> org_graph_outputs;
  if (!outputs->empty()) {
    graph_outputs = TensorUtils::MSTensorToTensor(*outputs);
    org_graph_outputs = graph_outputs;
  }
  auto ret = session_->RunGraph(graph_inputs, &graph_outputs);
  if (ret != kSuccess) {
    MS_LOG(ERROR) << "ModelImpl::Predict RunGraph failed with " << ret;
    return ret;
  }
  auto ms_outputs = TensorUtils::TensorPtrToMSTensor(graph_outputs, session_->GetOutputNames());
  (void)std::copy(ms_outputs.begin(), ms_outputs.end(), std::back_inserter(*outputs));
  if (outputs->empty() || org_graph_outputs != graph_outputs) {
    *outputs = TensorUtils::TensorToMSTensor(graph_outputs, session_->GetOutputNames());
  }
  auto session_outputs = GetOutputs();
  if (graph_outputs.size() != session_outputs.size()) {
    MS_LOG(ERROR) << "Outputs count get from session " << session_outputs.size() << " != outputs count of RunGraph "
                  << graph_outputs.size();
    return kCoreFailed;
  }
  for (size_t i = 0; i < session_outputs.size(); i++) {
    auto &session_output = session_outputs[i];
    auto &execute_output = outputs->at(i);
    session_output.SetShape(execute_output.Shape());
    if (session_output.Data().get() != execute_output.Data().get()) {
      session_output.SetData(execute_output.MutableData(), false);
    }
    if (session_output.GetDeviceData() != execute_output.GetDeviceData()) {
      session_output.SetDeviceData(execute_output.GetDeviceData());
    }
  }
  return kSuccess;
}

Status ModelImpl::Predict() {
  auto inputs = GetInputs();
  auto outputs = GetOutputs();
  return Predict(inputs, &outputs);
}

bool ModelImpl::HasPreprocess() { return graph_->graph_data_->GetPreprocess().empty() ? false : true; }

Status ModelImpl::Preprocess(const std::vector<std::vector<MSTensor>> &inputs, std::vector<MSTensor> *outputs) {
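Because GetInputs()/GetOutputs() now wrap the session's tensor impls directly, a caller can bind its own output tensors once and let Predict fill them in place. An illustrative sketch against the public Model API (not part of the commit; buffer filling is left to the application):

```cpp
// Illustrative sketch, assuming the public Model API from include/api/model.h.
#include <vector>
#include "include/api/model.h"

mindspore::Status RunOnce(mindspore::Model *model) {
  std::vector<mindspore::MSTensor> inputs = model->GetInputs();
  std::vector<mindspore::MSTensor> outputs = model->GetOutputs();  // may already carry device data
  // ... fill inputs[i].MutableData() with application data here ...
  return model->Predict(inputs, &outputs);  // outputs reuse the bound buffers when shapes match
}
```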
@@ -200,16 +200,21 @@ Status ModelWorker::Predict(const std::vector<MSTensor> &inputs, std::vector<MST
  bool need_copy_output = true;
  auto model_output = model_->GetOutputs();
  for (size_t i = 0; i < outputs->size(); i++) {
    if (outputs->at(i).Data() != nullptr) {
    auto &output = outputs->at(i);
    if (output.Data() != nullptr || output.GetDeviceData() != nullptr) {
      /* user set graph-output-tensor from outside */
      model_output[i].SetData(outputs->at(i).MutableData());
      model_output[i].SetShape(output.Shape());
      model_output[i].SetData(output.MutableData(), false);
      model_output[i].SetDeviceData(output.GetDeviceData());
      model_output[i].SetAllocator(nullptr);
      need_copy_output = false;
    }
  }
  for (size_t i = 0; i < inputs.size(); i++) {
    model_input[i].SetData(const_cast<MSTensor &>(inputs[i]).MutableData());
    model_input[i].SetShape(inputs[i].Shape());
    auto &input = inputs[i];
    model_input[i].SetShape(input.Shape());
    model_input[i].SetData(const_cast<MSTensor &>(input).MutableData(), false);
    model_input[i].SetDeviceData(const_cast<MSTensor &>(input).GetDeviceData());
  }
  auto status = model_->Predict(model_input, &model_output, before, after);
  if (status != kSuccess) {

@@ -232,6 +237,7 @@ Status ModelWorker::Predict(const std::vector<MSTensor> &inputs, std::vector<MST
    for (size_t i = 0; i < outputs->size(); i++) {
      outputs->at(i).SetShape(model_output[i].Shape());
      model_output[i].SetData(nullptr);
      model_output[i].SetDeviceData(nullptr);
      model_output[i].SetAllocator(nullptr);
    }
  }
@@ -20,6 +20,11 @@
#include "mindspore/core/ir/api_tensor_impl.h"
#include "mindspore/core/utils/convert_utils_base.h"
#include "utils/file_utils.h"
#include "common/utils.h"
#include "mindspore/core/ir/tensor.h"
#include "runtime/device/device_address.h"
#include "extendrt/utils/tensor_utils.h"
#include "extendrt/utils/tensor_default_impl.h"

namespace mindspore {
class Buffer::Impl {

@@ -71,105 +76,12 @@ class Buffer::Impl {
  std::vector<uint8_t> data_;
};

class MutableTensorImpl : public MSTensor::Impl {
 public:
  MutableTensorImpl() = default;
  MutableTensorImpl(const std::string &name, enum DataType type, const std::vector<int64_t> &shape)
      : name_(name), type_(type), shape_(shape) {}

  virtual void SetData(void *data) = 0;

  void SetShape(const std::vector<int64_t> &shape) { shape_ = shape; }
  void SetDataType(mindspore::DataType data_type) { type_ = data_type; }
  void SetTensorName(const std::string &name) { name_ = name; }

  mindspore::Format GetFormat() const { return format_; }
  void SetFormat(mindspore::Format format) { format_ = format; }

  const std::string &Name() const override { return name_; }
  enum DataType DataType() const override { return type_; }
  const std::vector<int64_t> &Shape() const override { return shape_; }

  void SetAllocator(const std::shared_ptr<Allocator> &allocator) { allocator_ = allocator; }
  std::shared_ptr<Allocator> GetAllocator() const { return allocator_; }

  std::vector<QuantParam> QuantParams() const { return quant_param_; }

  void SetQuantParams(const std::vector<QuantParam> &quant_param) { quant_param_ = quant_param; }

 protected:
  std::string name_;
  enum DataType type_ = DataType::kTypeUnknown;
  enum Format format_ = mindspore::NCHW;
  std::vector<int64_t> shape_;
  std::shared_ptr<Allocator> allocator_ = nullptr;
  std::vector<QuantParam> quant_param_;
};

class TensorDefaultImpl : public MutableTensorImpl {
 public:
  TensorDefaultImpl() : buffer_() {}
  ~TensorDefaultImpl() override = default;
  TensorDefaultImpl(const std::string &name, enum DataType type, const std::vector<int64_t> &shape, const void *data,
                    size_t data_len)
      : MutableTensorImpl(name, type, shape), buffer_(data, data_len) {}

  std::shared_ptr<const void> Data() const override {
    return std::shared_ptr<const void>(buffer_.Data(), [](const void *) {});
  }

  void SetData(void *data) override {
    auto data_len = buffer_.DataSize();
    buffer_.SetData(data, data_len);
  }

  void *MutableData() override { return buffer_.MutableData(); }
  size_t DataSize() const override { return buffer_.DataSize(); }

  bool IsDevice() const override { return false; }

  std::shared_ptr<Impl> Clone() const override {
    return std::make_shared<TensorDefaultImpl>(name_, type_, shape_, buffer_.Data(), buffer_.DataSize());
  }

 private:
  Buffer buffer_;
};

class TensorReferenceImpl : public MutableTensorImpl {
 public:
  TensorReferenceImpl() = default;
  ~TensorReferenceImpl() override = default;
  TensorReferenceImpl(const std::string &name, enum DataType type, const std::vector<int64_t> &shape, const void *data,
                      size_t data_len, bool is_device)
      : MutableTensorImpl(name, type, shape), data_(data), data_size_(data_len), is_device_(is_device) {}

  std::shared_ptr<const void> Data() const override {
    return std::shared_ptr<const void>(data_, [](const void *) {});
  }

  void SetData(void *data) override { data_ = data; }

  void *MutableData() override { return const_cast<void *>(data_); }
  size_t DataSize() const override { return data_size_; }

  bool IsDevice() const override { return is_device_; }

  std::shared_ptr<Impl> Clone() const override {
    return std::make_shared<TensorReferenceImpl>(name_, type_, shape_, data_, data_size_, is_device_);
  }

 protected:
  const void *data_ = nullptr;
  size_t data_size_ = 0;
  bool is_device_ = false;
};

MSTensor *MSTensor::CreateTensor(const std::vector<char> &name, enum DataType type, const std::vector<int64_t> &shape,
                                 const void *data, size_t data_len) noexcept {
  std::string name_str = CharToString(name);
  try {
    std::shared_ptr<Impl> impl = std::make_shared<TensorDefaultImpl>(name_str, type, shape, data, data_len);
    std::shared_ptr<Impl> impl =
        std::make_shared<TensorDefaultImpl>(name_str, type, shape, data, data_len, false, false);
    MSTensor *ret = new MSTensor(impl);
    return ret;
  } catch (const std::bad_alloc &) {

@@ -182,10 +94,17 @@ MSTensor *MSTensor::CreateTensor(const std::vector<char> &name, enum DataType ty
}

MSTensor *MSTensor::CreateRefTensor(const std::vector<char> &name, enum DataType type,
                                    const std::vector<int64_t> &shape, const void *data, size_t data_len) noexcept {
                                    const std::vector<int64_t> &shape, const void *data, size_t data_len,
                                    bool own_data) noexcept {
  std::string name_str = CharToString(name);
  try {
    std::shared_ptr<Impl> impl = std::make_shared<TensorReferenceImpl>(name_str, type, shape, data, data_len, false);
    std::shared_ptr<Impl> impl =
        std::make_shared<TensorDefaultImpl>(name_str, type, shape, data, data_len, true, own_data);
    if (data_len < impl->DataSize()) {
      MS_LOG(ERROR) << "The size " << data_len << " of data cannot be less that the memory size required by the shape "
                    << shape << " and data type " << TypeIdToString(static_cast<enum TypeId>(type));
      return nullptr;
    }
    MSTensor *ret = new MSTensor(impl);
    return ret;
  } catch (const std::bad_alloc &) {

@@ -197,19 +116,24 @@ MSTensor *MSTensor::CreateRefTensor(const std::vector<char> &name, enum DataType
  }
}

MSTensor *MSTensor::CreateDevTensor(const std::vector<char> &name, enum DataType type,
                                    const std::vector<int64_t> &shape, const void *data, size_t data_len) noexcept {
MSTensor MSTensor::CreateDeviceTensor(const std::vector<char> &name, enum DataType type,
                                      const std::vector<int64_t> &shape, void *data, size_t data_size) noexcept {
  std::string name_str = CharToString(name);
  try {
    std::shared_ptr<Impl> impl = std::make_shared<TensorReferenceImpl>(name_str, type, shape, data, data_len, true);
    MSTensor *ret = new MSTensor(impl);
    return ret;
    auto impl = std::make_shared<TensorDefaultImpl>(name_str, type, shape);
    if (data_size < impl->DataSize()) {
      MS_LOG(ERROR) << "The size " << data_size << " of data cannot be less that the memory size required by the shape "
                    << shape << " and data type " << TypeIdToString(static_cast<enum TypeId>(type));
      return MSTensor(nullptr);
    }
    impl->SetDeviceData(data);
    return MSTensor(impl);
  } catch (const std::bad_alloc &) {
    MS_LOG(ERROR) << "Malloc memory failed.";
    return nullptr;
    return MSTensor(nullptr);
  } catch (...) {
    MS_LOG(ERROR) << "Unknown error occurred.";
    return nullptr;
    return MSTensor(nullptr);
  }
}

@@ -399,13 +323,17 @@ MSTensor::MSTensor(std::nullptr_t) : impl_(nullptr) {}
MSTensor::MSTensor(const std::shared_ptr<Impl> &impl) : impl_(impl) { MS_EXCEPTION_IF_NULL(impl); }
MSTensor::MSTensor(const std::vector<char> &name, enum DataType type, const std::vector<int64_t> &shape,
                   const void *data, size_t data_len)
    : impl_(std::make_shared<TensorDefaultImpl>(CharToString(name), type, shape, data, data_len)) {}
    : impl_(std::make_shared<TensorDefaultImpl>(CharToString(name), type, shape, data, data_len, false, false)) {}
MSTensor::~MSTensor() = default;

bool MSTensor::operator==(std::nullptr_t) const { return impl_ == nullptr; }

bool MSTensor::operator!=(std::nullptr_t) const { return impl_ != nullptr; }

bool MSTensor::operator==(const MSTensor &tensor) const { return impl_ == tensor.impl_; }

bool MSTensor::operator!=(const MSTensor &tensor) const { return impl_ != tensor.impl_; }

MSTensor *MSTensor::Clone() const {
  MS_EXCEPTION_IF_NULL(impl_);
  try {

@@ -478,7 +406,7 @@ void MSTensor::SetDataType(enum DataType data_type) {

void MSTensor::SetTensorName(const std::vector<char> &tensor_name) {
  MS_EXCEPTION_IF_NULL(impl_);
  std::static_pointer_cast<MutableTensorImpl>(impl_)->SetTensorName(CharToString(tensor_name));
  std::static_pointer_cast<MutableTensorImpl>(impl_)->SetName(CharToString(tensor_name));
}

void MSTensor::SetAllocator(std::shared_ptr<Allocator> allocator) {

@@ -498,17 +426,27 @@ void MSTensor::SetFormat(mindspore::Format format) {

mindspore::Format MSTensor::format() const {
  MS_EXCEPTION_IF_NULL(impl_);
  return std::static_pointer_cast<MutableTensorImpl>(impl_)->GetFormat();
  return std::static_pointer_cast<MutableTensorImpl>(impl_)->Format();
}

void MSTensor::SetData(void *data) {
void MSTensor::SetData(void *data, bool own_data) {
  MS_EXCEPTION_IF_NULL(impl_);
  std::static_pointer_cast<MutableTensorImpl>(impl_)->SetData(data);
  std::static_pointer_cast<MutableTensorImpl>(impl_)->SetData(data, own_data);
}

void MSTensor::SetDeviceData(void *data) {
  MS_EXCEPTION_IF_NULL(impl_);
  std::static_pointer_cast<MutableTensorImpl>(impl_)->SetDeviceData(data);
}

void *MSTensor::GetDeviceData() {
  MS_EXCEPTION_IF_NULL(impl_);
  return std::static_pointer_cast<MutableTensorImpl>(impl_)->GetDeviceData();
}

std::vector<QuantParam> MSTensor::QuantParams() const {
  MS_EXCEPTION_IF_NULL(impl_);
  return std::static_pointer_cast<MutableTensorImpl>(impl_)->QuantParams();
  return std::static_pointer_cast<MutableTensorImpl>(impl_)->GetQuantParams();
}

void MSTensor::SetQuantParams(std::vector<QuantParam> quant_param) {
@@ -15,9 +15,10 @@
 */

#include "src/extendrt/delegate/tensorrt/distribution/distribution_base.h"
#include "src/extendrt/delegate/plugin/tensorrt_executor_plugin.h"

namespace mindspore::lite {
int GetGPUGroupSize() { return 1; }
int GetGPUGroupSize() { return TensorRTPlugin::GetInstance().GetGPUGroupSize(); }

int GetRankID() { return 0; }
int GetRankID() { return TensorRTPlugin::GetInstance().GetRankID(); }
}  // namespace mindspore::lite
@@ -38,13 +38,29 @@ class TensorInfoImpl {
        tensor_val_(tensor_val) {
    is_const_ = (data_ != nullptr);
    if (data_ == nullptr || data_len_ == 0) {
      auto ele_num = std::accumulate(shape_.begin(), shape_.end(), 1, std::multiplies<int64_t>());
      auto type_size = DataTypeSize(static_cast<enum TypeId>(dType_));
      auto ele_num = ElementNum();
      auto type_size = item_size();
      temp_data_.resize(ele_num * type_size);
      data_ = temp_data_.data();
      data_len_ = temp_data_.size();
    }
  }
  void SetShape(const std::vector<int64_t> &shape) {
    shape_ = shape;
    auto new_elem_num = ElementNum();
    auto type_size = item_size();
    auto data_size = new_elem_num * type_size;
    if (data_size != temp_data_.size() && data_ == temp_data_.data()) {
      temp_data_.resize(data_size);
      data_ = temp_data_.data();
      data_len_ = data_size;
    }
  }

  int64_t ElementNum() const { return std::accumulate(shape_.begin(), shape_.end(), 1, std::multiplies<int64_t>()); }

  size_t item_size() const { return DataTypeSize(static_cast<enum TypeId>(dType_)); }

  std::string name_;
  mindspore::DataType dType_ = mindspore::DataType::kTypeUnknown;
  std::vector<int64_t> shape_;

@@ -109,7 +125,7 @@ size_t TensorInfo::DataSize() const {
  if (impl_ == nullptr) {
    return 0;
  }
  return impl_->data_len_;
  return ElementNum() * item_size();
}

bool TensorInfo::IsConst() const {

@@ -119,13 +135,18 @@ bool TensorInfo::IsConst() const {
  return impl_->is_const_ && impl_->data_ != nullptr;
}

size_t TensorInfo::item_size() const { return DataTypeSize(static_cast<enum TypeId>(DataType())); }
size_t TensorInfo::item_size() const {
  if (impl_ == nullptr) {
    return 0;
  }
  return impl_->item_size();
}

void TensorInfo::SetShape(const std::vector<int64_t> &shape) {
  if (impl_ == nullptr) {
    return;
  }
  impl_->shape_ = shape;
  impl_->SetShape(shape);
}

void TensorInfo::SetData(const void *data, size_t data_len) {

@@ -140,11 +161,7 @@ int64_t TensorInfo::ElementNum() const {
  if (impl_ == nullptr) {
    return 0;
  }
  if (impl_->shape_.empty()) {
    // element number of scalar is 1
    return 1;
  }
  return std::accumulate(impl_->shape_.begin(), impl_->shape_.end(), 1, std::multiplies<int64_t>());
  return impl_->ElementNum();
}

TensorInfo &TensorInfo::operator=(const TensorInfo &other) {
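With this change DataSize() is recomputed from the element count and element size, so it stays consistent after SetShape() resizes the internally owned buffer. A hedged illustration (namespace and header of TensorInfo omitted; this is not code from the commit):

```cpp
// Hedged sketch of how the recomputed size is expected to behave.
void ResizeInfoSketch(TensorInfo *info) {
  info->SetShape({2, 4});            // resizes the internally owned buffer when the impl owns one
  size_t bytes = info->DataSize();   // now ElementNum() * item_size(), i.e. 8 * item_size() here
  (void)bytes;
}
```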
@@ -106,6 +106,33 @@ int TensorRTAllocator::SyncMemDeviceToHost(tensor::Tensor *host_tensor, const st
  return SyncMemInHostAndDevice(host_tensor->data_c(), device_tensor_name, host_tensor->Size(), false, sync);
}

int TensorRTAllocator::SyncMemDeviceToHost(void *dst_data, size_t data_size, const std::string &device_tensor_name) {
  if (dst_data == nullptr) {
    MS_LOG(ERROR) << " dst host data cannot be nullptr.";
    return RET_ERROR;
  }
  auto it = cuda_tensor_map_.find(device_tensor_name);
  if (it == cuda_tensor_map_.end()) {
    MS_LOG(ERROR) << " cannot find device address " << device_tensor_name;
    return RET_ERROR;
  }
  CudaTensorParam &current_cuda_tensor = it->second;
  // is memcpy from device to host, the host mem is valid, change tag for mem pool.
  current_cuda_tensor.is_valid_mem = true;
  auto device_ptr = current_cuda_tensor.data;
  if (device_ptr == nullptr) {
    MS_LOG(ERROR) << "device_ptr is null for " << device_tensor_name;
    return RET_ERROR;
  }
  auto cuda_ret = cudaMemcpy(dst_data, device_ptr, data_size, cudaMemcpyDeviceToHost);
  if (cuda_ret != cudaSuccess) {
    MS_LOG(ERROR) << "copy mem failed,ret " << cudaGetErrorName(cuda_ret);
    return RET_ERROR;
  }
  MS_LOG(INFO) << "cuda memcpy success for " << device_tensor_name;
  return RET_OK;
}

int TensorRTAllocator::SyncMemInHostAndDevice(tensor::Tensor *host_tensor, const std::string &device_tensor_name,
                                              bool is_host2device, bool sync) {
  if (host_tensor == NULL) {

@@ -54,6 +54,7 @@ class TensorRTAllocator {

  int SyncMemHostToDevice(const tensor::Tensor &host_tensor, const std::string &device_tensor_name, bool sync = true);
  int SyncMemDeviceToHost(tensor::Tensor *host_tensor, const std::string &device_tensor_name, bool sync = true);
  int SyncMemDeviceToHost(void *dst_data, size_t data_size, const std::string &device_tensor_name);

  int ClearDeviceMem();
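The new overload copies a named device binding straight into caller-provided host memory. A hedged usage sketch, assuming an allocator instance, binding name, and header path supplied by the caller (not part of the commit):

```cpp
// Hedged sketch, not part of the commit; the include path is an assumption.
#include <string>
#include <vector>
#include "src/extendrt/delegate/tensorrt/tensorrt_allocator.h"

int CopyOutputToHost(mindspore::lite::TensorRTAllocator *allocator, const std::string &binding_name,
                     std::vector<float> *host) {
  // Copies device memory registered under binding_name into the caller's vector.
  return allocator->SyncMemDeviceToHost(host->data(), host->size() * sizeof(float), binding_name);
}
```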
@@ -295,29 +295,19 @@ Status GetModelOutputsInfo(KernelGraphPtr kernel_graph, std::vector<NodeWithOutp
  MS_EXCEPTION_IF_NULL(kernel_graph);
  MS_EXCEPTION_IF_NULL(tensor_info_list_ptr);
  auto &tensor_info_list = *tensor_info_list_ptr;
  auto kernel_graph_outputs = kernel_graph->outputs();
  auto outputs = kernel_graph->outputs();
  // find parameters of graph inputs
  for (size_t i = 0; i < kernel_graph_outputs.size(); ++i) {
    auto output = kernel_graph_outputs[i];
  for (size_t i = 0; i < outputs.size(); ++i) {
    auto output = outputs[i];
    auto cur_abstract = output->abstract();
    size_t output_num = 1;
    if (cur_abstract->isa<abstract::AbstractTuple>()) {
      auto abs_tuple = cur_abstract->Clone()->cast<abstract::AbstractTuplePtr>();
      MS_EXCEPTION_IF_NULL(abs_tuple);
      size_t output_num = abs_tuple->elements().size();
      for (size_t output_idx = 0; output_idx < output_num; ++output_idx) {
        auto tensor_id = common::AnfAlgo::VisitKernelWithReturnType(output, output_idx);
        auto it =
          std::find_if(tensor_info_list.begin(), tensor_info_list.end(),
                       [&tensor_id](const NodeWithOutputIndex &index) { return index.kernel_index == tensor_id; });
        if (it != tensor_info_list.end()) {
          output_tensors->push_back(it->tensor_info);
        } else {
          MS_LOG_ERROR << "Cannot find output tensor info " << tensor_id.first->fullname_with_scope();
          return mindspore::kLiteError;
        }
      }
    } else {
      auto tensor_id = common::AnfAlgo::VisitKernelWithReturnType(output, 0);
      output_num = abs_tuple->elements().size();
    }
    for (size_t output_idx = 0; output_idx < output_num; ++output_idx) {
      auto tensor_id = common::AnfAlgo::VisitKernelWithReturnType(output, output_idx);
      auto it =
        std::find_if(tensor_info_list.begin(), tensor_info_list.end(),
                     [&tensor_id](const NodeWithOutputIndex &index) { return index.kernel_index == tensor_id; });

@@ -443,12 +433,6 @@ Status TensorRTExecutor::BuildSubGraph(const KernelGraphPtr &kernel_graph) {
  if (status != kSuccess) {
    return status;
  }
  auto build_trt_graph = [kernel_graph](const std::vector<TensorRTOp *> &tensorrt_ops) {
    auto inputs = GraphInTensors<TensorRTOp>(tensorrt_ops);
    auto outputs = GraphOutTensors<TensorRTOp>(tensorrt_ops);
    auto ctx = TrtGraphContext{tensorrt_ops, inputs, outputs, nullptr};
    return ctx;
  };
  for (const auto &kernel_node : kernel_nodes) {
    auto node_name = kernel_node->fullname_with_scope();
    std::string kernel_name = common::AnfAlgo::GetCNodeName(kernel_node);

@@ -468,11 +452,11 @@ Status TensorRTExecutor::BuildSubGraph(const KernelGraphPtr &kernel_graph) {
    tensorrt_op->SetRuntime(this->runtime_);
    tensorrt_ops.push_back(tensorrt_op);
  }
  if (!tensorrt_ops.empty()) {
    auto trt_ctx = build_trt_graph(tensorrt_ops);
    tensorrt_ops.clear();
    tensorrt_graph_list_.push_back(trt_ctx);
    status = GetModelOutputsInfo(kernel_graph, &tensor_info_list, &outputs_);
    if (status != kSuccess) {
      return status;
    }
    tensorrt_graph_list_.push_back(TrtGraphContext{tensorrt_ops, inputs_, outputs_, nullptr});
    status = UpdateTrtSubGraphInputsDepend();
    if (status != kSuccess) {
      return status;

@@ -486,10 +470,6 @@ Status TensorRTExecutor::BuildSubGraph(const KernelGraphPtr &kernel_graph) {
      return mindspore::kLiteError;
    }
  }
  status = GetModelOutputsInfo(kernel_graph, &tensor_info_list, &outputs_);
  if (status != kSuccess) {
    return status;
  }
  return mindspore::kSuccess;
}

@@ -625,20 +605,27 @@ bool TensorRTExecutor::RunGraph(const FuncGraphPtr &graph, const std::vector<ten
    MS_LOG(ERROR) << "TensorRTGraph is nullptr.";
    return false;
  }
  tensor_val_map_.clear();
  if (inputs.size() != inputs_.size()) {
    MS_LOG(ERROR) << "Graph inputs size " << inputs_.size() << " != execute input size " << inputs.size();
    MS_LOG(ERROR) << "Graph inputs size " << inputs_.size() << " != execute outputs size " << inputs.size();
    return false;
  }
  if (!outputs->empty() && outputs_.size() != outputs->size()) {
    MS_LOG(ERROR) << "Graph outputs size " << inputs_.size() << " != expected outputs size " << outputs->size();
    return false;
  }
  if (tensorrt_graph_list_.size() == 1) {
    return tensorrt_graph_list_[0].sub_graph->Execute(inputs, outputs) == RET_OK;
  }
  std::map<TensorInfo, std::shared_ptr<tensor::Tensor>> tensor_val_map;
  for (size_t i = 0; i < inputs.size(); i++) {
    tensor_val_map_[inputs_[i]] = std::make_shared<tensor::Tensor>(inputs[i]);
    tensor_val_map[inputs_[i]] = std::make_shared<tensor::Tensor>(inputs[i]);
  }
  for (auto &sub_graph : tensorrt_graph_list_) {
    std::vector<tensor::Tensor> sub_inputs;
    std::vector<tensor::Tensor> sub_outputs;
    for (auto &item : sub_graph.inputs) {
      auto it = tensor_val_map_.find(item);
      if (it == tensor_val_map_.end()) {
      auto it = tensor_val_map.find(item);
      if (it == tensor_val_map.end()) {
        MS_LOG(ERROR) << "Cannot find input tensor " << item.Name() << " in tensor val map";
        return false;
      }

@@ -659,12 +646,13 @@ bool TensorRTExecutor::RunGraph(const FuncGraphPtr &graph, const std::vector<ten
      return false;
    }
    for (size_t i = 0; i < sub_graph.outputs.size(); i++) {
      tensor_val_map_[sub_graph.outputs[i]] = std::make_shared<tensor::Tensor>(sub_outputs[i]);
      tensor_val_map[sub_graph.outputs[i]] = std::make_shared<tensor::Tensor>(sub_outputs[i]);
    }
  }
  outputs->clear();
  for (auto &item : outputs_) {
    auto it = tensor_val_map_.find(item);
    if (it == tensor_val_map_.end()) {
    auto it = tensor_val_map.find(item);
    if (it == tensor_val_map.end()) {
      MS_LOG(ERROR) << "Cannot find input tensor " << item.Name() << " in tensor val map";
      return false;
    }
@@ -81,7 +81,6 @@ class TensorRTExecutor : public device::GraphExecutor {
  cudaStream_t stream_{nullptr};
  std::vector<kernel::Kernel> kernel_list_;

  std::map<TensorInfo, std::shared_ptr<tensor::Tensor>> tensor_val_map_;
  std::vector<TrtGraphContext> tensorrt_graph_list_;

  std::vector<nvinfer1::Dims> min_dims_;
@ -25,6 +25,7 @@
|
|||
#include <functional>
|
||||
#include <fstream>
|
||||
#include "src/extendrt/delegate/delegate_utils.h"
|
||||
#include "src/common/utils.h"
|
||||
|
||||
#include "ops/transpose.h"
|
||||
#include "ops/reshape.h"
|
||||
|
@ -482,24 +483,16 @@ int TensorRTSubGraph::Prepare() {
|
|||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
for (auto tensor : outputs_) {
|
||||
for (auto &tensor : outputs_) {
|
||||
int index = this->engine_->getBindingIndex(tensor.Name().c_str());
|
||||
auto out_dims = trt_context_->getBindingDimensions(index);
|
||||
int elem_num = std::accumulate(out_dims.d, out_dims.d + out_dims.nbDims, 1, std::multiplies<int>());
|
||||
DebugDims(out_dims);
|
||||
std::map<enum DataType, size_t> TypeByte = {
|
||||
{DataType::kTypeUnknown, 0}, {DataType::kObjectTypeString, 0}, {DataType::kNumberTypeBool, 1},
|
||||
{DataType::kNumberTypeInt8, 1}, {DataType::kNumberTypeInt16, 2}, {DataType::kNumberTypeInt32, 4},
|
||||
{DataType::kNumberTypeInt64, 8}, {DataType::kNumberTypeUInt8, 1}, {DataType::kNumberTypeUInt16, 2},
|
||||
{DataType::kNumberTypeUInt32, 4}, {DataType::kNumberTypeUInt64, 8}, {DataType::kNumberTypeFloat16, 2},
|
||||
{DataType::kNumberTypeFloat32, 4}, {DataType::kNumberTypeFloat64, 8},
|
||||
};
|
||||
if (tensor.Data() == nullptr) {
|
||||
MS_LOG(INFO) << "Set output shape by tensorrt binding output";
|
||||
tensor.SetShape(lite::ConvertMSShape(out_dims));
|
||||
tensor.MutableData();
|
||||
}
|
||||
auto device_ptr = runtime_->GetAllocator()->MallocDeviceMem(tensor, elem_num * TypeByte[tensor.DataType()]);
|
||||
auto new_shape = lite::ConvertMSShape(out_dims);
|
||||
MS_LOG(INFO) << "Set output shape of " << tensor.Name() << " to " << new_shape << " by tensorrt binding output";
|
||||
tensor.SetShape(new_shape);
|
||||
auto type_size = DataTypeSize(static_cast<enum TypeId>(tensor.DataType()));
|
||||
auto device_ptr = runtime_->GetAllocator()->MallocDeviceMem(tensor, elem_num * type_size);
|
||||
if (device_ptr == nullptr) {
|
||||
MS_LOG(ERROR) << "malloc for outputs tensor device memory failed.";
|
||||
return RET_ERROR;
|
||||
|
@ -510,37 +503,20 @@ int TensorRTSubGraph::Prepare() {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int TensorRTSubGraph::ReSizeIfNeed(const std::vector<tensor::Tensor> &inputs) {
|
||||
bool need_resize = false;
|
||||
int TensorRTSubGraph::OnNewInputShapes(const std::vector<tensor::Tensor> &inputs) {
|
||||
if (inputs_.size() != inputs.size()) {
|
||||
MS_LOG(ERROR) << "Graph inputs size " << inputs_.size() << " != resize input size " << inputs.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
for (size_t i = 0; i < inputs_.size(); i++) {
|
||||
if (inputs_[i].Shape() != inputs[i].shape()) {
|
||||
need_resize = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (need_resize) {
|
||||
return ReSize(inputs);
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int TensorRTSubGraph::ReSize(const std::vector<tensor::Tensor> &inputs) {
|
||||
if (inputs_.size() != inputs.size()) {
|
||||
MS_LOG(ERROR) << "Graph inputs size " << inputs_.size() << " != resize input size " << inputs.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (input_batchsize_index_ == -1) {
|
||||
MS_LOG(ERROR) << "current network don't support resize.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
int batch_size = -1;
|
||||
for (size_t i = 0; i < trt_in_tensor_name_.size(); i++) {
|
||||
if (inputs_[i].Shape() == inputs[i].shape()) {
|
||||
continue;
|
||||
}
|
||||
if (input_batchsize_index_ == -1) {
|
||||
MS_LOG(ERROR) << "current network don't support resize.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
inputs_[i].SetShape(inputs[i].shape());
|
||||
if (ctx_->network() != nullptr) {
|
||||
for (int j = 0; j < ctx_->network()->getNbInputs(); j++) {
|
||||
|
@ -558,18 +534,16 @@ int TensorRTSubGraph::ReSize(const std::vector<tensor::Tensor> &inputs) {
|
|||
|
||||
MS_LOG(INFO) << "resize at input_batch_index " << input_batchsize_index_ << ", update batch size to "
|
||||
<< inputs_[i].Shape()[input_batchsize_index_];
|
||||
runtime_->SetBatchSize(inputs_[i].Shape()[input_batchsize_index_]);
|
||||
|
||||
// inputs_ is dupulated by mindrt, name is untustable.
|
||||
auto device_ptr = runtime_->GetAllocator()->MallocDeviceMem(trt_in_tensor_name_[i], inputs_[i].DataSize(),
|
||||
ConvertDataType(inputs_[i].DataType()));
|
||||
if (device_ptr == nullptr) {
|
||||
MS_LOG(ERROR) << "realloc for input tensor device memory failed.";
|
||||
int new_batch_size = inputs_[i].Shape()[input_batchsize_index_];
|
||||
if (batch_size != -1 && batch_size != new_batch_size) {
|
||||
MS_LOG(ERROR) << "Batch size " << batch_size << " of input 0 != batch size " << new_batch_size << " of input "
|
||||
<< i;
|
||||
return RET_ERROR;
|
||||
}
|
||||
batch_size = new_batch_size;
|
||||
runtime_->SetBatchSize(batch_size);
|
||||
|
||||
int index = this->engine_->getBindingIndex(trt_in_tensor_name_[i].c_str());
|
||||
MS_LOG(INFO) << "device index " << index << " for tensor : " << trt_in_tensor_name_[i] << " attr: " << device_ptr;
|
||||
tensor_bindings_[index] = device_ptr;
|
||||
// Set actual input size
|
||||
nvinfer1::Dims input_dims = ConvertCudaDims(inputs_[i].Shape());
|
||||
for (int od = 0; od < input_dims.nbDims; od++) {
|
||||
|
@ -585,6 +559,140 @@ int TensorRTSubGraph::ReSize(const std::vector<tensor::Tensor> &inputs) {
|
|||
MS_LOG(ERROR) << "input dims need to be specified.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (batch_size != -1) {
|
||||
for (size_t i = 0; i < trt_out_tensor_name_.size(); i++) {
|
||||
int index = this->engine_->getBindingIndex(trt_out_tensor_name_[i].c_str());
|
||||
auto out_dims = trt_context_->getBindingDimensions(index);
|
||||
auto new_shape = lite::ConvertMSShape(out_dims);
|
||||
MS_LOG(INFO) << "Set output shape of " << trt_out_tensor_name_[i] << " to " << new_shape
|
||||
<< " by tensorrt binding output";
|
||||
outputs_[i].SetShape(new_shape);
|
||||
}
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int TensorRTSubGraph::PreExecute(const std::vector<tensor::Tensor> &inputs,
|
||||
const std::vector<tensor::Tensor> &outputs) {
|
||||
if (inputs_.size() != inputs.size()) {
|
||||
MS_LOG(ERROR) << "Graph inputs size " << inputs_.size() << " != execute inputs size " << inputs.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (!outputs.empty() && outputs.size() != outputs_.size()) {
|
||||
MS_LOG(ERROR) << "Graph outputs size " << outputs_.size() << " != execute outputs size " << outputs.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto ret = OnNewInputShapes(inputs);
|
||||
if (ret != RET_OK) {
|
||||
return ret;
|
||||
}
|
||||
for (size_t i = 0; i < trt_in_tensor_name_.size(); i++) {
|
||||
auto trt_tensor_name = trt_in_tensor_name_[i];
|
||||
void *device_ptr = nullptr;
|
||||
auto input_device_address = inputs[i].device_address();
|
||||
if (input_device_address != nullptr && input_device_address->GetMutablePtr() != nullptr) {
|
||||
device_ptr = input_device_address->GetMutablePtr();
|
||||
} else {
|
||||
device_ptr = runtime_->GetAllocator()->MallocDeviceMem(trt_tensor_name, inputs_[i].DataSize(),
|
||||
ConvertDataType(inputs_[i].DataType()));
|
||||
if (device_ptr == nullptr) {
|
||||
MS_LOG(ERROR) << "realloc for input tensor device memory failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
ret = runtime_->GetAllocator()->SyncMemHostToDevice(inputs[i], trt_tensor_name);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "sync mem from host to device failed for " << trt_tensor_name;
|
||||
return RET_ERROR;
|
||||
}
|
||||
runtime_->GetAllocator()->MarkMemValid(trt_tensor_name, true);
|
||||
}
|
||||
int index = this->engine_->getBindingIndex(trt_tensor_name.c_str());
|
||||
MS_LOG(INFO) << "device index " << index << " for tensor : " << trt_tensor_name << " attr: " << device_ptr;
|
||||
tensor_bindings_[index] = device_ptr;
|
||||
}
|
||||
for (size_t i = 0; i < trt_out_tensor_name_.size(); i++) {
|
||||
const auto &trt_out_tensor_name = trt_out_tensor_name_[i];
|
||||
int index = this->engine_->getBindingIndex(trt_out_tensor_name.c_str());
|
||||
void *device_ptr = nullptr;
|
||||
if (outputs.size() > i) {
|
||||
auto &output = outputs[i];
|
||||
if (output.device_address() && output.device_address()->GetMutablePtr()) {
|
||||
device_ptr = output.device_address()->GetMutablePtr();
|
||||
}
|
||||
}
|
||||
if (!device_ptr) {
|
||||
device_ptr = runtime_->GetAllocator()->MallocDeviceMem(trt_out_tensor_name, outputs_[i].DataSize(),
|
||||
ConvertDataType(outputs_[i].DataType()));
|
||||
if (device_ptr == nullptr) {
|
||||
MS_LOG(ERROR) << "realloc for outputs tensor device memory failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
tensor_bindings_[index] = device_ptr;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int TensorRTSubGraph::PostExecute(std::vector<tensor::Tensor> *outputs) {
  if (!outputs->empty() && outputs->size() != outputs_.size()) {
    MS_LOG(ERROR) << "Graph outputs size " << outputs_.size() << " != execute outputs size " << outputs->size();
    return RET_ERROR;
  }
  auto has_outputs = !outputs->empty();
  for (size_t i = 0; i < trt_out_tensor_name_.size(); i++) {
    const auto &trt_out_tensor_name = trt_out_tensor_name_[i];
    int index = this->engine_->getBindingIndex(trt_out_tensor_name.c_str());
    // actual output tensor dims
    auto out_dims = this->trt_context_->getBindingDimensions(index);
    std::vector<int64_t> new_shape = lite::ConvertMSShape(out_dims);
    // batchsize resize need set new batch size
    if (input_batchsize_index_ != -1) {
      if (runtime_->GetBatchSize() != new_shape[output_batchsize_index_]) {
        new_shape[output_batchsize_index_] = runtime_->GetBatchSize();
      }
    }
    outputs_[i].SetShape(new_shape);
    for (int od = 0; od < out_dims.nbDims; od++) {
      MS_LOG(DEBUG) << "out tensor " << trt_out_tensor_name << " dims at " << od << " is " << new_shape[od];
    }
    runtime_->GetAllocator()->MarkMemValid(trt_out_tensor_name, true);
    if (has_outputs) {
      auto &tensor = outputs->at(i);
      auto dst_device = tensor.device_address();
      if (dst_device == nullptr || dst_device->GetMutablePtr() == nullptr) {
        if (tensor.Size() < outputs_[i].DataSize()) {
          MS_LOG(ERROR) << "Parameter output data size " << tensor.Size()
                        << " cannot less than execute output data size " << outputs_[i].DataSize()
                        << ", output shape: " << new_shape;
          return RET_ERROR;
        }
        auto host_address = tensor.data_c();
        if (host_address == nullptr) {
          MS_LOG(ERROR) << "Specified output device or host address cannot be nullptr";
          return RET_ERROR;
        }
        int sync_ret =
          runtime_->GetAllocator()->SyncMemDeviceToHost(host_address, outputs_[i].DataSize(), trt_out_tensor_name);
        if (sync_ret != RET_OK) {
          MS_LOG(ERROR) << "sync mem from device to host failed for " << trt_out_tensor_name;
          return sync_ret;
        }
      }
    } else {
      tensor::Tensor output_tensor(static_cast<enum TypeId>(outputs_[i].DataType()), new_shape);
      int sync_ret = runtime_->GetAllocator()->SyncMemDeviceToHost(&output_tensor, trt_out_tensor_name);
      if (sync_ret != RET_OK) {
        MS_LOG(ERROR) << "sync mem from device to host failed for " << trt_out_tensor_name;
        return sync_ret;
      }
      outputs->push_back(output_tensor);
    }
    runtime_->GetAllocator()->MarkMemValid(trt_out_tensor_name, false);
  }
  // make mem invalid, prepare for next execute
  for (size_t i = 0; i < inputs_.size(); i++) {
    runtime_->GetAllocator()->MarkMemValid(trt_in_tensor_name_[i], false);
  }
  return RET_OK;
}
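
For orientation, a minimal caller-side sketch of how the new PreExecute/PostExecute split is meant to be driven (not part of this change; the subgraph pointer and the input tensors are assumed): an empty outputs vector makes PostExecute allocate host tensors and append them, while outputs that already carry a device address are bound directly in PreExecute and skip the device-to-host copy.

// Hedged usage sketch; `subgraph` and the prepared input tensors are assumptions.
std::vector<tensor::Tensor> exec_inputs = /* host- or device-resident inputs */ {};
std::vector<tensor::Tensor> exec_outputs;  // left empty: PostExecute fills host tensors
if (subgraph->Execute(exec_inputs, &exec_outputs) != RET_OK) {
  MS_LOG(ERROR) << "TensorRT subgraph execute failed";
}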

@@ -612,63 +720,20 @@ bool TensorRTSubGraph::ValidInputResizeDims(const nvinfer1::Dims &construct_dims
}

int TensorRTSubGraph::Execute(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs) {
  int ret = ReSizeIfNeed(inputs);
  int ret = lite::SetCudaDevice(device_info_);
  if (ret != RET_OK) {
    return ret;
  }
  ret = lite::SetCudaDevice(device_info_);
  outputs->clear();
  ret = PreExecute(inputs, *outputs);
  if (ret != RET_OK) {
    return ret;
  }
  for (size_t i = 0; i < inputs.size(); i++) {
    if (runtime_->GetAllocator()->GetMemIsValid(trt_in_tensor_name_[i])) {
      MS_LOG(INFO) << "no need memcpy to cuda for input tensor: " << trt_in_tensor_name_[i];
      continue;
    }
    ret = runtime_->GetAllocator()->SyncMemHostToDevice(inputs[i], trt_in_tensor_name_[i]);
    if (ret != RET_OK) {
      MS_LOG(ERROR) << "sync mem from host to device failed for " << trt_in_tensor_name_[i];
      return ret;
    }
    runtime_->GetAllocator()->MarkMemValid(trt_in_tensor_name_[i], true);
  }

  if (!this->trt_context_->executeV2(tensor_bindings_)) {
    MS_LOG(ERROR) << "TensorRT execute failed.";
    return RET_ERROR;
  }

  for (size_t i = 0; i < trt_out_tensor_name_.size(); i++) {
    int index = this->engine_->getBindingIndex(trt_out_tensor_name_[i].c_str());
    // actual output tensor dims
    auto out_dims = this->trt_context_->getBindingDimensions(index);
    std::vector<int64_t> new_shape = lite::ConvertMSShape(out_dims);
    // batchsize resize need set new batch size
    if (input_batchsize_index_ != -1) {
      if (runtime_->GetBatchSize() != new_shape[output_batchsize_index_]) {
        new_shape[output_batchsize_index_] = runtime_->GetBatchSize();
      }
    }
    for (int od = 0; od < out_dims.nbDims; od++) {
      MS_LOG(DEBUG) << "out tensor " << trt_out_tensor_name_[i] << " dims at " << od << " is " << new_shape[od];
    }
    tensor::Tensor output_tensor(static_cast<enum TypeId>(outputs_[i].DataType()), new_shape);
    outputs_[i].SetShape(new_shape);

    runtime_->GetAllocator()->MarkMemValid(trt_out_tensor_name_[i], true);
    int sync_ret = runtime_->GetAllocator()->SyncMemDeviceToHost(&output_tensor, trt_out_tensor_name_[i]);
    if (sync_ret != RET_OK) {
      MS_LOG(ERROR) << "sync mem from device to host failed for " << trt_out_tensor_name_[i];
      return sync_ret;
    }
    runtime_->GetAllocator()->MarkMemValid(trt_out_tensor_name_[i], false);
    outputs->push_back(output_tensor);
  }
  // make mem invalid, prepare for next execute
  for (size_t i = 0; i < inputs_.size(); i++) {
    runtime_->GetAllocator()->MarkMemValid(trt_in_tensor_name_[i], false);
  }
  return RET_OK;
  return PostExecute(outputs);
}

ITensorHelper TensorRTSubGraph::FindTensorRTInputs(TensorRTOp *cur_op, const TensorInfo &in_tensor) {

@@ -50,9 +50,6 @@ class TensorRTSubGraph {
  int Execute(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs);

  int ReSizeIfNeed(const std::vector<tensor::Tensor> &inputs);
  int ReSize(const std::vector<tensor::Tensor> &inputs);

  int BuildTensorRTGraph();

  int Init(cudaStream_t stream);
@@ -92,6 +89,11 @@ class TensorRTSubGraph {
  bool ValidInputResizeDims(const nvinfer1::Dims &construct_dims, const std::vector<int64_t> &resize_input_shape);

  int PreExecute(const std::vector<tensor::Tensor> &inputs, const std::vector<tensor::Tensor> &outputs);
  int PostExecute(std::vector<tensor::Tensor> *outputs);

  int OnNewInputShapes(const std::vector<tensor::Tensor> &inputs);

  std::string name_;
  std::vector<TensorInfo> inputs_;
  std::vector<TensorInfo> outputs_;

@ -43,15 +43,15 @@ class DefaultInferSession : public InferSession {
|
|||
Status Init(const std::shared_ptr<Context> context) override;
|
||||
Status CompileGraph(FuncGraphPtr graph, const void *data = nullptr, size_t size = 0) override;
|
||||
Status RunGraph() override;
|
||||
Status RunGraph(const std::vector<tensor::TensorPtr> &inputs, std::vector<tensor::TensorPtr> *outputs) override;
|
||||
Status RunGraph(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs) override;
|
||||
Status Resize(const std::vector<tensor::TensorPtr> &inputs, const std::vector<std::vector<int64_t>> &dims) override;
|
||||
|
||||
std::vector<tensor::TensorPtr> GetOutputs() override;
|
||||
std::vector<tensor::TensorPtr> GetInputs() override;
|
||||
std::vector<MutableTensorImplPtr> GetOutputs() override;
|
||||
std::vector<MutableTensorImplPtr> GetInputs() override;
|
||||
std::vector<std::string> GetOutputNames() override;
|
||||
std::vector<std::string> GetInputNames() override;
|
||||
tensor::TensorPtr GetOutputByTensorName(const std::string &tensorName) override;
|
||||
tensor::TensorPtr GetInputByTensorName(const std::string &name) override;
|
||||
MutableTensorImplPtr GetOutputByTensorName(const std::string &tensorName) override;
|
||||
MutableTensorImplPtr GetInputByTensorName(const std::string &name) override;
|
||||
|
||||
private:
|
||||
KernelGraphUtilsPtr kernel_graph_utils_;
|
||||
|
@ -71,20 +71,19 @@ Status DefaultInferSession::CompileGraph(FuncGraphPtr graph, const void *data, s
|
|||
}
|
||||
|
||||
Status DefaultInferSession::RunGraph() { return kSuccess; }
|
||||
Status DefaultInferSession::RunGraph(const std::vector<tensor::TensorPtr> &inputs,
|
||||
std::vector<tensor::TensorPtr> *outputs) {
|
||||
Status DefaultInferSession::RunGraph(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs) {
|
||||
return kSuccess;
|
||||
}
|
||||
Status DefaultInferSession::Resize(const std::vector<tensor::TensorPtr> &inputs,
|
||||
const std::vector<std::vector<int64_t>> &dims) {
|
||||
return kSuccess;
|
||||
}
|
||||
std::vector<tensor::TensorPtr> DefaultInferSession::GetOutputs() { return std::vector<tensor::TensorPtr>(); }
|
||||
std::vector<tensor::TensorPtr> DefaultInferSession::GetInputs() { return std::vector<tensor::TensorPtr>(); }
|
||||
std::vector<MutableTensorImplPtr> DefaultInferSession::GetOutputs() { return {}; }
|
||||
std::vector<MutableTensorImplPtr> DefaultInferSession::GetInputs() { return {}; }
|
||||
std::vector<std::string> DefaultInferSession::GetOutputNames() { return std::vector<std::string>(); }
|
||||
std::vector<std::string> DefaultInferSession::GetInputNames() { return std::vector<std::string>(); }
|
||||
tensor::TensorPtr DefaultInferSession::GetOutputByTensorName(const std::string &tensorName) { return nullptr; }
|
||||
tensor::TensorPtr DefaultInferSession::GetInputByTensorName(const std::string &name) { return nullptr; }
|
||||
MutableTensorImplPtr DefaultInferSession::GetOutputByTensorName(const std::string &tensorName) { return nullptr; }
|
||||
MutableTensorImplPtr DefaultInferSession::GetInputByTensorName(const std::string &name) { return nullptr; }
|
||||
std::shared_ptr<InferSession> InferSession::CreateSession(const std::shared_ptr<Context> context) {
|
||||
HandleGPUContext(context);
|
||||
auto config = SelectSessionArg(context);
|
||||
|
|
|
@ -27,6 +27,8 @@
|
|||
#include "ir/func_graph.h"
|
||||
#include "backend/graph_compiler/graph_partition.h"
|
||||
#include "extendrt/session/type.h"
|
||||
#include "common/mutable_tensor_impl.h"
|
||||
#include "extendrt/utils/kernel_graph_utils.h"
|
||||
|
||||
namespace mindspore {
|
||||
class InferSession : public std::enable_shared_from_this<InferSession> {
|
||||
|
@ -37,16 +39,16 @@ class InferSession : public std::enable_shared_from_this<InferSession> {
|
|||
virtual Status Init(const std::shared_ptr<Context> context) = 0;
|
||||
virtual Status CompileGraph(FuncGraphPtr graph, const void *data = nullptr, size_t size = 0) = 0;
|
||||
virtual Status RunGraph() = 0;
|
||||
virtual Status RunGraph(const std::vector<tensor::TensorPtr> &inputs, std::vector<tensor::TensorPtr> *outputs) = 0;
|
||||
virtual Status RunGraph(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs) = 0;
|
||||
virtual Status Resize(const std::vector<tensor::TensorPtr> &inputs,
|
||||
const std::vector<std::vector<int64_t>> &dims) = 0;
|
||||
|
||||
virtual std::vector<tensor::TensorPtr> GetOutputs() = 0;
|
||||
virtual std::vector<tensor::TensorPtr> GetInputs() = 0;
|
||||
virtual std::vector<MutableTensorImplPtr> GetOutputs() = 0;
|
||||
virtual std::vector<MutableTensorImplPtr> GetInputs() = 0;
|
||||
virtual std::vector<std::string> GetOutputNames() = 0;
|
||||
virtual std::vector<std::string> GetInputNames() = 0;
|
||||
virtual tensor::TensorPtr GetOutputByTensorName(const std::string &tensorName) = 0;
|
||||
virtual tensor::TensorPtr GetInputByTensorName(const std::string &name) = 0;
|
||||
virtual MutableTensorImplPtr GetOutputByTensorName(const std::string &tensorName) = 0;
|
||||
virtual MutableTensorImplPtr GetInputByTensorName(const std::string &name) = 0;
|
||||
|
||||
protected:
|
||||
FuncGraphPtr graph_;
|
||||
|
|
|
@ -28,20 +28,19 @@ Status DelegateSession::Init(const std::shared_ptr<Context> context) { return kS
|
|||
Status DelegateSession::CompileGraph(FuncGraphPtr graph, const void *data, size_t size) { return kSuccess; }
|
||||
|
||||
Status DelegateSession::RunGraph() { return kSuccess; }
|
||||
Status DelegateSession::RunGraph(const std::vector<tensor::TensorPtr> &inputs,
|
||||
std::vector<tensor::TensorPtr> *outputs) {
|
||||
Status DelegateSession::RunGraph(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs) {
|
||||
return kSuccess;
|
||||
}
|
||||
Status DelegateSession::Resize(const std::vector<tensor::TensorPtr> &inputs,
|
||||
const std::vector<std::vector<int64_t>> &dims) {
|
||||
return kSuccess;
|
||||
}
|
||||
std::vector<tensor::TensorPtr> DelegateSession::GetOutputs() { return std::vector<tensor::TensorPtr>(); }
|
||||
std::vector<tensor::TensorPtr> DelegateSession::GetInputs() { return std::vector<tensor::TensorPtr>(); }
|
||||
std::vector<MutableTensorImplPtr> DelegateSession::GetOutputs() { return {}; }
|
||||
std::vector<MutableTensorImplPtr> DelegateSession::GetInputs() { return {}; }
|
||||
std::vector<std::string> DelegateSession::GetOutputNames() { return std::vector<std::string>(); }
|
||||
std::vector<std::string> DelegateSession::GetInputNames() { return std::vector<std::string>(); }
|
||||
tensor::TensorPtr DelegateSession::GetOutputByTensorName(const std::string &tensorName) { return nullptr; }
|
||||
tensor::TensorPtr DelegateSession::GetInputByTensorName(const std::string &name) { return nullptr; }
|
||||
MutableTensorImplPtr DelegateSession::GetOutputByTensorName(const std::string &tensorName) { return nullptr; }
|
||||
MutableTensorImplPtr DelegateSession::GetInputByTensorName(const std::string &name) { return nullptr; }
|
||||
|
||||
static std::shared_ptr<InferSession> DelegateSessionCreator(const SessionConfig &config) {
|
||||
auto delegates = config.delegates_;
|
||||
|
|
|
@ -32,15 +32,15 @@ class DelegateSession : public InferSession {
|
|||
Status Init(const std::shared_ptr<Context> context) override;
|
||||
Status CompileGraph(FuncGraphPtr graph, const void *data = nullptr, size_t size = 0) override;
|
||||
Status RunGraph() override;
|
||||
Status RunGraph(const std::vector<tensor::TensorPtr> &inputs, std::vector<tensor::TensorPtr> *outputs) override;
|
||||
Status RunGraph(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs) override;
|
||||
Status Resize(const std::vector<tensor::TensorPtr> &inputs, const std::vector<std::vector<int64_t>> &dims) override;
|
||||
|
||||
std::vector<tensor::TensorPtr> GetOutputs() override;
|
||||
std::vector<tensor::TensorPtr> GetInputs() override;
|
||||
std::vector<MutableTensorImplPtr> GetOutputs() override;
|
||||
std::vector<MutableTensorImplPtr> GetInputs() override;
|
||||
std::vector<std::string> GetOutputNames() override;
|
||||
std::vector<std::string> GetInputNames() override;
|
||||
tensor::TensorPtr GetOutputByTensorName(const std::string &tensorName) override;
|
||||
tensor::TensorPtr GetInputByTensorName(const std::string &name) override;
|
||||
MutableTensorImplPtr GetOutputByTensorName(const std::string &tensorName) override;
|
||||
MutableTensorImplPtr GetInputByTensorName(const std::string &name) override;
|
||||
|
||||
private:
|
||||
std::shared_ptr<mindspore::Delegate> delegate_;
|
||||
|
|
|
@ -19,8 +19,8 @@
|
|||
#include <memory>
|
||||
|
||||
#include "extendrt/session/graph_executor_session.h"
|
||||
#include "extendrt/utils/tensor_utils.h"
|
||||
#include "src/extendrt/utils/kernel_build_utils.h"
|
||||
#include "extendrt/utils/tensor_default_impl.h"
|
||||
|
||||
namespace mindspore {
|
||||
Status GraphExecutorSession::Init(const std::shared_ptr<Context> context) {
|
||||
|
@ -38,39 +38,57 @@ Status GraphExecutorSession::CompileGraph(FuncGraphPtr graph, const void *data,
|
|||
for (const auto &kernel_node : kernel_nodes) {
|
||||
mindspore::infer::SetKernelInfo(kernel_node);
|
||||
}
|
||||
if (graph_executor_->CompileGraph(kernel_graph_, options_)) {
|
||||
kernel_graph_utils_->GetModelInputsInfo(kernel_graph_->graph_id(), &inputs_, &input_names_);
|
||||
kernel_graph_utils_->GetModelOutputsInfo(kernel_graph_->graph_id(), &outputs_, &output_names_);
|
||||
return kSuccess;
|
||||
if (!graph_executor_->CompileGraph(kernel_graph_, options_)) {
|
||||
return kCoreFailed;
|
||||
}
|
||||
return kCoreFailed;
|
||||
std::vector<tensor::TensorPtr> graph_inputs, graph_outputs;
|
||||
kernel_graph_utils_->GetModelInputsInfo(kernel_graph_->graph_id(), &graph_inputs, &input_names_);
|
||||
kernel_graph_utils_->GetModelOutputsInfo(kernel_graph_->graph_id(), &graph_outputs, &output_names_);
|
||||
if (graph_inputs.size() != input_names_.size()) {
|
||||
MS_LOG(ERROR) << "Graph input size " << graph_inputs.size() << " != input names size " << input_names_.size();
|
||||
return kCoreFailed;
|
||||
}
|
||||
if (graph_outputs.size() != output_names_.size()) {
|
||||
MS_LOG(ERROR) << "Graph output size " << graph_outputs.size() << " != output names size " << output_names_.size();
|
||||
return kCoreFailed;
|
||||
}
|
||||
for (size_t i = 0; i < input_names_.size(); i++) {
|
||||
auto &input = graph_inputs[i];
|
||||
auto data_type = static_cast<enum DataType>(input->data_type());
|
||||
auto impl = std::make_shared<TensorDefaultImpl>(input_names_[i], data_type, input->shape_c());
|
||||
inputs_.push_back(impl);
|
||||
}
|
||||
for (size_t i = 0; i < output_names_.size(); i++) {
|
||||
auto &output = graph_outputs[i];
|
||||
auto data_type = static_cast<enum DataType>(output->data_type());
|
||||
auto impl = std::make_shared<TensorDefaultImpl>(output_names_[i], data_type, output->shape_c());
|
||||
outputs_.push_back(impl);
|
||||
}
|
||||
return kSuccess;
|
||||
}
|
||||
|
||||
Status GraphExecutorSession::RunGraph() { return kSuccess; }
|
||||
Status GraphExecutorSession::RunGraph(const std::vector<tensor::TensorPtr> &inputs,
|
||||
std::vector<tensor::TensorPtr> *outputs) {
|
||||
|
||||
Status GraphExecutorSession::RunGraph(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs) {
|
||||
MS_LOG(INFO) << "GraphExecutorSession::RunGraph";
|
||||
MS_EXCEPTION_IF_NULL(graph_executor_);
|
||||
MS_EXCEPTION_IF_NULL(outputs);
|
||||
std::vector<tensor::Tensor> executor_inputs, executor_outputs;
|
||||
executor_inputs = TensorUtils::TensorPtrToTensor(inputs);
|
||||
auto ret = graph_executor_->RunGraph(kernel_graph_, executor_inputs, &executor_outputs, options_);
|
||||
auto ret = graph_executor_->RunGraph(kernel_graph_, inputs, outputs, options_);
|
||||
if (!ret) {
|
||||
return kCoreFailed;
|
||||
}
|
||||
*outputs = TensorUtils::TensorToTensorPtr(executor_outputs);
|
||||
inputs_ = inputs;
|
||||
outputs_ = *outputs;
|
||||
return kSuccess;
|
||||
}
|
||||
|
||||
Status GraphExecutorSession::Resize(const std::vector<tensor::TensorPtr> &inputs,
|
||||
const std::vector<std::vector<int64_t>> &dims) {
|
||||
return kSuccess;
|
||||
}
|
||||
std::vector<tensor::TensorPtr> GraphExecutorSession::GetOutputs() { return outputs_; }
|
||||
std::vector<tensor::TensorPtr> GraphExecutorSession::GetInputs() { return inputs_; }
|
||||
std::vector<MutableTensorImplPtr> GraphExecutorSession::GetOutputs() { return outputs_; }
|
||||
std::vector<MutableTensorImplPtr> GraphExecutorSession::GetInputs() { return inputs_; }
|
||||
std::vector<std::string> GraphExecutorSession::GetOutputNames() { return output_names_; }
|
||||
std::vector<std::string> GraphExecutorSession::GetInputNames() { return input_names_; }
|
||||
tensor::TensorPtr GraphExecutorSession::GetOutputByTensorName(const std::string &tensorName) {
|
||||
MutableTensorImplPtr GraphExecutorSession::GetOutputByTensorName(const std::string &tensorName) {
|
||||
for (size_t i = 0; i < output_names_.size(); i++) {
|
||||
if (output_names_[i] == tensorName) {
|
||||
return outputs_[i];
|
||||
|
@ -78,7 +96,7 @@ tensor::TensorPtr GraphExecutorSession::GetOutputByTensorName(const std::string
|
|||
}
|
||||
return nullptr;
|
||||
}
|
||||
tensor::TensorPtr GraphExecutorSession::GetInputByTensorName(const std::string &name) {
|
||||
MutableTensorImplPtr GraphExecutorSession::GetInputByTensorName(const std::string &name) {
|
||||
for (size_t i = 0; i < input_names_.size(); i++) {
|
||||
if (input_names_[i] == name) {
|
||||
return inputs_[i];
|
||||
|
|
|
@ -36,24 +36,24 @@ class GraphExecutorSession : public DelegateSession {
|
|||
Status Init(const std::shared_ptr<Context> context) override;
|
||||
Status CompileGraph(FuncGraphPtr graph, const void *data = nullptr, size_t size = 0) override;
|
||||
Status RunGraph() override;
|
||||
Status RunGraph(const std::vector<tensor::TensorPtr> &inputs, std::vector<tensor::TensorPtr> *outputs) override;
|
||||
Status RunGraph(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs) override;
|
||||
Status Resize(const std::vector<tensor::TensorPtr> &inputs, const std::vector<std::vector<int64_t>> &dims) override;
|
||||
|
||||
std::vector<tensor::TensorPtr> GetOutputs() override;
|
||||
std::vector<tensor::TensorPtr> GetInputs() override;
|
||||
std::vector<MutableTensorImplPtr> GetOutputs() override;
|
||||
std::vector<MutableTensorImplPtr> GetInputs() override;
|
||||
std::vector<std::string> GetOutputNames() override;
|
||||
std::vector<std::string> GetInputNames() override;
|
||||
tensor::TensorPtr GetOutputByTensorName(const std::string &tensorName) override;
|
||||
tensor::TensorPtr GetInputByTensorName(const std::string &name) override;
|
||||
MutableTensorImplPtr GetOutputByTensorName(const std::string &tensorName) override;
|
||||
MutableTensorImplPtr GetInputByTensorName(const std::string &name) override;
|
||||
|
||||
private:
|
||||
std::shared_ptr<mindspore::device::GraphExecutor> graph_executor_;
|
||||
std::map<std::string, std::string> options_;
|
||||
KernelGraphUtilsPtr kernel_graph_utils_;
|
||||
KernelGraphPtr kernel_graph_;
|
||||
std::vector<tensor::TensorPtr> inputs_;
|
||||
std::vector<MutableTensorImplPtr> inputs_;
|
||||
std::vector<std::string> input_names_;
|
||||
std::vector<tensor::TensorPtr> outputs_;
|
||||
std::vector<MutableTensorImplPtr> outputs_;
|
||||
std::vector<std::string> output_names_;
|
||||
};
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -129,8 +129,7 @@ Status LiteInferSession::RunGraph() {
|
|||
auto ret = lite_session_->RunGraph();
|
||||
return static_cast<StatusCode>(ret);
|
||||
}
|
||||
Status LiteInferSession::RunGraph(const std::vector<tensor::TensorPtr> &inputs,
|
||||
std::vector<tensor::TensorPtr> *outputs) {
|
||||
Status LiteInferSession::RunGraph(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs) {
|
||||
MS_LOG(INFO) << "SingleOpInferSession::RunGraph with input and outputs";
|
||||
MS_EXCEPTION_IF_NULL(outputs);
|
||||
MS_EXCEPTION_IF_NULL(lite_session_);
|
||||
|
@ -145,7 +144,7 @@ Status LiteInferSession::RunGraph(const std::vector<tensor::TensorPtr> &inputs,
|
|||
std::vector<void *> old_data;
|
||||
for (size_t i = 0; i < inputs.size(); i++) {
|
||||
auto input = input_tensors.at(i);
|
||||
auto user_input = inputs.at(i);
|
||||
auto user_input = &inputs[i];
|
||||
if (user_input->data_type() != input->data_type()) {
|
||||
ResetTensorData(old_data, input_tensors);
|
||||
MS_LOG(EXCEPTION) << "Tensor " << user_input->id() << " has a different data type from input"
|
||||
|
@ -200,7 +199,7 @@ Status LiteInferSession::RunGraph(const std::vector<tensor::TensorPtr> &inputs,
|
|||
return kLiteError;
|
||||
}
|
||||
outputs->clear();
|
||||
*outputs = TensorUtils::MSTensorToTensorPtr(res);
|
||||
*outputs = TensorUtils::MSTensorToTensor(res);
|
||||
return kSuccess;
|
||||
}
|
||||
Status LiteInferSession::Resize(const std::vector<tensor::TensorPtr> &inputs,
|
||||
|
@ -208,37 +207,23 @@ Status LiteInferSession::Resize(const std::vector<tensor::TensorPtr> &inputs,
|
|||
return kSuccess;
|
||||
}
|
||||
|
||||
std::vector<tensor::TensorPtr> LiteInferSession::GetOutputs() {
|
||||
std::vector<MutableTensorImplPtr> LiteInferSession::GetOutputs() {
|
||||
auto outputs = lite_session_->GetOutputs();
|
||||
std::vector<tensor::TensorPtr> output_tensors;
|
||||
std::vector<MutableTensorImplPtr> output_tensors;
|
||||
for (auto &iter : outputs) {
|
||||
auto output = iter.second;
|
||||
auto type_id = output->data_type();
|
||||
auto shape = output->shape();
|
||||
ShapeVector shape_vec;
|
||||
std::transform(shape.begin(), shape.end(), std::back_inserter(shape_vec),
|
||||
[](int s) { return static_cast<int64_t>(s); });
|
||||
auto data = output->data();
|
||||
auto data_size = output->Size();
|
||||
auto tensor_ptr = std::make_shared<mindspore::tensor::Tensor>(type_id, shape_vec, data, data_size);
|
||||
output_tensors.emplace_back(tensor_ptr);
|
||||
auto impl = std::make_shared<LiteTensorImpl>(output);
|
||||
output_tensors.emplace_back(impl);
|
||||
}
|
||||
return output_tensors;
|
||||
}
|
||||
|
||||
std::vector<tensor::TensorPtr> LiteInferSession::GetInputs() {
|
||||
std::vector<MutableTensorImplPtr> LiteInferSession::GetInputs() {
|
||||
auto inputs = lite_session_->GetInputs();
|
||||
std::vector<tensor::TensorPtr> input_tensors;
|
||||
std::vector<MutableTensorImplPtr> input_tensors;
|
||||
for (auto &input : inputs) {
|
||||
auto type_id = input->data_type();
|
||||
auto shape = input->shape();
|
||||
ShapeVector shape_vec;
|
||||
std::transform(shape.begin(), shape.end(), std::back_inserter(shape_vec),
|
||||
[](int s) { return static_cast<int64_t>(s); });
|
||||
auto data = input->data();
|
||||
auto data_size = input->Size();
|
||||
auto tensor_ptr = std::make_shared<mindspore::tensor::Tensor>(type_id, shape_vec, data, data_size);
|
||||
input_tensors.emplace_back(tensor_ptr);
|
||||
auto impl = std::make_shared<LiteTensorImpl>(input);
|
||||
input_tensors.emplace_back(impl);
|
||||
}
|
||||
return input_tensors;
|
||||
}
|
||||
|
@ -252,8 +237,26 @@ std::vector<std::string> LiteInferSession::GetOutputNames() {
|
|||
}
|
||||
|
||||
std::vector<std::string> LiteInferSession::GetInputNames() { return ConvertToTensorNames(lite_session_->GetInputs()); }
|
||||
tensor::TensorPtr LiteInferSession::GetOutputByTensorName(const std::string &tensorName) { return nullptr; }
|
||||
tensor::TensorPtr LiteInferSession::GetInputByTensorName(const std::string &name) { return nullptr; }
|
||||
MutableTensorImplPtr LiteInferSession::GetOutputByTensorName(const std::string &name) {
|
||||
auto outputs = lite_session_->GetOutputs();
|
||||
for (auto &iter : outputs) {
|
||||
auto output = iter.second;
|
||||
if (output->tensor_name() == name) {
|
||||
return std::make_shared<LiteTensorImpl>(output);
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
MutableTensorImplPtr LiteInferSession::GetInputByTensorName(const std::string &name) {
|
||||
auto inputs = lite_session_->GetInputs();
|
||||
for (auto &input : inputs) {
|
||||
if (input->tensor_name() == name) {
|
||||
return std::make_shared<LiteTensorImpl>(input);
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::shared_ptr<lite::LiteSession> LiteInferSession::CreateLiteSession(lite::InnerContext *context) {
|
||||
auto session = std::make_shared<lite::LiteSession>();
|
||||
|
|
|
@ -32,15 +32,15 @@ class LiteInferSession : public InferSession {
|
|||
Status Init(const std::shared_ptr<Context> context) override;
|
||||
Status CompileGraph(FuncGraphPtr graph, const void *data = nullptr, size_t size = 0) override;
|
||||
Status RunGraph() override;
|
||||
Status RunGraph(const std::vector<tensor::TensorPtr> &inputs, std::vector<tensor::TensorPtr> *outputs) override;
|
||||
Status RunGraph(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs) override;
|
||||
Status Resize(const std::vector<tensor::TensorPtr> &inputs, const std::vector<std::vector<int64_t>> &dims) override;
|
||||
|
||||
std::vector<tensor::TensorPtr> GetOutputs() override;
|
||||
std::vector<tensor::TensorPtr> GetInputs() override;
|
||||
std::vector<MutableTensorImplPtr> GetOutputs() override;
|
||||
std::vector<MutableTensorImplPtr> GetInputs() override;
|
||||
std::vector<std::string> GetOutputNames() override;
|
||||
std::vector<std::string> GetInputNames() override;
|
||||
tensor::TensorPtr GetOutputByTensorName(const std::string &tensorName) override;
|
||||
tensor::TensorPtr GetInputByTensorName(const std::string &name) override;
|
||||
MutableTensorImplPtr GetOutputByTensorName(const std::string &tensorName) override;
|
||||
MutableTensorImplPtr GetInputByTensorName(const std::string &name) override;
|
||||
|
||||
private:
|
||||
std::shared_ptr<lite::LiteSession> CreateLiteSession(lite::InnerContext *context);
|
||||
|
@ -54,10 +54,6 @@ class LiteInferSession : public InferSession {
|
|||
private:
|
||||
std::shared_ptr<lite::LiteSession> lite_session_;
|
||||
std::shared_ptr<Context> context_;
|
||||
std::vector<tensor::TensorPtr> inputs_;
|
||||
std::vector<std::string> input_names_;
|
||||
std::vector<tensor::TensorPtr> outputs_;
|
||||
std::vector<std::string> output_names_;
|
||||
};
|
||||
} // namespace mindspore
|
||||
|
||||
|
|
|
@ -31,6 +31,7 @@
|
|||
#include "src/extendrt/kernel/ascend/plugin/ascend_kernel_plugin.h"
|
||||
#include "extendrt/session/factory.h"
|
||||
#include "extendrt/utils/runtime_utils.h"
|
||||
#include "extendrt/utils/tensor_default_impl.h"
|
||||
|
||||
namespace mindspore {
|
||||
const size_t tensor_max_size = 0x1000000;
|
||||
|
@ -131,15 +132,34 @@ Status SingleOpInferSession::CompileGraph(FuncGraphPtr graph, const void *data,
|
|||
|
||||
RuntimeUtils::AssignKernelGraphAddress(kernel_graph_);
|
||||
|
||||
kernel_graph_utils_->GetModelInputsInfo(kernel_graph_->graph_id(), &inputs_, &input_names_);
|
||||
kernel_graph_utils_->GetModelOutputsInfo(kernel_graph_->graph_id(), &outputs_, &output_names_);
|
||||
|
||||
std::vector<tensor::TensorPtr> graph_inputs, graph_outputs;
|
||||
kernel_graph_utils_->GetModelInputsInfo(kernel_graph_->graph_id(), &graph_inputs, &input_names_);
|
||||
kernel_graph_utils_->GetModelOutputsInfo(kernel_graph_->graph_id(), &graph_outputs, &output_names_);
|
||||
if (graph_inputs.size() != input_names_.size()) {
|
||||
MS_LOG(ERROR) << "Graph input size " << graph_inputs.size() << " != input names size " << input_names_.size();
|
||||
return kCoreFailed;
|
||||
}
|
||||
if (graph_outputs.size() != output_names_.size()) {
|
||||
MS_LOG(ERROR) << "Graph output size " << graph_outputs.size() << " != output names size " << output_names_.size();
|
||||
return kCoreFailed;
|
||||
}
|
||||
for (size_t i = 0; i < input_names_.size(); i++) {
|
||||
auto &input = graph_inputs[i];
|
||||
auto data_type = static_cast<enum DataType>(input->data_type());
|
||||
auto impl = std::make_shared<TensorDefaultImpl>(input_names_[i], data_type, input->shape_c());
|
||||
inputs_.push_back(impl);
|
||||
}
|
||||
for (size_t i = 0; i < output_names_.size(); i++) {
|
||||
auto &output = graph_outputs[i];
|
||||
auto data_type = static_cast<enum DataType>(output->data_type());
|
||||
auto impl = std::make_shared<TensorDefaultImpl>(output_names_[i], data_type, output->shape_c());
|
||||
outputs_.push_back(impl);
|
||||
}
|
||||
return kSuccess;
|
||||
}
|
||||
|
||||
Status SingleOpInferSession::RunGraph() { return kSuccess; }
|
||||
Status SingleOpInferSession::RunGraph(const std::vector<tensor::TensorPtr> &inputs,
|
||||
std::vector<tensor::TensorPtr> *outputs) {
|
||||
Status SingleOpInferSession::RunGraph(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs) {
|
||||
MS_LOG(INFO) << "SingleOpInferSession::RunGraph with input and outputs";
|
||||
MS_EXCEPTION_IF_NULL(kernel_graph_);
|
||||
|
||||
|
@ -179,8 +199,6 @@ Status SingleOpInferSession::RunGraph(const std::vector<tensor::TensorPtr> &inpu
|
|||
}
|
||||
|
||||
RuntimeUtils::CopyOutputTensorsFromKernelGraph(outputs, kernel_graph_);
|
||||
outputs_ = *outputs;
|
||||
|
||||
return kSuccess;
|
||||
}
|
||||
|
||||
|
@ -217,7 +235,7 @@ Status SingleOpInferSession::ResizeGraphInputs(const std::vector<tensor::TensorP
|
|||
graph_input_addr->SetSize(tensor_size);
|
||||
}
|
||||
// update input shape
|
||||
inputs_[i]->set_shape(dims[i]);
|
||||
inputs_[i]->SetShape(dims[i]);
|
||||
auto abstract = std::make_shared<abstract::AbstractTensor>(TypeIdToType(type_id), dims[i]);
|
||||
if (abstract == nullptr) {
|
||||
MS_LOG(ERROR) << "Abstract is nullptr.";
|
||||
|
@ -248,13 +266,12 @@ Status SingleOpInferSession::Resize(const std::vector<tensor::TensorPtr> &inputs
|
|||
}
|
||||
return kSuccess;
|
||||
}
|
||||
|
||||
std::vector<tensor::TensorPtr> SingleOpInferSession::GetOutputs() { return outputs_; }
|
||||
std::vector<tensor::TensorPtr> SingleOpInferSession::GetInputs() { return inputs_; }
|
||||
std::vector<MutableTensorImplPtr> SingleOpInferSession::GetOutputs() { return outputs_; }
|
||||
std::vector<MutableTensorImplPtr> SingleOpInferSession::GetInputs() { return inputs_; }
|
||||
std::vector<std::string> SingleOpInferSession::GetOutputNames() { return output_names_; }
|
||||
std::vector<std::string> SingleOpInferSession::GetInputNames() { return input_names_; }
|
||||
|
||||
tensor::TensorPtr SingleOpInferSession::GetOutputByTensorName(const std::string &tensor_name) {
|
||||
MutableTensorImplPtr SingleOpInferSession::GetOutputByTensorName(const std::string &tensor_name) {
|
||||
for (size_t idx = 0; idx < output_names_.size(); ++idx) {
|
||||
if (output_names_[idx] == tensor_name) {
|
||||
if (idx < outputs_.size()) {
|
||||
|
@ -266,7 +283,7 @@ tensor::TensorPtr SingleOpInferSession::GetOutputByTensorName(const std::string
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
tensor::TensorPtr SingleOpInferSession::GetInputByTensorName(const std::string &tensor_name) {
|
||||
MutableTensorImplPtr SingleOpInferSession::GetInputByTensorName(const std::string &tensor_name) {
|
||||
for (size_t idx = 0; idx < input_names_.size(); ++idx) {
|
||||
if (input_names_[idx] == tensor_name) {
|
||||
if (idx < inputs_.size()) {
|
||||
|
|
|
@ -32,24 +32,24 @@ class SingleOpInferSession : public InferSession {
|
|||
Status AscendInit(const std::shared_ptr<Context> &context);
|
||||
Status CompileGraph(FuncGraphPtr graph, const void *data = nullptr, size_t size = 0) override;
|
||||
Status RunGraph() override;
|
||||
Status RunGraph(const std::vector<tensor::TensorPtr> &inputs, std::vector<tensor::TensorPtr> *outputs) override;
|
||||
Status RunGraph(const std::vector<tensor::Tensor> &inputs, std::vector<tensor::Tensor> *outputs) override;
|
||||
Status Resize(const std::vector<tensor::TensorPtr> &inputs, const std::vector<std::vector<int64_t>> &dims) override;
|
||||
|
||||
std::vector<tensor::TensorPtr> GetOutputs() override;
|
||||
std::vector<tensor::TensorPtr> GetInputs() override;
|
||||
std::vector<MutableTensorImplPtr> GetOutputs() override;
|
||||
std::vector<MutableTensorImplPtr> GetInputs() override;
|
||||
std::vector<std::string> GetOutputNames() override;
|
||||
std::vector<std::string> GetInputNames() override;
|
||||
tensor::TensorPtr GetOutputByTensorName(const std::string &tensorName) override;
|
||||
tensor::TensorPtr GetInputByTensorName(const std::string &name) override;
|
||||
MutableTensorImplPtr GetOutputByTensorName(const std::string &tensorName) override;
|
||||
MutableTensorImplPtr GetInputByTensorName(const std::string &name) override;
|
||||
|
||||
private:
|
||||
Status ResizeGraphInputs(const std::vector<tensor::TensorPtr> &inputs, const std::vector<std::vector<int64_t>> &dims);
|
||||
|
||||
KernelGraphUtilsPtr kernel_graph_utils_;
|
||||
KernelGraphPtr kernel_graph_;
|
||||
std::vector<tensor::TensorPtr> inputs_;
|
||||
std::vector<MutableTensorImplPtr> inputs_;
|
||||
std::vector<std::string> input_names_;
|
||||
std::vector<tensor::TensorPtr> outputs_;
|
||||
std::vector<MutableTensorImplPtr> outputs_;
|
||||
std::vector<std::string> output_names_;
|
||||
uint32_t device_id_ = 0;
|
||||
};
|
||||
|
|
|
@@ -914,9 +914,10 @@ void KernelGraphUtils::GetModelInputsInfo(uint32_t graph_id, std::vector<tensor:
      auto kernel_build_info = AnfAlgo::GetSelectKernelBuildInfo(parameter);
      auto data_type = kernel_build_info->GetOutputDeviceType(0);
      auto ms_tensor = std::make_shared<tensor::Tensor>(data_type, input_shape);
      inputs->push_back(ms_tensor);
      auto abstract = parameter->abstract();
      MS_EXCEPTION_IF_NULL(abstract);
      ms_tensor->set_name(abstract->name());
      inputs->push_back(ms_tensor);
      inputs_name->push_back(abstract->name());
    }
  }
@@ -972,6 +973,12 @@ void KernelGraphUtils::GetModelOutputsInfo(uint32_t graph_id, std::vector<tensor
  }
  *outputs = TransformVectorRefToMultiTensor(vector_outputs);
  GetOutputNames(anf_outputs, output_names);
  if (outputs->size() != output_names->size()) {
    MS_LOG_EXCEPTION << "Output tensor size " << outputs->size() << " != output name size " << output_names->size();
  }
  for (size_t i = 0; i < outputs->size(); i++) {
    outputs->at(i)->set_name(output_names->at(i));
  }
}

CNodePtr KernelGraphUtils::CreateNewCNode(const CNodePtr &cnode, KernelGraphPtr graph,

@@ -64,7 +64,7 @@ std::vector<AnfNodePtr> RuntimeUtils::GetGraphDataInputs(const KernelGraphPtr &k
  return data_inputs;
}

void RuntimeUtils::CopyInputTensorsToKernelGraph(const std::vector<tensor::TensorPtr> &inputs,
void RuntimeUtils::CopyInputTensorsToKernelGraph(const std::vector<tensor::Tensor> &inputs,
                                                 KernelGraphPtr kernel_graph) {
  MS_EXCEPTION_IF_NULL(kernel_graph);
  auto graph_inputs = GetGraphDataInputs(kernel_graph);
@@ -74,20 +74,20 @@ void RuntimeUtils::CopyInputTensorsToKernelGraph(const std::vector<tensor::Tenso
    return;
  }
  for (size_t i = 0; i < graph_inputs.size(); i++) {
    auto input = inputs[i];
    auto &input = inputs[i];
    auto graph_input = graph_inputs[i];
    auto graph_input_addr = AnfAlgo::GetMutableOutputAddr(graph_input, 0);
    if (graph_input_addr->ptr_ == nullptr) {
      MS_LOG(EXCEPTION) << "Output_idx" << i << " of input " << graph_input->DebugString()
                        << " output addr ptr is nullptr.";
    }
    memcpy(graph_input_addr->ptr_, input->data_c(), graph_input_addr->size_);
    memcpy(graph_input_addr->ptr_, input.data_c(), graph_input_addr->size_);
  }
}

void RuntimeUtils::CopyOutputTensorsFromKernelGraph(std::vector<tensor::TensorPtr> *outputs,
                                                    KernelGraphPtr kernel_graph) {
void RuntimeUtils::CopyOutputTensorsFromKernelGraph(std::vector<tensor::Tensor> *outputs, KernelGraphPtr kernel_graph) {
  MS_EXCEPTION_IF_NULL(kernel_graph);
  outputs->clear();
  auto graph_outputs = kernel_graph->outputs();
  for (auto graph_output : graph_outputs) {
    auto real_output_with_index = common::AnfAlgo::VisitKernelWithReturnType(graph_output, 0);
@@ -104,8 +104,7 @@ void RuntimeUtils::CopyOutputTensorsFromKernelGraph(std::vector<tensor::TensorPt
      auto s = static_cast<int64_t>(us);
      shape.push_back(s);
    }
    auto tensor_ptr = std::make_shared<mindspore::tensor::Tensor>(type_id, shape, data, data_size);
    outputs->push_back(tensor_ptr);
    outputs->emplace_back(mindspore::tensor::Tensor(type_id, shape, data, data_size));
  }
}

@@ -37,8 +37,8 @@ class RuntimeUtils {
  static kernel::AddressPtr GetAddressFromDevice(device::DeviceAddressPtr address_ptr);

  static std::vector<AnfNodePtr> GetGraphDataInputs(const KernelGraphPtr &kernel_graph);
  static void CopyInputTensorsToKernelGraph(const std::vector<tensor::TensorPtr> &inputs, KernelGraphPtr kernel_graph);
  static void CopyOutputTensorsFromKernelGraph(std::vector<tensor::TensorPtr> *outputs, KernelGraphPtr kernel_graph);
  static void CopyInputTensorsToKernelGraph(const std::vector<tensor::Tensor> &inputs, KernelGraphPtr kernel_graph);
  static void CopyOutputTensorsFromKernelGraph(std::vector<tensor::Tensor> *outputs, KernelGraphPtr kernel_graph);

  static void AssignKernelGraphAddress(KernelGraphPtr kernel_graph);
  static void AssignValueNodeAddress(KernelGraphPtr kernel_graph);

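A small illustrative sketch of the new by-value output path (hypothetical caller; `kernel_graph` is assumed to be an already-executed KernelGraphPtr): outputs now come back as plain tensor::Tensor values built in place, instead of shared TensorPtr objects.

std::vector<tensor::Tensor> graph_outputs;
RuntimeUtils::CopyOutputTensorsFromKernelGraph(&graph_outputs, kernel_graph);
for (auto &out : graph_outputs) {
  MS_LOG(INFO) << "graph output bytes: " << out.Size();
}
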
@@ -0,0 +1,141 @@
/**
 * This is the C++ adaptation and derivative work of Myia (https://github.com/mila-iqia/myia/).
 *
 * Copyright 2019-2021 Huawei Technologies Co., Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef MINDSPORE_LITE_SRC_EXTENDRT_UTILS_TENSOR_DEFAULT_IMPL_H_
#define MINDSPORE_LITE_SRC_EXTENDRT_UTILS_TENSOR_DEFAULT_IMPL_H_

#include <vector>
#include <string>
#include <memory>
#include <functional>

#include "include/api/types.h"
#include "ir/tensor.h"
#include "runtime/device/device_address.h"
#include "common/utils.h"
#include "common/mutable_tensor_impl.h"

namespace mindspore {
class TensorDefaultImpl : public MutableTensorImpl {
 public:
  TensorDefaultImpl() = default;
  TensorDefaultImpl(const std::string &name, enum DataType type, const std::vector<int64_t> &shape)
      : name_(name), type_(type), shape_(shape) {
    buffer_.SetData(nullptr, 0);
    data_ = buffer_.Data();
  }

  TensorDefaultImpl(const std::string &name, enum DataType type, const std::vector<int64_t> &shape, const void *data,
                    size_t data_len, bool ref_data, bool own_data)
      : name_(name), type_(type), shape_(shape) {
    if (ref_data) {
      data_ = data;
      own_data_ = own_data;
    } else {
      if (data == nullptr) {
        data_len = 0;
      }
      buffer_.SetData(data, data_len);
      data_ = buffer_.Data();
    }
  }
  ~TensorDefaultImpl() {
    if (own_data_ && data_ != nullptr && data_ != buffer_.Data()) {
      free(const_cast<void *>(data_));
    }
  }
  void SetShape(const std::vector<int64_t> &shape) override { shape_ = shape; }
  void SetDataType(mindspore::DataType data_type) override { type_ = data_type; }
  void SetName(const std::string &name) override { name_ = name; }

  mindspore::Format Format() const override { return format_; }
  void SetFormat(mindspore::Format format) override { format_ = format; }

  const std::string &Name() const override { return name_; }
  enum DataType DataType() const override { return type_; }
  const std::vector<int64_t> &Shape() const override { return shape_; }

  void SetAllocator(const std::shared_ptr<Allocator> &allocator) override { allocator_ = allocator; }
  std::shared_ptr<Allocator> GetAllocator() const override { return allocator_; }

  std::vector<QuantParam> GetQuantParams() const override { return quant_param_; }
  void SetQuantParams(const std::vector<QuantParam> &quant_param) override { quant_param_ = quant_param; }

  int64_t ElementNum() const { return std::accumulate(shape_.begin(), shape_.end(), 1, std::multiplies<int64_t>()); }
  size_t DataSize() const override { return ElementNum() * lite::DataTypeSize(static_cast<enum TypeId>(type_)); }

  void SetDeviceData(void *data) override { device_data_ = data; }
  void *GetDeviceData() override { return device_data_; }
  bool IsConst() const override { return false; }

  bool IsDevice() const override { return device_data_ != nullptr; }

  std::shared_ptr<const void> Data() const override {
    ResizeData();
    return std::shared_ptr<const void>(data_, [](const void *) {});
  }

  void SetData(void *data, bool own_data) override {
    data_ = data;
    own_data_ = own_data;
  }

  void *MutableData() override {
    ResizeData();
    return const_cast<void *>(data_);
  }

  std::shared_ptr<Impl> Clone() const override {
    auto impl = std::make_shared<TensorDefaultImpl>(name_, type_, shape_, data_, DataSize(), false, false);
    if (!impl) {
      return nullptr;
    }
    impl->SetFormat(format_);
    impl->SetQuantParams(quant_param_);
    impl->SetDeviceData(device_data_);
    impl->SetAllocator(allocator_);
    return impl;
  }

 protected:
  std::string name_;
  enum DataType type_ = DataType::kTypeUnknown;
  enum Format format_ = mindspore::NCHW;
  std::vector<int64_t> shape_;
  std::shared_ptr<Allocator> allocator_ = nullptr;
  std::vector<QuantParam> quant_param_;
  void *device_data_ = nullptr;

  mutable Buffer buffer_;
  mutable const void *data_ = nullptr;
  bool own_data_ = false;

  void ResizeData() const {
    if (data_ != nullptr && data_ != buffer_.Data()) {
      return;
    }
    auto data_size = DataSize();
    if (data_size != buffer_.DataSize()) {
      buffer_.ResizeData(data_size);
    }
    data_ = buffer_.Data();
  }
};
}  // namespace mindspore

#endif  // MINDSPORE_LITE_SRC_EXTENDRT_UTILS_TENSOR_DEFAULT_IMPL_H_
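
A brief construction sketch for the ref-data path of TensorDefaultImpl (illustrative values, not from this commit): with ref_data set to true the impl only borrows the caller's buffer, and own_data decides whether the destructor frees that pointer.

// user_buf stays owned by the caller because own_data is false.
float user_buf[3] = {1.0f, 2.0f, 3.0f};
auto impl = std::make_shared<mindspore::TensorDefaultImpl>(
    "in0", mindspore::DataType::kNumberTypeFloat32, std::vector<int64_t>{1, 3},
    user_buf, sizeof(user_buf), /*ref_data=*/true, /*own_data=*/false);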

@@ -18,10 +18,57 @@

#include <memory>
#include <algorithm>
#include <utility>

#include "extendrt/utils/tensor_utils.h"
#include "mindspore/ccsrc/kernel/common_utils.h"

namespace mindspore {
TensorRefData::TensorRefData(void *data, size_t bytes_size, size_t data_size, size_t ndim)
    : data_(data), elem_count_(bytes_size), data_size_(data_size), ndim_(ndim) {}

ssize_t TensorRefData::size() const { return static_cast<ssize_t>(elem_count_); }

ssize_t TensorRefData::itemsize() const {
  if (elem_count_ == 0) {
    return 0;
  }
  return static_cast<ssize_t>(data_size_ / elem_count_);
}

ssize_t TensorRefData::nbytes() const { return static_cast<ssize_t>(data_size_); }

ssize_t TensorRefData::ndim() const { return static_cast<ssize_t>(ndim_); }

void *TensorRefData::data() { return data_; }

const void *TensorRefData::const_data() const { return data_; }

std::string TensorRefData::ToString(TypeId type, const ShapeVector &shape, bool use_comma) const {
  std::stringstream stream;
  stream << "RefTensor:[";
  for (size_t i = 0; i < shape.size(); i++) {
    stream << shape[i];
    if (i + 1 < shape.size()) {
      stream << ",";
    }
  }
  stream << "]" << type;
  return stream.str();
}

mindspore::Format TensorTensorImpl::Format() const {
  MS_EXCEPTION_IF_NULL(tensor_);
  return kernel::GetFormatFromStrToEnum(tensor_->device_info().format_);
}

void TensorTensorImpl::SetFormat(mindspore::Format format) {
  MS_EXCEPTION_IF_NULL(tensor_);
  auto device_info = tensor_->device_info();
  device_info.format_ = kernel::GetFormatFromEnumToStr(format);
  tensor_->set_device_info(device_info);
}

std::vector<mindspore::tensor::TensorPtr> TensorUtils::MSTensorToTensorPtr(const std::vector<MSTensor> &ms_tensors) {
  std::vector<mindspore::tensor::TensorPtr> tensor_ptrs;

@@ -31,7 +78,8 @@ std::vector<mindspore::tensor::TensorPtr> TensorUtils::MSTensorToTensorPtr(const
    auto shape = ms_tensor.Shape();
    auto data = ms_tensor.MutableData();
    auto data_size = ms_tensor.DataSize();
    auto tensor_ptr = std::make_shared<mindspore::tensor::Tensor>(type_id, shape, data, data_size);
    auto ref_tensor_data = std::make_shared<TensorRefData>(data, ms_tensor.ElementNum(), data_size, shape.size());
    auto tensor_ptr = std::make_shared<mindspore::tensor::Tensor>(type_id, shape, ref_tensor_data);
    tensor_ptrs.push_back(tensor_ptr);
  }
  return tensor_ptrs;
@@ -40,22 +88,46 @@ std::vector<mindspore::tensor::TensorPtr> TensorUtils::MSTensorToTensorPtr(const
std::vector<MSTensor> TensorUtils::TensorPtrToMSTensor(std::vector<mindspore::tensor::TensorPtr> tensor_ptrs,
                                                       const std::vector<std::string> &tensor_names) {
  std::vector<MSTensor> ms_tensors;

  for (size_t i = 0; i < tensor_ptrs.size(); i++) {
    auto graph_tensor = tensor_ptrs[i];
    std::string graph_tensor_name = tensor_names[i];
    auto type_id = graph_tensor->data_type_c();
    auto data_type = static_cast<mindspore::DataType>(type_id);
    auto ms_tensor_ptr = MSTensor::CreateRefTensor(graph_tensor_name, data_type, graph_tensor->shape_c(),
                                                   graph_tensor->data_c(), graph_tensor->Size());
    if (ms_tensor_ptr == nullptr) {
      MS_LOG_WARNING << "Failed to create input tensor ";
      return {};
    }
    ms_tensors.push_back(*ms_tensor_ptr);
    delete ms_tensor_ptr;
    graph_tensor->set_name(graph_tensor_name);
    auto tensor_impl = std::make_shared<TensorTensorImpl>(graph_tensor);
    ms_tensors.push_back(MSTensor(tensor_impl));
  }
  return ms_tensors;
}

std::vector<mindspore::tensor::Tensor> TensorUtils::MSTensorToTensor(const std::vector<MSTensor> &ms_tensors) {
  std::vector<mindspore::tensor::Tensor> tensors;
  for (auto ms_tensor : ms_tensors) {
    auto data_type = ms_tensor.DataType();
    auto type_id = static_cast<mindspore::TypeId>(data_type);
    auto shape = ms_tensor.Shape();
    auto data = ms_tensor.MutableData();
    auto data_size = ms_tensor.DataSize();
    auto ref_tensor_data = std::make_shared<TensorRefData>(data, ms_tensor.ElementNum(), data_size, shape.size());
    mindspore::tensor::Tensor tensor(type_id, shape, ref_tensor_data);
    auto device_address = ms_tensor.GetDeviceData();
    if (device_address != nullptr) {
      auto lite_device_address = std::make_shared<LiteDeviceAddress>(device_address, ms_tensor.DataSize());
      tensor.set_device_address(lite_device_address);
    }
    tensors.emplace_back(std::move(tensor));
  }
  return tensors;
}

std::vector<MSTensor> TensorUtils::TensorToMSTensor(std::vector<mindspore::tensor::Tensor> tensors,
                                                    const std::vector<std::string> &tensor_names) {
  std::vector<MSTensor> ms_tensors;
  for (size_t i = 0; i < tensors.size(); i++) {
    auto &graph_tensor = tensors[i];
    std::string graph_tensor_name = tensor_names[i];
    graph_tensor.set_name(graph_tensor_name);
    auto tensor_impl = std::make_shared<TensorTensorImpl>(graph_tensor);
    ms_tensors.emplace_back(MSTensor(tensor_impl));
  }
  return ms_tensors;
}
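
A short round-trip sketch of the two new conversions (names such as ms_inputs and names are assumptions, not from the diff): MSTensorToTensor wraps each MSTensor buffer in TensorRefData, so no host copy happens, and TensorToMSTensor wraps tensor::Tensor back into the public MSTensor API via TensorTensorImpl.

// ms_inputs (std::vector<MSTensor>) and names (std::vector<std::string>) are assumed to exist.
auto inner_tensors = mindspore::TensorUtils::MSTensorToTensor(ms_inputs);           // borrows data
auto api_tensors = mindspore::TensorUtils::TensorToMSTensor(inner_tensors, names);  // wraps back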

@@ -21,11 +21,160 @@

#include <vector>
#include <string>
#include <memory>
#include <functional>

#include "include/api/types.h"
#include "ir/tensor.h"
#include "runtime/device/device_address.h"
#include "common/utils.h"
#include "common/mutable_tensor_impl.h"
#include "mindspore/core/ir/tensor.h"

namespace mindspore {
class TensorRefData : public tensor::TensorData {
 public:
  TensorRefData(void *data, size_t elem_count, size_t data_size, size_t ndim);
  ~TensorRefData() = default;

  ssize_t size() const override;
  ssize_t itemsize() const override;
  ssize_t nbytes() const override;
  ssize_t ndim() const override;
  void *data() override;
  const void *const_data() const override;
  bool is_sub_data() const override { return false; }
  bool has_sub_data() const override { return false; }
  std::string ToString(TypeId type, const ShapeVector &shape, bool use_comma) const override;

 private:
  void *data_ = nullptr;
  size_t elem_count_ = 0;
  size_t data_size_ = 0;
  size_t ndim_ = 0;
};

constexpr auto kLiteDeviceName = "LiteDevice";

class LiteDeviceAddress : public device::DeviceAddress {
 public:
  LiteDeviceAddress(void *ptr, size_t size) : device::DeviceAddress(ptr, size) { device_name_ = kLiteDeviceName; }
  void SetData(void *data) { set_ptr(data); }

  bool SyncDeviceToHost(const ShapeVector &shape, size_t size, TypeId type, void *host_ptr) const override {
    return false;
  }
  bool SyncHostToDevice(const ShapeVector &shape, size_t size, TypeId type, const void *host_ptr,
                        const std::string &format) const override {
    return false;
  }
  bool SyncHostToDevice(const ShapeVector &shape, size_t size, TypeId type, const void *host_ptr) const override {
    return SyncHostToDevice(shape, size, type, host_ptr, "DefaultFormat");
  }
  void ClearDeviceMemory() override {}
};

class TensorTensorImpl : public MutableTensorImpl {
 public:
  explicit TensorTensorImpl(const tensor::Tensor &tensor) : tensor_(std::make_shared<tensor::Tensor>(tensor)) {}
  explicit TensorTensorImpl(const std::shared_ptr<tensor::Tensor> &tensor) : tensor_(tensor) {}

  void SetData(void *, bool) override { MS_LOG_EXCEPTION << "Cannot set data for TensorTensorImpl"; }

  std::shared_ptr<const void> Data() const override {
    MS_EXCEPTION_IF_NULL(tensor_);
    return std::shared_ptr<const void>(tensor_->data_c(), [](const void *) {});
  }

  void *MutableData() override {
    MS_EXCEPTION_IF_NULL(tensor_);
    return tensor_->data_c();
  }

  void SetDeviceData(void *data) override {
    MS_EXCEPTION_IF_NULL(tensor_);
    auto data_size = DataSize();
    auto device_address = std::make_shared<LiteDeviceAddress>(data, data_size);
    tensor_->set_device_address(device_address);
  }
  void *GetDeviceData() override {
    MS_EXCEPTION_IF_NULL(tensor_);
    auto device_address = tensor_->device_address();
    if (device_address == nullptr) {
      return nullptr;
    }
    return device_address->GetMutablePtr();
  }

  bool IsDevice() const override {
    MS_EXCEPTION_IF_NULL(tensor_);
    return tensor_->device_address() != nullptr;
  }

  bool IsConst() const override { return false; }

  void SetShape(const std::vector<int64_t> &shape) override {
    MS_EXCEPTION_IF_NULL(tensor_);
    tensor_->set_shape(shape);
  }
  void SetDataType(mindspore::DataType data_type) override {
    MS_EXCEPTION_IF_NULL(tensor_);
    tensor_->set_data_type(static_cast<enum TypeId>(data_type));
  }
  void SetName(const std::string &name) override {
    MS_EXCEPTION_IF_NULL(tensor_);
    tensor_->set_name(name);
  }

  mindspore::Format Format() const override;

  void SetFormat(mindspore::Format format) override;

  const std::string &Name() const override {
    MS_EXCEPTION_IF_NULL(tensor_);
    return tensor_->name();
  }
  enum DataType DataType() const override {
    MS_EXCEPTION_IF_NULL(tensor_);
    return static_cast<enum DataType>(tensor_->data_type());
  }
  const std::vector<int64_t> &Shape() const override {
    MS_EXCEPTION_IF_NULL(tensor_);
    return tensor_->shape();
  }

  void SetAllocator(const std::shared_ptr<Allocator> &allocator) override {
    MS_EXCEPTION_IF_NULL(tensor_);
    tensor_->set_user_data("allocator", allocator);
  }
  std::shared_ptr<Allocator> GetAllocator() const override {
    MS_EXCEPTION_IF_NULL(tensor_);
    return tensor_->user_data<Allocator>("allocator");
  }

  std::vector<QuantParam> GetQuantParams() const override {
    MS_EXCEPTION_IF_NULL(tensor_);
    auto data = tensor_->user_data<std::vector<QuantParam>>("quant_param");
    return data ? *data : std::vector<QuantParam>();
  }

  void SetQuantParams(const std::vector<QuantParam> &quant_param) override {
    MS_EXCEPTION_IF_NULL(tensor_);
    tensor_->set_user_data("quant_param", std::make_shared<std::vector<QuantParam>>(quant_param));
  }

  int64_t ElementNum() const {
    auto &shape = Shape();
    return std::accumulate(shape.begin(), shape.end(), 1, std::multiplies<int64_t>());
  }
  size_t DataSize() const override { return ElementNum() * lite::DataTypeSize(static_cast<enum TypeId>(DataType())); }

  std::shared_ptr<Impl> Clone() const override { return std::make_shared<TensorTensorImpl>(tensor_); }

 private:
  std::shared_ptr<tensor::Tensor> tensor_ = nullptr;
};

class TensorUtils {
 public:
  // MSTensor <-> TensorPtr
@@ -33,6 +182,10 @@ class TensorUtils {
  static std::vector<MSTensor> TensorPtrToMSTensor(std::vector<mindspore::tensor::TensorPtr> tensor_ptrs,
                                                   const std::vector<std::string> &tensor_names);

  static std::vector<mindspore::tensor::Tensor> MSTensorToTensor(const std::vector<MSTensor> &ms_tensors);
  static std::vector<MSTensor> TensorToMSTensor(std::vector<mindspore::tensor::Tensor> tensors,
                                                const std::vector<std::string> &tensor_names);

  // TensorPtr <-> Tensor
  static std::vector<mindspore::tensor::TensorPtr> TensorToTensorPtr(
    const std::vector<mindspore::tensor::Tensor> &tensors);

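To illustrate the intent of TensorRefData and LiteDeviceAddress, a hedged sketch (host_ptr, dev_ptr and the sizes are assumed to be managed by the caller, not taken from this commit): an existing host buffer can back a tensor::Tensor with no copy, and a raw device pointer can be attached as its device address.

ShapeVector shape = {1, 3, 224, 224};
size_t elem_count = 1 * 3 * 224 * 224;
size_t bytes = elem_count * sizeof(float);
auto ref = std::make_shared<mindspore::TensorRefData>(host_ptr, elem_count, bytes, shape.size());
mindspore::tensor::Tensor t(mindspore::kNumberTypeFloat32, shape, ref);                 // zero-copy host view
t.set_device_address(std::make_shared<mindspore::LiteDeviceAddress>(dev_ptr, bytes));   // optional device view
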
@ -146,7 +146,7 @@ MSFormat MSTensorGetFormat(const MSTensorHandle tensor) {
|
|||
return kMSFormatNHWC;
|
||||
}
|
||||
auto impl = static_cast<mindspore::LiteTensorImpl *>(tensor);
|
||||
return static_cast<MSFormat>(impl->format());
|
||||
return static_cast<MSFormat>(impl->Format());
|
||||
}
|
||||
|
||||
void MSTensorSetData(MSTensorHandle tensor, void *data) {
|
||||
|
@ -155,7 +155,7 @@ void MSTensorSetData(MSTensorHandle tensor, void *data) {
|
|||
return;
|
||||
}
|
||||
auto impl = static_cast<mindspore::LiteTensorImpl *>(tensor);
|
||||
return impl->SetData(data);
|
||||
return impl->SetData(data, true);
|
||||
}
|
||||
|
||||
const void *MSTensorGetData(const MSTensorHandle tensor) {
|
||||
|
|
|
@ -77,6 +77,12 @@ std::shared_ptr<LiteTensorImpl> LiteTensorImpl::CreateTensorImplByDeepCopy(const
  return impl;
}

void LiteTensorImpl::SetDeviceData(void *data) { MS_LOG(ERROR) << "Not implemented."; }
void *LiteTensorImpl::GetDeviceData() {
  MS_LOG(ERROR) << "Not implemented.";
  return nullptr;
}

#ifndef STRING_KERNEL_CLIP
std::shared_ptr<LiteTensorImpl> LiteTensorImpl::StringsToTensorImpl(const std::string &name,
                                                                    const std::vector<std::string> &str) {
@ -30,11 +30,12 @@
#include "src/tensor.h"
#include "src/common/log_adapter.h"
#include "ir/api_tensor_impl.h"
#include "common/mutable_tensor_impl.h"

namespace mindspore {
using mindspore::lite::RET_OK;

class LiteTensorImpl : public mindspore::MSTensor::Impl {
class LiteTensorImpl : public MutableTensorImpl {
 public:
  LiteTensorImpl() {}

@ -80,7 +81,7 @@ class LiteTensorImpl : public mindspore::MSTensor::Impl {
    return tensor_name_;
  }

  void SetName(const std::string &name) {
  void SetName(const std::string &name) override {
    if (lite_tensor_ == nullptr) {
      MS_LOG(ERROR) << "Invalid tensor.";
      return;
@ -97,7 +98,7 @@ class LiteTensorImpl : public mindspore::MSTensor::Impl {
    return static_cast<enum DataType>(lite_tensor_->data_type());
  }

  void SetDataType(enum DataType data_type) {
  void SetDataType(enum DataType data_type) override {
    if (lite_tensor_ == nullptr) {
      MS_LOG(ERROR) << "Invalid tensor.";
      return;
@ -127,7 +128,7 @@ class LiteTensorImpl : public mindspore::MSTensor::Impl {

  std::shared_ptr<mindspore::MSTensor::Impl> Clone() const override { return nullptr; }

  void SetShape(const std::vector<int64_t> &shape) {
  void SetShape(const std::vector<int64_t> &shape) override {
    if (lite_tensor_ == nullptr) {
      MS_LOG(ERROR) << "Invalid tensor.";
      return;
@ -138,7 +139,7 @@ class LiteTensorImpl : public mindspore::MSTensor::Impl {
    lite_tensor_->set_shape(tensor_shape);
  }

  std::shared_ptr<Allocator> allocator() const {
  std::shared_ptr<Allocator> GetAllocator() const override {
    if (lite_tensor_ == nullptr) {
      MS_LOG(ERROR) << "Invalid tensor.";
      return nullptr;
@ -146,7 +147,7 @@ class LiteTensorImpl : public mindspore::MSTensor::Impl {
    return lite_tensor_->allocator();
  }

  void SetAllocator(std::shared_ptr<Allocator> allocator) {
  void SetAllocator(const std::shared_ptr<Allocator> &allocator) override {
    if (lite_tensor_ == nullptr) {
      MS_LOG(ERROR) << "Invalid tensor.";
      return;
@ -154,7 +155,7 @@ class LiteTensorImpl : public mindspore::MSTensor::Impl {
    lite_tensor_->set_allocator(allocator);
  }

  mindspore::Format format() {
  mindspore::Format Format() const override {
    if (lite_tensor_ == nullptr) {
      MS_LOG(ERROR) << "Invalid tensor.";
      return mindspore::Format::NHWC;
@ -162,7 +163,7 @@ class LiteTensorImpl : public mindspore::MSTensor::Impl {
    return lite_tensor_->format();
  }

  void SetFormat(mindspore::Format format) {
  void SetFormat(mindspore::Format format) override {
    if (lite_tensor_ == nullptr) {
      MS_LOG(ERROR) << "Invalid tensor.";
      return;
@ -185,7 +186,7 @@ class LiteTensorImpl : public mindspore::MSTensor::Impl {
    }
    return lite_tensor_->MutableData();
  }
  virtual bool IsConst() const {
  bool IsConst() const override {
    if (lite_tensor_ == nullptr) {
      MS_LOG(ERROR) << "Invalid tensor.";
      return false;
@ -201,15 +202,15 @@ class LiteTensorImpl : public mindspore::MSTensor::Impl {
    return lite_tensor_->Size();
  }

  void SetData(void *data) {
  void SetData(void *data, bool own_data) override {
    if (lite_tensor_ == nullptr) {
      MS_LOG(ERROR) << "Invalid tensor.";
      return;
    }
    lite_tensor_->set_data(data);
    lite_tensor_->set_data(data, own_data);
  }

  virtual std::vector<QuantParam> QuantParams() const {
  std::vector<QuantParam> GetQuantParams() const override {
    if (lite_tensor_ == nullptr) {
      MS_LOG(ERROR) << "Invalid tensor.";
      return std::vector<QuantParam>{};
@ -228,7 +229,7 @@ class LiteTensorImpl : public mindspore::MSTensor::Impl {
    return quant_params;
  }

  void SetQuantParams(std::vector<QuantParam> quant_params) {
  void SetQuantParams(const std::vector<QuantParam> &quant_params) override {
    if (lite_tensor_ == nullptr) {
      MS_LOG(ERROR) << "Invalid tensor.";
      return;
@ -261,6 +262,9 @@ class LiteTensorImpl : public mindspore::MSTensor::Impl {

  void set_from_session(bool from_session) { from_session_ = from_session; }

  void SetDeviceData(void *data) override;
  void *GetDeviceData() override;

 private:
  lite::Tensor *lite_tensor_ = nullptr;
  std::string tensor_name_ = "";
@ -100,6 +100,8 @@ bool MSTensor::operator==(const MSTensor &tensor) const {
  return lite_impl->lite_tensor() == lite_tensor_impl->lite_tensor();
}

bool MSTensor::operator!=(const MSTensor &tensor) const { return !operator==(tensor); }

MSTensor *MSTensor::CreateTensor(const std::vector<char> &name, enum DataType type, const std::vector<int64_t> &shape,
                                 const void *data, size_t data_len) noexcept {
  if (data_len > MAX_MALLOC_SIZE) {
@ -146,12 +148,14 @@ MSTensor *MSTensor::CreateTensor(const std::vector<char> &name, enum DataType ty
}

MSTensor *MSTensor::CreateRefTensor(const std::vector<char> &name, enum DataType type,
                                    const std::vector<int64_t> &shape, const void *data, size_t data_len) noexcept {
                                    const std::vector<int64_t> &shape, const void *data, size_t data_len,
                                    bool own_data) noexcept {
  auto impl = LiteTensorImpl::CreateTensorImpl(CharToString(name), type, shape, data, data_len);
  if (impl == nullptr) {
    MS_LOG(ERROR) << "Allocate tensor impl failed.";
    return nullptr;
  }
  impl->set_own_data(own_data);
  auto ms_tensor = new (std::nothrow) MSTensor(impl);
  if (ms_tensor == nullptr) {
    MS_LOG(ERROR) << "Allocate tensor impl failed.";
@ -160,10 +164,10 @@ MSTensor *MSTensor::CreateRefTensor(const std::vector<char> &name, enum DataType
  return ms_tensor;
}

MSTensor *MSTensor::CreateDevTensor(const std::vector<char> &name, enum DataType type,
                                    const std::vector<int64_t> &shape, const void *data, size_t data_len) noexcept {
MSTensor MSTensor::CreateDeviceTensor(const std::vector<char> &name, enum DataType type,
                                      const std::vector<int64_t> &shape, void *data, size_t data_len) noexcept {
  MS_LOG(ERROR) << "Unsupported Feature.";
  return nullptr;
  return MSTensor(nullptr);
}

MSTensor *MSTensor::CreateTensorFromFile(const std::vector<char> &file, enum DataType type,
@ -305,12 +309,28 @@ void *MSTensor::MutableData() {
  return impl_->MutableData();
}

void MSTensor::SetDeviceData(void *data) {
  if (impl_ == nullptr) {
    MS_LOG(ERROR) << "Invalid tensor implement.";
    return;
  }
  std::static_pointer_cast<MutableTensorImpl>(impl_)->SetDeviceData(data);
}

void *MSTensor::GetDeviceData() {
  if (impl_ == nullptr) {
    MS_LOG(ERROR) << "Invalid tensor implement.";
    return nullptr;
  }
  return std::static_pointer_cast<MutableTensorImpl>(impl_)->GetDeviceData();
}

bool MSTensor::IsConst() const {
  if (impl_ == nullptr) {
    MS_LOG(ERROR) << "Invalid tensor implement.";
    return false;
  }
  return std::static_pointer_cast<LiteTensorImpl>(impl_)->IsConst();
  return std::static_pointer_cast<MutableTensorImpl>(impl_)->IsConst();
}

size_t MSTensor::DataSize() const {
@ -338,7 +358,7 @@ void MSTensor::SetShape(const std::vector<int64_t> &shape) {
    return;
  }
  std::static_pointer_cast<LiteTensorImpl>(impl_)->SetShape(shape);
  std::static_pointer_cast<MutableTensorImpl>(impl_)->SetShape(shape);
}

void MSTensor::SetDataType(enum DataType data_type) {
@ -347,7 +367,7 @@ void MSTensor::SetDataType(enum DataType data_type) {
    return;
  }
  std::static_pointer_cast<LiteTensorImpl>(impl_)->SetDataType(data_type);
  std::static_pointer_cast<MutableTensorImpl>(impl_)->SetDataType(data_type);
}

void MSTensor::SetTensorName(const std::vector<char> &name) {
@ -355,7 +375,7 @@ void MSTensor::SetTensorName(const std::vector<char> &name) {
    MS_LOG(ERROR) << "Invalid tensor implement.";
    return;
  }
  std::static_pointer_cast<LiteTensorImpl>(impl_)->SetName(CharToString(name));
  std::static_pointer_cast<MutableTensorImpl>(impl_)->SetName(CharToString(name));
}

void MSTensor::SetAllocator(std::shared_ptr<Allocator> allocator) {
@ -364,7 +384,7 @@ void MSTensor::SetAllocator(std::shared_ptr<Allocator> allocator) {
    return;
  }
  return std::static_pointer_cast<LiteTensorImpl>(impl_)->SetAllocator(allocator);
  return std::static_pointer_cast<MutableTensorImpl>(impl_)->SetAllocator(allocator);
}

std::shared_ptr<Allocator> MSTensor::allocator() const {
@ -373,7 +393,7 @@ std::shared_ptr<Allocator> MSTensor::allocator() const {
    return nullptr;
  }
  return std::static_pointer_cast<LiteTensorImpl>(impl_)->allocator();
  return std::static_pointer_cast<MutableTensorImpl>(impl_)->GetAllocator();
}

void MSTensor::SetFormat(mindspore::Format format) {
@ -382,7 +402,7 @@ void MSTensor::SetFormat(mindspore::Format format) {
    return;
  }
  return std::static_pointer_cast<LiteTensorImpl>(impl_)->SetFormat(format);
  return std::static_pointer_cast<MutableTensorImpl>(impl_)->SetFormat(format);
}

mindspore::Format MSTensor::format() const {
@ -391,16 +411,16 @@ mindspore::Format MSTensor::format() const {
    return mindspore::Format::NHWC;
  }
  return std::static_pointer_cast<LiteTensorImpl>(impl_)->format();
  return std::static_pointer_cast<MutableTensorImpl>(impl_)->Format();
}

void MSTensor::SetData(void *data) {
void MSTensor::SetData(void *data, bool own_data) {
  if (impl_ == nullptr) {
    MS_LOG(ERROR) << "Invalid tensor implement.";
    return;
  }
  return std::static_pointer_cast<LiteTensorImpl>(impl_)->SetData(data);
  return std::static_pointer_cast<MutableTensorImpl>(impl_)->SetData(data, own_data);
}

std::vector<QuantParam> MSTensor::QuantParams() const {
@ -409,7 +429,7 @@ std::vector<QuantParam> MSTensor::QuantParams() const {
    return std::vector<QuantParam>{};
  }
  return std::static_pointer_cast<LiteTensorImpl>(impl_)->QuantParams();
  return std::static_pointer_cast<MutableTensorImpl>(impl_)->GetQuantParams();
}

void MSTensor::SetQuantParams(std::vector<QuantParam> quant_params) {
@ -418,7 +438,7 @@ void MSTensor::SetQuantParams(std::vector<QuantParam> quant_params) {
    return;
  }
  return std::static_pointer_cast<LiteTensorImpl>(impl_)->SetQuantParams(quant_params);
  return std::static_pointer_cast<MutableTensorImpl>(impl_)->SetQuantParams(quant_params);
}

Buffer::Buffer() : impl_(std::make_shared<Impl>()) {}
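
Taken together, the MSTensor changes above let a caller wrap existing host memory without handing over ownership and attach device memory directly. A hedged usage sketch; the buffer pointers, length, shape, and data type are illustrative, and SetDeviceData only takes effect on backends that implement it:

#include <vector>
#include "include/api/types.h"

void WrapBuffers(void *host_buf, size_t host_len, void *device_buf) {
  // own_data = false: the tensor references host_buf but never frees it.
  mindspore::MSTensor *input = mindspore::MSTensor::CreateRefTensor(
    "input0", mindspore::DataType::kNumberTypeFloat32, {1, 224, 224, 3}, host_buf, host_len, false);
  if (input == nullptr) {
    return;
  }
  // Attach device memory the model can read directly; the caller keeps managing device_buf.
  input->SetDeviceData(device_buf);
  void *dev = input->GetDeviceData();  // returns the attached pointer where supported
  (void)dev;
  mindspore::MSTensor::DestroyTensorPtr(input);
}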
@ -128,16 +128,13 @@ class Tensor {

  // note: if the old data is valid, set_data only releases ownership of it and does not free it. Call FreeData
  // before set_data if the current tensor should free the old data.
  void set_data(void *data) {
    if (this->data_ == data) {
      return;
    }
    if (allocator_ != nullptr) {
  void set_data(void *data, bool own_data = true) {
    if (allocator_ != nullptr && this->data_ != data) {
      allocator_->IncRefCount(data, 1);
      allocator_->DecRefCount(this->data_, 1);
    }
    this->data_ = data;
    this->own_data_ = true;
    this->own_data_ = own_data;
  }

  Category category() const { return this->category_; }
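
For the internal lite::Tensor, the new own_data flag makes the same ownership distinction explicit. A minimal sketch, assuming a valid tensor pointer and the internal src/tensor.h header from the hunk above:

#include "src/tensor.h"  // internal header; path taken from the hunk above

void AttachExternalBuffer(mindspore::lite::Tensor *tensor, void *external_buf) {
  // own_data = false: the tensor reads and writes external_buf but will not free it.
  tensor->set_data(external_buf, false);
  // ... run the graph ...
  // Detach before the caller releases external_buf so the tensor is not left
  // holding a dangling pointer.
  tensor->set_data(nullptr);
}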
@ -910,12 +910,12 @@ int BenchmarkUnifiedApi::PrintInputData() {
#ifdef PARALLEL_INFERENCE
void BenchmarkUnifiedApi::ModelParallelRunnerWarmUp(int index) {
  auto in = model_runner_.GetInputs();
  auto output = all_outputs_[index];
  for (size_t i = 0; i < in.size(); i++) {
    in[i].SetData(all_inputs_data_[index][i]);
    in[i].SetShape(resize_dims_[i]);
  }
  auto warm_up_start = GetTimeUs();
  std::vector<MSTensor> output;
  auto ret = model_runner_.Predict(in, &output);
  for (size_t j = 0; j < in.size(); j++) {
    in[j].SetData(nullptr);
@ -937,12 +937,12 @@ void BenchmarkUnifiedApi::ModelParallelRunnerRun(int task_num, int parallel_idx)
  int idx = parallel_idx + flags_->warm_up_loop_count_;
  auto in = model_runner_.GetInputs();
  auto in_data = all_inputs_data_[idx];
  auto output = all_outputs_[idx];
  for (size_t tensor_index = 0; tensor_index < in.size(); tensor_index++) {
    in.at(tensor_index).SetData(all_inputs_data_.at(idx)[tensor_index]);
    in.at(tensor_index).SetShape(resize_dims_.at(tensor_index));
  }
  auto predict_start = GetTimeUs();
  std::vector<MSTensor> output;
  auto ret = model_runner_.Predict(in, &output);
  if (ret != kSuccess) {
    model_parallel_runner_ret_failed_ = true;
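
The benchmark changes above follow a zero-copy input pattern: bind caller-owned buffers to the runner inputs, run Predict, then detach the buffers so the tensors never free memory they do not own. A hedged sketch with illustrative names (runner, buffers, dims) and an assumed header path:

#include <vector>
#include "include/api/model_parallel_runner.h"  // assumed header for ModelParallelRunner

void RunOnce(mindspore::ModelParallelRunner &runner, const std::vector<void *> &buffers,
             const std::vector<std::vector<int64_t>> &dims) {
  auto inputs = runner.GetInputs();
  for (size_t i = 0; i < inputs.size(); i++) {
    inputs[i].SetData(buffers[i], false);  // own_data = false keeps ownership with the caller
    inputs[i].SetShape(dims[i]);
  }
  std::vector<mindspore::MSTensor> outputs;
  (void)runner.Predict(inputs, &outputs);
  for (size_t i = 0; i < inputs.size(); i++) {
    inputs[i].SetData(nullptr);  // detach before the caller's buffers go away
  }
}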
@ -13,6 +13,7 @@ set(REG_SRC ${CONVERT_REG_SRC}
        ${KERNEL_REG_DIR}/../common/string_util.cc
        ${KERNEL_REG_DIR}/../common/utils.cc
        ${KERNEL_REG_DIR}/../extendrt/delegate/tensorrt/distribution/distribution_base.cc
        ${KERNEL_REG_DIR}/../extendrt/delegate/plugin/tensorrt_executor_plugin.cc
        ${CORE_DIR}/utils/log_adapter.cc
        ${CORE_DIR}/utils/status.cc
        ${CONVERTER_DIR}/converter_context.cc
@ -137,9 +137,9 @@ TEST_F(TestZeroCopy, TestDeviceTensor) {
    // Apply transform on images
    Status rc = Transform(image, &image);
    ASSERT_TRUE(rc == kSuccess);
    MSTensor *device_tensor =
      MSTensor::CreateDevTensor(image.Name(), image.DataType(), image.Shape(),
                                image.MutableData(), image.DataSize());
    MSTensor device_tensor =
      MSTensor::CreateDeviceTensor(image.Name(), image.DataType(), image.Shape(),
                                   image.MutableData(), image.DataSize());
    MSTensor *tensor =
      MSTensor::CreateTensor(image.Name(), image.DataType(), image.Shape(),
                             image.Data().get(), image.DataSize());
@ -158,7 +158,7 @@ TEST_F(TestZeroCopy, TestDeviceTensor) {
    inputs.clear();
    start_time = (TimeValue){0};
    end_time = (TimeValue){0};
    inputs.push_back(*device_tensor);
    inputs.push_back(device_tensor);

    // infer with device tensor
    (void)gettimeofday(&start_time, nullptr);