!41575 TensorRT opt

Merge pull request !41575 from 徐永飞/new_trt
i-robot 2022-09-20 11:53:59 +00:00 committed by Gitee
commit 1c8b861898
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
79 changed files with 2686 additions and 762 deletions

View File

@ -60,8 +60,7 @@ void ParseLine(const std::string &line, std::map<std::string, std::string> *sect
}
} // namespace
int GetAllSectionInfoFromConfigFile(const std::string &file,
std::map<std::string, std::map<std::string, std::string>> *config) {
int GetAllSectionInfoFromConfigFile(const std::string &file, ConfigInfos *config) {
if (file.empty() || config == nullptr) {
MS_LOG(ERROR) << "input Invalid!check file and config.";
return RET_ERROR;

View File

@ -30,13 +30,14 @@
#include <utility>
#include "src/common/utils.h"
#include "src/common/log_adapter.h"
#include "src/common/config_infos.h"
#include "ir/dtype/type_id.h"
namespace mindspore {
namespace lite {
constexpr int MAX_CONFIG_FILE_LENGTH = 1024;
int GetAllSectionInfoFromConfigFile(const std::string &file,
std::map<std::string, std::map<std::string, std::string>> *config);
int GetAllSectionInfoFromConfigFile(const std::string &file, ConfigInfos *config);
void ParserExecutionPlan(const std::map<std::string, std::string> *config_infos,
std::map<std::string, TypeId> *data_type_plan);

View File

@ -0,0 +1,383 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/common/config_infos.h"
#include <vector>
#include "src/common/log_adapter.h"
#include "src/common/common.h"
namespace mindspore {
std::vector<std::string> ProfileParser::Split(const std::string &str, const std::string &delim) {
auto start = 0U;
auto end = str.find(delim);
std::vector<std::string> substrs;
while (end != std::string::npos) {
substrs.push_back(str.substr(start, end - start));
start = end + delim.length();
end = str.find(delim, start);
}
substrs.push_back(str.substr(start, end));
return substrs;
}
bool ProfileParser::StrToInt64(const std::string &str, int64_t *val) {
if (str.empty()) {
return false;
}
int64_t symbol = 1;
size_t index = 0;
if (str[0] == '-') {
symbol = -1;
index++;
}
int64_t val_ret = 0;
constexpr int64_t decimal_times = 10;
for (; index < str.size(); index++) {
auto c = str[index];
if (c < '0' || c > '9') {
return false;
}
val_ret = val_ret * decimal_times + (c - '0');
if (val_ret >= INT32_MAX) {
return false;
}
}
*val = symbol * val_ret;
return true;
}
constexpr int KV_NUM = 2;
bool ProfileParser::ParseShapeStr(const std::vector<std::string> &str_dims, ShapeVector *shape_ptr) {
if (shape_ptr == nullptr) {
return false;
}
if (str_dims.empty()) {
MS_LOG_ERROR << "Invalid input shape dim, dims number cannot be 0";
return false;
}
auto &shape = *shape_ptr;
shape.resize(str_dims.size());
for (size_t i = 0; i != str_dims.size(); ++i) {
int64_t dim = 0;
if (!StrToInt64(str_dims[i], &dim)) {
MS_LOG_ERROR << "Invalid input shape dim, dim value range or format is invalid: " << str_dims[i];
return false;
}
if (dim <= 0 && dim != -1) {
MS_LOG_ERROR << "Invalid input shape dim, dim can only be -1 when dim < 0 " << str_dims[i];
return false;
}
shape[i] = dim;
}
return true;
}
bool ProfileParser::ParseRangeStr(const std::string &range_str, int64_t *min_ptr, int64_t *max_ptr) {
if (min_ptr == nullptr || max_ptr == nullptr) {
return false;
}
auto min_and_max = Split(range_str, "~");
auto &min = *min_ptr;
auto &max = *max_ptr;
constexpr size_t number_range_count = 1;
constexpr size_t min_max_range_count = 2;
if (min_and_max.size() == number_range_count) {
if (!StrToInt64(min_and_max[0], &min)) {
MS_LOG_ERROR << "Invalid dynamic dim value range, dim value range or format is invalid: " << range_str
<< ". It should be 'min~max' or a number.";
return false;
}
max = min;
} else if (min_and_max.size() == min_max_range_count) {
if (!StrToInt64(min_and_max[0], &min) || !StrToInt64(min_and_max[1], &max)) {
MS_LOG_ERROR << "Invalid dynamic dim value range, dim value range or format is invalid: " << range_str
<< ". It should be 'min~max' or a number.";
return false;
}
} else {
MS_LOG_ERROR << "Invalid dynamic dim value range, dim value range or format is invalid: " << range_str
<< ". It should be 'min~max' or a number.";
return false;
}
if (min > max || min <= 0) {
MS_LOG(ERROR) << "Invalid dimension range string format of '" << lite::kDynamicDims << "': " << range_str;
return false;
}
return true;
}
bool ProfileParser::ParseOptDimStr(const std::string &opt_dim_str, int64_t *opt_ptr) {
if (opt_ptr == nullptr) {
return false;
}
auto &opt = *opt_ptr;
if (!StrToInt64(opt_dim_str, &opt)) {
MS_LOG_ERROR << "Invalid opt dim value range, dim value range or format is invalid: " << opt_dim_str
<< ". It should be a number.";
return false;
}
if (opt <= 0) {
MS_LOG(ERROR) << "Invalid opt dim value range '" << lite::kOptimizeDims << "': " << opt_dim_str;
return false;
}
return true;
}
bool ProfileParser::ParseInputShape(const std::string &input_shapes_str, ProfileConfigs *profile_configs_ptr) {
auto &profile_configs = *profile_configs_ptr;
auto input_slices = Split(input_shapes_str, ";");
for (auto input_slice : input_slices) {
auto name_and_shape = Split(input_slice, ":");
if (name_and_shape.size() != KV_NUM) {
MS_LOG(ERROR) << "each tensor has name and shape [" << input_slice << "]";
return false;
}
std::string name = name_and_shape[0];
std::string shape_str = name_and_shape[1];
if (shape_str.front() != '[' || shape_str.back() != ']') {
MS_LOG(ERROR) << "shape format check fail.";
return false;
}
constexpr size_t trim_len = 2;
shape_str = shape_str.substr(1, shape_str.size() - trim_len);
auto str_dims = Split(shape_str, ",");
ProfileInputInfo info;
info.name = name;
ShapeVector &shape = info.input_shape;
if (!ParseShapeStr(str_dims, &shape)) {
MS_LOG(ERROR) << "Invalid input shape dims: " << shape_str;
return false;
}
info.is_dynamic_shape = std::any_of(shape.begin(), shape.end(), [](auto dim) { return dim < 0; });
profile_configs.input_infos.push_back(info);
}
return true;
}
bool ProfileParser::ParseDynamicDims(const std::string &dynamic_dims_str, ProfileConfigs *profile_configs_ptr) {
auto &profile_configs = *profile_configs_ptr;
auto inputs_of_str = Split(dynamic_dims_str, ";");
if (inputs_of_str.size() != profile_configs.input_infos.size()) {
MS_LOG(ERROR) << "The input count " << inputs_of_str.size() << " in '" << lite::kDynamicDims
<< "' != the input count " << profile_configs.input_infos.size() << " '"
<< " in '" << lite::kInputShape;
return false;
}
// for every input
for (size_t input_index = 0; input_index != inputs_of_str.size(); ++input_index) {
auto &info = profile_configs.input_infos[input_index];
auto one_input_str = inputs_of_str[input_index];
auto profiles_of_str = Split(one_input_str, "],[");
if (profiles_of_str.empty()) {
MS_LOG(ERROR) << "The profile count of " << input_index << "th input in '" << lite::kDynamicDims << "' is 0";
return false;
}
if (profile_configs.profiles.empty()) {
profile_configs.profiles.resize(profiles_of_str.size());
for (auto &profile : profile_configs.profiles) {
profile.inputs.resize(profile_configs.input_infos.size());
}
}
if (profiles_of_str.size() != profile_configs.profiles.size()) {
MS_LOG(ERROR) << "The profile count " << profiles_of_str.size() << " of " << input_index << "th input in '"
<< lite::kDynamicDims << "' != profile count " << profile_configs.profiles.size() << " of "
<< (input_index - 1) << " th input";
return false;
}
// for every profile in one input, parse input range: min, max
for (size_t profile_index = 0; profile_index != profiles_of_str.size(); ++profile_index) {
ProfileItem &profile_item = profile_configs.profiles[profile_index];
ProfileInputRange &input_range = profile_item.inputs[input_index];
auto one_profile_str = profiles_of_str[profile_index];
while (one_profile_str.front() == '[' || one_profile_str.front() == ' ') {
one_profile_str = one_profile_str.substr(1);
}
while (one_profile_str.back() == ']' || one_profile_str.back() == ' ') {
one_profile_str = one_profile_str.substr(0, one_profile_str.size() - 1);
}
auto dim_ranges = Split(one_profile_str, ",");
auto &input_shape = info.input_shape;
size_t dynamic_nbdims = std::count(input_shape.begin(), input_shape.end(), -1);
if (dim_ranges.size() != dynamic_nbdims) {
MS_LOG(ERROR) << "Number of dynamic dims in config '" << lite::kDynamicDims << "' " << dim_ranges.size()
<< " != that in '" << lite::kInputShape << "' " << dynamic_nbdims << ".";
return false;
}
size_t range_index = 0;
input_range.min_dims = input_shape;
input_range.max_dims = input_shape;
for (size_t i = 0; i < input_shape.size(); ++i) {
if (input_shape[i] != -1) {
continue;
}
if (!ParseRangeStr(dim_ranges[range_index++], &input_range.min_dims[i], &input_range.max_dims[i])) {
return false;
}
}
input_range.opt_dims = input_range.min_dims; // default
}
}
return true;
}
bool ProfileParser::ParseOptDims(const std::string &opt_dims_str, ProfileConfigs *profile_configs_ptr) {
auto &profile_configs = *profile_configs_ptr;
auto inputs_of_str = Split(opt_dims_str, ";");
if (inputs_of_str.size() != profile_configs.input_infos.size()) {
MS_LOG(ERROR) << "The input count " << inputs_of_str.size() << " in '" << lite::kOptimizeDims
<< "' != the input count " << profile_configs.input_infos.size() << " '"
<< " in '" << lite::kInputShape;
return false;
}
// for every input
for (size_t input_index = 0; input_index != inputs_of_str.size(); ++input_index) {
auto &info = profile_configs.input_infos[input_index];
auto one_input_str = inputs_of_str[input_index];
auto profiles_of_str = Split(one_input_str, "],[");
if (profiles_of_str.size() != profile_configs.profiles.size()) {
MS_LOG(ERROR) << "The profile count " << profiles_of_str.size() << " of " << input_index << "th input in '"
<< lite::kOptimizeDims << "' != profile count " << profile_configs.profiles.size() << " in '"
<< lite::kDynamicDims << "'";
return false;
}
// for every profile in one input, parse input range: min, max
for (size_t profile_index = 0; profile_index != profiles_of_str.size(); ++profile_index) {
ProfileItem &profile_item = profile_configs.profiles[profile_index];
ProfileInputRange &input_range = profile_item.inputs[input_index];
auto one_profile_str = profiles_of_str[profile_index];
while (one_profile_str.front() == '[' || one_profile_str.front() == ' ') {
one_profile_str = one_profile_str.substr(1);
}
while (one_profile_str.back() == ']' || one_profile_str.back() == ' ') {
one_profile_str = one_profile_str.substr(0, one_profile_str.size() - 1);
}
auto opt_dims_vec = Split(one_profile_str, ",");
auto &input_shape = info.input_shape;
size_t dynamic_nbdims = std::count(input_shape.begin(), input_shape.end(), -1);
if (opt_dims_vec.size() != dynamic_nbdims) {
MS_LOG(ERROR) << "Number of dynamic dims in config '" << lite::kOptimizeDims << "' " << opt_dims_vec.size()
<< " != that in '" << lite::kInputShape << "' " << dynamic_nbdims << ".";
return false;
}
size_t dynamic_index = 0;
input_range.opt_dims = input_shape;
for (size_t i = 0; i < input_shape.size(); ++i) {
if (input_shape[i] != -1) {
continue;
}
if (!ParseOptDimStr(opt_dims_vec[dynamic_index++], &input_range.opt_dims[i])) {
return false;
}
}
}
}
return true;
}
bool ProfileParser::Parse(const std::map<std::string, std::string> &context, bool require_opt_when_dym,
ProfileConfigs *profile_configs_ptr) {
if (profile_configs_ptr == nullptr) {
return false;
}
auto &profile_configs = *profile_configs_ptr;
auto get_item = [&context](const std::string &key) -> std::string {
auto it = context.find(key);
return it != context.end() ? it->second : "";
};
auto input_shapes = get_item(lite::kInputShape);
auto dynamic_dims = get_item(lite::kDynamicDims);
auto opt_dims = get_item(lite::kOptimizeDims);
if (input_shapes.empty() && dynamic_dims.empty() && opt_dims.empty()) {
MS_LOG(INFO) << "Do not found config of input range('" << lite::kInputShape << "')";
return true;
}
if (input_shapes.empty()) {
MS_LOG(ERROR) << "Config of '" << lite::kInputShape << " cannot be empty when '" << lite::kInputShape << "' or '"
<< lite::kOptimizeDims << "' is not empty";
return false;
}
if (!ParseInputShape(input_shapes, &profile_configs)) {
MS_LOG(ERROR) << "parse input shape failed.";
return false;
}
if (dynamic_dims.empty()) {
ProfileItem profile_item;
for (size_t i = 0; i < profile_configs.input_infos.size(); i++) {
if (profile_configs.input_infos[i].is_dynamic_shape) {
MS_LOG(ERROR) << "Config of '" << lite::kDynamicDims << "' cannot be empty when " << lite::kInputShape
<< " is dynamic";
return false;
}
auto &input_shape = profile_configs.input_infos[i].input_shape;
ProfileInputRange input_range;
input_range.min_dims = input_shape;
input_range.max_dims = input_shape;
input_range.opt_dims = input_shape;
profile_item.inputs.push_back(input_range);
}
profile_configs.profiles.push_back(profile_item);
return true;
}
if (!ParseDynamicDims(dynamic_dims, &profile_configs)) {
MS_LOG(ERROR) << "parse dynamic dims failed.";
return false;
}
if (require_opt_when_dym) {
if (!ParseOptDims(opt_dims, &profile_configs)) {
MS_LOG(ERROR) << "parse optimization dims failed.";
return false;
}
}
return true;
}
bool ProfileParser::ReorderByInputNames(const std::vector<std::string> &input_names, ProfileConfigs *profile_configs) {
if (input_names.size() != profile_configs->input_infos.size()) {
MS_LOG_ERROR << "Input name size " << input_names.size() << " != profile config input size "
<< profile_configs->input_infos.size();
return false;
}
ProfileConfigs new_profile_configs = *profile_configs;
auto &input_infos = profile_configs->input_infos;
auto &profiles = profile_configs->profiles;
for (size_t input_index = 0; input_index < input_names.size(); input_index++) {
auto input_name = input_names[input_index];
size_t i = 0;
for (; i < input_infos.size(); i++) {
if (input_infos[i].name == input_name) {
new_profile_configs.input_infos[input_index] = input_infos[i];
for (size_t profile_index = 0; profile_index < profiles.size(); profile_index++) {
new_profile_configs.profiles[profile_index].inputs[input_index] = profiles[profile_index].inputs[i];
}
break;
}
}
if (i >= input_infos.size()) {
MS_LOG_ERROR << "Cannot find input " << input_name << " in profile '" << lite::kInputShape << "' config";
return false;
}
}
*profile_configs = new_profile_configs;
return true;
}
} // namespace mindspore
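For orientation, here is a minimal usage sketch of the parser added above. The shape and range strings are made up and only mirror the formats that ParseInputShape, ParseDynamicDims and ParseOptDims accept (one input named x with two dynamic dims and two profiles):

```cpp
#include <map>
#include <string>
#include "src/common/config_infos.h"
#include "src/common/common.h"  // assumed location of lite::kInputShape etc., mirroring config_infos.cc above

void ExampleParse() {  // hypothetical helper, not part of the diff
  std::map<std::string, std::string> context = {
    {mindspore::lite::kInputShape, "x:[1,3,-1,-1]"},           // two dynamic dims
    {mindspore::lite::kDynamicDims, "[32~64,32~64],[96,96]"},  // two profiles for input x
    {mindspore::lite::kOptimizeDims, "[48,48],[96,96]"},       // one opt value per dynamic dim
  };
  mindspore::ProfileConfigs configs;
  if (!mindspore::ProfileParser::Parse(context, /*require_opt_when_dym=*/true, &configs)) {
    return;  // Parse already logged which string was rejected
  }
  // configs.profiles[p].inputs[0].{min_dims, opt_dims, max_dims} now hold full
  // [1,3,H,W] shapes per profile, e.g. min [1,3,32,32] / max [1,3,64,64] for p == 0.
}
```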

View File

@ -0,0 +1,70 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_COMMON_CONFIG_INFOS_H_
#define MINDSPORE_LITE_SRC_COMMON_CONFIG_INFOS_H_
#include <string>
#include <map>
#include <vector>
#include "include/api/visible.h"
#include "mindapi/base/shape_vector.h"
namespace mindspore {
using ConfigInfos = std::map<std::string, std::map<std::string, std::string>>;
struct ProfileInputInfo {
std::string name;
bool is_dynamic_shape = false;
ShapeVector input_shape;
};
struct ProfileInputRange {
ShapeVector min_dims;
ShapeVector opt_dims;
ShapeVector max_dims;
};
struct ProfileItem {
std::vector<ProfileInputRange> inputs;
};
struct ProfileConfigs {
std::vector<ProfileInputInfo> input_infos;
std::vector<ProfileItem> profiles;
};
class MS_API ProfileParser {
public:
static bool Parse(const std::map<std::string, std::string> &context, bool require_opt_when_dym,
ProfileConfigs *profile_configs);
static bool ReorderByInputNames(const std::vector<std::string> &input_names, ProfileConfigs *profile_configs);
private:
static bool ParseOptDims(const std::string &opt_dims_str, ProfileConfigs *profile_configs);
static bool ParseDynamicDims(const std::string &dynamic_dims_str, ProfileConfigs *profile_configs);
static bool ParseInputShape(const std::string &input_shapes_str, ProfileConfigs *profile_configs);
static bool StrToInt64(const std::string &str, int64_t *val);
static std::vector<std::string> Split(const std::string &str, const std::string &delim);
static bool ParseShapeStr(const std::vector<std::string> &str_dims, ShapeVector *shape_ptr);
static bool ParseRangeStr(const std::string &range_str, int64_t *min_ptr, int64_t *max_ptr);
static bool ParseOptDimStr(const std::string &opt_dim_str, int64_t *opt_ptr);
};
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_COMMON_CONFIG_INFOS_H_
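The containers above are indexed the same way the parser fills them: input_infos has one entry per model input, profiles has one entry per optimization profile, and each profile carries one ProfileInputRange per input. A small sketch of walking a parsed result (illustrative, not from the diff):

```cpp
#include "src/common/config_infos.h"

// After a successful ProfileParser::Parse with N inputs and P profiles:
//   configs.input_infos.size()        == N
//   configs.profiles.size()           == P
//   configs.profiles[p].inputs.size() == N   (min/opt/max dims are full shapes)
void DumpProfiles(const mindspore::ProfileConfigs &configs) {  // hypothetical helper
  for (const auto &profile : configs.profiles) {   // one entry per optimization profile
    for (const auto &range : profile.inputs) {     // one entry per model input
      (void)range.min_dims;  // e.g. feed min/opt/max into a TensorRT optimization profile
      (void)range.opt_dims;
      (void)range.max_dims;
    }
  }
}
```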

View File

@ -120,6 +120,7 @@ if(MSLITE_ENABLE_CLOUD_FUSION_INFERENCE)
${CCSRC_DIR}/utils/scoped_long_running.cc
${CCSRC_DIR}/utils/cse.cc
${CCSRC_DIR}/utils/comm_manager.cc
${CCSRC_DIR}/utils/signal_util.cc
${CORE_DIR}/utils/status.cc
)
add_library(mindspore-kernel-graph OBJECT ${KERNEL_GRAPH_SRC})

View File

@ -216,10 +216,37 @@ Status Model::BindGLTexture2DMemory(const std::map<std::string, unsigned int> &i
return kSuccess;
}
Status Model::LoadConfig(const std::vector<char> &config_path) { return kSuccess; }
Status Model::LoadConfig(const std::vector<char> &config_path) {
std::unique_lock<std::mutex> impl_lock(g_impl_init_lock);
if (impl_ != nullptr) {
MS_LOG(ERROR) << "impl_ illegal in LoadConfig.";
return Status(kLiteFileError, "Illegal operation.");
}
impl_ = std::make_shared<ModelImpl>();
if (impl_ == nullptr) {
MS_LOG(ERROR) << "Model implement is null.";
return Status(kLiteFileError, "Fail to load config file.");
}
auto ret = impl_->LoadConfig(CharToString(config_path));
if (ret != kSuccess) {
MS_LOG(ERROR) << "impl_ LoadConfig failed,";
return Status(kLiteFileError, "Invalid config file.");
}
return kSuccess;
}
Status Model::UpdateConfig(const std::vector<char> &section,
const std::pair<std::vector<char>, std::vector<char>> &config) {
return kSuccess;
std::unique_lock<std::mutex> impl_lock(g_impl_init_lock);
if (impl_ == nullptr) {
impl_ = std::make_shared<ModelImpl>();
}
if (impl_ != nullptr) {
return impl_->UpdateConfig(CharToString(section), {CharToString(config.first), CharToString(config.second)});
}
MS_LOG(ERROR) << "Model implement is null!";
return kLiteFileError;
}
} // namespace mindspore
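With the two stubs above filled in, a config file can now reach the session created later by Build. A hedged caller-side sketch, assuming the std::string convenience overloads of LoadConfig/UpdateConfig in include/api/model.h; the file name, section and key are placeholders:

```cpp
#include "include/api/model.h"
#include "include/api/status.h"

// Hypothetical flow: LoadConfig must run before Build, since the code above
// rejects it once impl_ already exists.
void ConfigureModel(mindspore::Model *model) {
  if (model->LoadConfig("trt_profiles.cfg") != mindspore::kSuccess) {  // placeholder path
    return;  // missing or malformed config file
  }
  // Override a single entry without editing the file; names are placeholders.
  (void)model->UpdateConfig("some_section", {"some_key", "some_value"});
  // ... then model->Build(...) and model->Predict(...) as usual.
}
```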

View File

@ -44,7 +44,7 @@ Status ModelImpl::BuildByBufferImpl(const void *model_data, size_t data_size, Mo
// user does not set mindir_path, convert from model_path
mindir_path = model_path.substr(0, model_path.rfind("/"));
}
session_ = InferSession::CreateSession(model_context);
session_ = InferSession::CreateSession(model_context, config_info_);
if (session_ == nullptr) {
MS_LOG(ERROR) << "Create session failed.";
return kLiteNullptr;
@ -254,7 +254,6 @@ Status ModelImpl::Predict() {
auto outputs = GetOutputs();
return Predict(inputs, &outputs);
}
bool ModelImpl::HasPreprocess() { return graph_->graph_data_->GetPreprocess().empty() ? false : true; }
Status ModelImpl::Preprocess(const std::vector<std::vector<MSTensor>> &inputs, std::vector<MSTensor> *outputs) {
@ -345,19 +344,13 @@ Status ModelImpl::PredictWithPreprocess(const std::vector<std::vector<MSTensor>>
}
Status ModelImpl::LoadConfig(const std::string &config_path) {
std::map<std::string, std::map<std::string, std::string>> all_config_info;
ConfigInfos all_config_info;
int ret = lite::GetAllSectionInfoFromConfigFile(config_path, &all_config_info);
if (ret != kSuccess) {
MS_LOG(ERROR) << "GetAllSectionInfoFromConfigFile fail!ret: " << ret;
return kLiteFileError;
}
config_info_ = all_config_info;
std::map<std::string, std::string> config_info = all_config_info[kExecutionPlan];
if (config_info.empty()) {
MS_LOG(WARNING) << "No valid execution plan info in config file.";
return kSuccess;
}
return kSuccess;
}

View File

@ -29,6 +29,8 @@
#include "include/common/utils/utils.h"
#include "ir/func_graph.h"
#include "extendrt/infer_session.h"
#include "src/common/config_infos.h"
#ifndef _WIN32
#include <dlfcn.h>
#endif
@ -73,10 +75,11 @@ class ModelImpl {
std::shared_ptr<Graph> graph_ = nullptr;
std::shared_ptr<InferSession> session_ = nullptr;
std::shared_ptr<Context> context_ = nullptr;
ConfigInfos config_info_;
std::map<std::string, TypeId> execution_plan_;
#ifndef _WIN32
void *handle_ = nullptr;
#endif
std::map<std::string, std::map<std::string, std::string>> config_info_;
};
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_MODEL_MODEL_IMPL_H_

View File

@ -1096,7 +1096,7 @@ Status ModelPool::InitByPath(const std::string &model_path, const std::shared_pt
size_t size = 0;
graph_buf_ = lite::ReadFile(model_path.c_str(), &size);
if (graph_buf_ == nullptr) {
MS_LOG(ERROR) << "read ms model failed, model path: " << model_path;
MS_LOG(ERROR) << "read model failed, model path: " << model_path;
return kLiteNullptr;
}
auto status = Init(graph_buf_, size, runner_config);

View File

@ -24,11 +24,15 @@
#include "runtime/hardware/device_context.h"
#include "src/extendrt/delegate_graph_executor.h"
#include "include/api/context.h"
#include "src/common/config_infos.h"
namespace mindspore {
using mindspore::device::GraphExecutor;
// TODO(zhaizhiqiang): Wrap graph executor as delegate.
// typedef std::shared_ptr<GraphSinkDelegate> (*DelegateCreator)(const std::shared_ptr<Context> &);
typedef std::shared_ptr<GraphExecutor> (*DelegateCreator)(const std::shared_ptr<Context> &);
using DelegateCreator =
std::function<std::shared_ptr<GraphExecutor>(const std::shared_ptr<Context> &, const ConfigInfos &)>;
class MS_API DelegateRegistry {
public:
DelegateRegistry() = default;
@ -48,7 +52,7 @@ class MS_API DelegateRegistry {
}
std::shared_ptr<GraphExecutor> GetDelegate(const mindspore::DeviceType &device_type, const std::string &provider,
const std::shared_ptr<Context> &ctx) {
const std::shared_ptr<Context> &ctx, const ConfigInfos &config_infos) {
// find common delegate
auto it = creator_map_.find(device_type);
if (it == creator_map_.end()) {
@ -58,7 +62,7 @@ class MS_API DelegateRegistry {
if (creator_it == it->second.end()) {
return nullptr;
}
return creator_it->second(ctx);
return creator_it->second(ctx, config_infos);
}
private:

View File

@ -26,6 +26,7 @@ set(LITE_SRC
${LITE_DIR}/src/common/log.cc
${LITE_DIR}/src/common/prim_util.cc
${LITE_DIR}/src/common/tensor_util.cc
${LITE_DIR}/src/common/config_infos.cc
${LITE_DIR}/src/litert/allocator.cc
${LITE_DIR}/src/litert/inner_allocator.cc
${LITE_DIR}/src/litert/runtime_allocator.cc

View File

@ -41,7 +41,9 @@ const int64_t kBufferSize = 1024;
} // namespace
const char litert_provider[] = "litert";
LiteRTGraphExecutor::LiteRTGraphExecutor(const std::shared_ptr<mindspore::Context> &context) : context_(context) {
LiteRTGraphExecutor::LiteRTGraphExecutor(const std::shared_ptr<mindspore::Context> &context,
const ConfigInfos &config_infos)
: context_(context), config_infos_(config_infos) {
lite_session_ = CreateLiteSession(ContextUtils::Convert(context_.get()));
}
@ -355,8 +357,9 @@ bool LiteRTGraphExecutor::IsNeedExtractTensorData(mindspore::schema::MetaGraphT
return false;
}
static std::shared_ptr<device::GraphExecutor> LiteRTGraphExecutorCreator(const std::shared_ptr<Context> &ctx) {
return std::make_shared<LiteRTGraphExecutor>(ctx);
static std::shared_ptr<device::GraphExecutor> LiteRTGraphExecutorCreator(const std::shared_ptr<Context> &ctx,
const ConfigInfos &config_infos) {
return std::make_shared<LiteRTGraphExecutor>(ctx, config_infos);
}
REG_DELEGATE(kCPU, litert_provider, LiteRTGraphExecutorCreator);

View File

@ -28,12 +28,13 @@
#include "include/model.h"
#include "src/litert/lite_session.h"
#include "src/common/helper/infer_helpers.h"
#include "src/common/config_infos.h"
namespace mindspore {
class LiteRTGraphExecutor : public LiteGraphExecutor {
public:
LiteRTGraphExecutor() = default;
explicit LiteRTGraphExecutor(const std::shared_ptr<mindspore::Context> &context);
LiteRTGraphExecutor(const std::shared_ptr<mindspore::Context> &context, const ConfigInfos &config_infos);
~LiteRTGraphExecutor() = default;
bool CompileGraph(const FuncGraphPtr &graph, const std::map<string, string> &compile_options) override;
bool RunGraph(const FuncGraphPtr &graph, const std::vector<tensor::Tensor> &inputs,
@ -56,6 +57,7 @@ class LiteRTGraphExecutor : public LiteGraphExecutor {
private:
const std::shared_ptr<mindspore::Context> context_;
ConfigInfos config_infos_;
lite::LiteGraph lite_graph_;
std::shared_ptr<lite::LiteSession> lite_session_;
std::shared_ptr<mindspore::infer::helper::InferHelpers> helpers_ = nullptr;

View File

@ -39,6 +39,20 @@ __global__ void LogicalOrKernel(const T *input_addr1, const T *input_addr2, T *o
}
}
template <typename T>
__global__ void GreaterOrEqualKernal(const T *input1, const T *input2, T *output, int element_cnt) {
for (int pos = blockIdx.x * blockDim.x + threadIdx.x; pos < element_cnt; pos += blockDim.x * gridDim.x) {
output[pos] = (input1[pos] >= input2[pos]);
}
}
template <typename T>
__global__ void LessOrEqualKernal(const T *input1, const T *input2, T *output, int element_cnt) {
for (int pos = blockIdx.x * blockDim.x + threadIdx.x; pos < element_cnt; pos += blockDim.x * gridDim.x) {
output[pos] = (input1[pos] <= input2[pos]);
}
}
template <typename T>
void LogicalNot(const T *input1, T *output, int element_cnt, cudaStream_t stream) {
LogicalNotKernel<<<GET_BLOCKS(element_cnt), GET_THREADS, 0, stream>>>(input1, output, element_cnt);
@ -54,6 +68,26 @@ void LogicalOr(const T *input1, const T *input2, T *output, int element_cnt, cud
LogicalOrKernel<<<GET_BLOCKS(element_cnt), GET_THREADS, 0, stream>>>(input1, input2, output, element_cnt);
}
template <typename T>
void GreaterOrEqual(const T *input1, const T *input2, T *output, int element_cnt, cudaStream_t stream) {
GreaterOrEqualKernal<<<GET_BLOCKS(element_cnt), GET_THREADS, 0, stream>>>(input1, input2, output, element_cnt);
}
template <typename T>
void LessOrEqual(const T *input1, const T *input2, T *output, int element_cnt, cudaStream_t stream) {
LessOrEqualKernal<<<GET_BLOCKS(element_cnt), GET_THREADS, 0, stream>>>(input1, input2, output, element_cnt);
}
template void GreaterOrEqual(const float *input1, const float *input2, float *output, int element_cnt,
cudaStream_t stream);
template void GreaterOrEqual(const int *input1, const int *input2, int *output, int element_cnt, cudaStream_t stream);
template void LessOrEqual(const float *input1, const float *input2, float *output, int element_cnt,
cudaStream_t stream);
template void LessOrEqual(const int *input1, const int *input2, int *output, int element_cnt, cudaStream_t stream);
template void LogicalNot(const int32_t *input1, int32_t *output, int element_cnt, cudaStream_t stream);
template void LogicalAnd(const int32_t *input1, const int32_t *input2, int32_t *output, int element_cnt,

View File

@ -26,4 +26,10 @@ void LogicalOr(const T *input1, const T *input2, T *output, int element_cnt, cud
template <typename T>
void LogicalNot(const T *input1, T *output, int element_cnt, cudaStream_t stream);
template <typename T>
void GreaterOrEqual(const T *input1, const T *input2, T *output, int element_cnt, cudaStream_t stream);
template <typename T>
void LessOrEqual(const T *input1, const T *input2, T *output, int element_cnt, cudaStream_t stream);
#endif // MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_CDUA_IMPL_LOGICAL_H_
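The new comparison helpers follow the same grid-stride launch pattern as the existing logical kernels and are explicitly instantiated for float and int. A host-side sketch of calling the float version (buffers are assumed to be already-allocated device memory):

```cpp
#include <cuda_runtime.h>
// plus the logical.cuh header above for the GreaterOrEqual / LessOrEqual declarations

void CompareOnDevice(const float *d_a, const float *d_b, float *d_out, int n) {
  // d_a, d_b, d_out are assumed to be cudaMalloc'ed buffers of n floats.
  cudaStream_t stream;
  cudaStreamCreate(&stream);
  GreaterOrEqual(d_a, d_b, d_out, n, stream);  // d_out[i] = (d_a[i] >= d_b[i]) ? 1.0f : 0.0f
  LessOrEqual(d_a, d_b, d_out, n, stream);     // overwrites d_out with the <= result
  cudaStreamSynchronize(stream);
  cudaStreamDestroy(stream);
}
```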

View File

@ -90,8 +90,8 @@ int ActivationOptPlugin::RunCudaActivation(const nvinfer1::PluginTensorDesc *inp
break;
}
case (ActivationType::SWISH): {
CalSwish(GetDimsVolume(inputDesc[0].dims), static_cast<const float *>(inputs[0]),
static_cast<float *>(outputs[0]), stream, device_id_);
Swish(GetDimsVolume(inputDesc[0].dims), static_cast<const float *>(inputs[0]), static_cast<float *>(outputs[0]),
stream, device_id_);
break;
}
default: {

View File

@ -0,0 +1,100 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/extendrt/delegate/tensorrt/op/batchnorm_tensorrt.h"
#include <memory>
#include <numeric>
#include "ops/fused_batch_norm.h"
namespace mindspore::lite {
int BatchNormTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors) {
if (in_tensors.size() != INPUT_SIZE5 && in_tensors.size() != 1) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
return RET_ERROR;
}
if (out_tensors.size() != 1) {
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << in_tensors.size();
return RET_ERROR;
}
return RET_OK;
}
int BatchNormTensorRT::AddInnerOp(TensorRTContext *ctx) {
CHECK_NULL_RETURN(ctx->network());
auto norm_op = AsOps<ops::FusedBatchNorm>();
CHECK_NULL_RETURN(norm_op);
epsilon_ = norm_op->get_epsilon();
ITensorHelper norm_input;
int ret = PreprocessInputs2SameDim(ctx, input(ctx, 0), &norm_input);
if (ret != RET_OK || norm_input.trt_tensor_ == nullptr) {
MS_LOG(ERROR) << "PreprocessInputs2SameDim norm_input failed for " << op_name_;
return RET_ERROR;
}
auto expect_shape = ConvertMSShape(norm_input.trt_tensor_->getDimensions());
gamma_ = ConvertTensorWithExpandDims(ctx, in_tensors_[1], expect_shape, op_name_ + in_tensors_[1].Name());
CHECK_NULL_RETURN(gamma_);
beta_ =
ConvertTensorWithExpandDims(ctx, in_tensors_[BETA_INDEX], expect_shape, op_name_ + in_tensors_[BETA_INDEX].Name());
CHECK_NULL_RETURN(beta_);
mean_ =
ConvertTensorWithExpandDims(ctx, in_tensors_[MEAN_INDEX], expect_shape, op_name_ + in_tensors_[MEAN_INDEX].Name());
CHECK_NULL_RETURN(mean_);
var_ =
ConvertTensorWithExpandDims(ctx, in_tensors_[VAR_INDEX], expect_shape, op_name_ + in_tensors_[VAR_INDEX].Name());
CHECK_NULL_RETURN(var_);
return RunAsTrtOps(ctx, norm_input);
}
int BatchNormTensorRT::RunAsTrtOps(TensorRTContext *ctx, ITensorHelper norm_input) {
// var + min epsilon
auto const_epsilon = ConvertScalarToITensor(ctx, norm_input.trt_tensor_->getDimensions().nbDims, &epsilon_,
DataType::kNumberTypeFloat32, op_name_ + "_epsilon");
CHECK_NULL_RETURN(const_epsilon);
auto var_epsilon =
ctx->network()->addElementWise(*var_, *const_epsilon, nvinfer1::ElementWiseOperation::kSUM)->getOutput(0);
CHECK_NULL_RETURN(var_epsilon);
// standard deviation
auto std_dev = ctx->network()->addUnary(*var_epsilon, nvinfer1::UnaryOperation::kSQRT)->getOutput(0);
CHECK_NULL_RETURN(std_dev);
auto scale = ctx->network()->addElementWise(*gamma_, *std_dev, nvinfer1::ElementWiseOperation::kDIV)->getOutput(0);
CHECK_NULL_RETURN(scale);
auto mean_mul_scale =
ctx->network()->addElementWise(*mean_, *scale, nvinfer1::ElementWiseOperation::kPROD)->getOutput(0);
CHECK_NULL_RETURN(mean_mul_scale);
auto bias =
ctx->network()->addElementWise(*beta_, *mean_mul_scale, nvinfer1::ElementWiseOperation::kSUB)->getOutput(0);
CHECK_NULL_RETURN(bias);
// scale with bias
auto scale_layer =
ctx->network()->addElementWise(*norm_input.trt_tensor_, *scale, nvinfer1::ElementWiseOperation::kPROD);
this->layer_ = scale_layer;
auto scale_out = scale_layer->getOutput(0);
CHECK_NULL_RETURN(scale_out);
auto beta_out = ctx->network()->addElementWise(*scale_out, *bias, nvinfer1::ElementWiseOperation::kSUM)->getOutput(0);
CHECK_NULL_RETURN(beta_out);
ctx->RegisterTensor(ITensorHelper{beta_out, Format::NCHW, true}, out_tensors_[0].Name());
return RET_OK;
}
REGISTER_TENSORRT_CREATOR(ops::kNameFusedBatchNorm, BatchNormTensorRT)
} // namespace mindspore::lite
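RunAsTrtOps rebuilds inference-mode batch normalization from plain elementwise layers; the kSUM / kSQRT / kDIV / kPROD / kSUB chain is the usual factoring of the formula into a per-channel scale and bias (algebra sketch, matching the scale and bias tensors in the code above):

```latex
y = \gamma \cdot \frac{x - \mu}{\sqrt{\sigma^{2} + \epsilon}} + \beta
  = s \cdot x + b,
\qquad s = \frac{\gamma}{\sqrt{\sigma^{2} + \epsilon}} \;\;(\text{scale}),
\qquad b = \beta - \mu \cdot s \;\;(\text{bias})
```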

View File

@ -0,0 +1,50 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_BATCH_NORM_TENSORRT_H_
#define MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_BATCH_NORM_TENSORRT_H_
#include <string>
#include <vector>
#include "src/extendrt/delegate/tensorrt/op/tensorrt_op.h"
namespace mindspore::lite {
constexpr int BETA_INDEX = 2;
constexpr int MEAN_INDEX = 3;
constexpr int VAR_INDEX = 4;
class BatchNormTensorRT : public TensorRTOp {
public:
BatchNormTensorRT(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors, std::string name)
: TensorRTOp(base_operator, in_tensors, out_tensors, name) {}
~BatchNormTensorRT() override = default;
int AddInnerOp(TensorRTContext *ctx) override;
int IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors) override;
private:
int RunAsTrtOps(TensorRTContext *ctx, ITensorHelper helper);
float epsilon_{0.0f};
nvinfer1::ITensor *gamma_{nullptr};
nvinfer1::ITensor *beta_{nullptr};
nvinfer1::ITensor *mean_{nullptr};
nvinfer1::ITensor *var_{nullptr};
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_BATCH_NORM_TENSORRT_H_

View File

@ -29,7 +29,7 @@
namespace mindspore::lite {
int BatchToSpaceTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors) {
if (in_tensors.size() != 3) {
if (in_tensors.size() != INPUT_SIZE3) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
return RET_ERROR;
}
@ -38,8 +38,6 @@ int BatchToSpaceTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
return RET_ERROR;
}
dynamic_shape_params_.support_dynamic_ = false;
dynamic_shape_params_.support_hw_dynamic_ = false;
return RET_OK;
}
@ -52,19 +50,17 @@ int BatchToSpaceTensorRT::AddInnerOp(TensorRTContext *ctx) {
MS_LOG(ERROR) << "block_h not equal block_w " << op_name_;
return RET_ERROR;
}
const int *pad_ptr = reinterpret_cast<const int *>(in_tensors_[2].Data());
const int *pad_ptr = reinterpret_cast<const int *>(in_tensors_[INPUT_SIZE2].Data());
int ph0 = *(pad_ptr + 0);
int ph1 = *(pad_ptr + 1);
int pw0 = *(pad_ptr + 2);
int pw1 = *(pad_ptr + 3);
auto in_dims = input_tensor->getDimensions();
int in = in_dims.d[0];
int ic = in_dims.d[1];
int ih = in_dims.d[2];
int iw = in_dims.d[3];
int pw0 = *(pad_ptr + INPUT_SIZE2);
int pw1 = *(pad_ptr + INPUT_SIZE3);
auto plugin =
std::make_shared<BatchToSpacePlugin>(input_tensor->getName(), in, ic, ih, iw, bh, ph0, ph1, pw0, pw1, device_id_);
auto plugin = std::make_shared<BatchToSpacePlugin>(input_tensor->getName(), bh, ph0, ph1, pw0, pw1, device_id_);
if (plugin == nullptr) {
MS_LOG(ERROR) << "add batchtospace plugin failed for" << op_name_;
return RET_ERROR;
}
nvinfer1::ITensor *inputTensors[] = {input_tensor};
nvinfer1::IPluginV2Layer *space2batch_opt_layer = ctx->network()->addPluginV2(inputTensors, 1, *plugin);
if (space2batch_opt_layer == nullptr) {
@ -93,15 +89,20 @@ int BatchToSpacePlugin::enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
int BatchToSpacePlugin::RunCudaBatchToSpace(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs,
void *const *outputs, cudaStream_t stream) {
int on = in_ / (bh_ * bh_);
int oc = ic_;
int oh = ih_ * bh_ - ph0_ - ph1_;
int ow = iw_ * bh_ - pw0_ - pw1_;
nvinfer1::Dims input_dims = inputDesc[0].dims;
int in = input_dims.d[0];
int ic = input_dims.d[1];
int ih = input_dims.d[2];
int iw = input_dims.d[3];
int on = in / (bh_ * bh_);
int oc = ic;
int oh = ih * bh_ - ph0_ - ph1_;
int ow = iw * bh_ - pw0_ - pw1_;
int size = on * oc * oh * ow;
CalBatchToSpace<float>(size, static_cast<const float *>(inputs[0]), in_, ih_, iw_, ic_, on, oh, ow, oc, ph0_, ph1_,
pw0_, pw1_, bh_, static_cast<float *>(outputs[0]), device_id_, stream);
CalBatchToSpace<float>(size, static_cast<const float *>(inputs[0]), in, ih, iw, ic, on, oh, ow, oc, ph0_, ph1_, pw0_,
pw1_, bh_, static_cast<float *>(outputs[0]), device_id_, stream);
return RET_OK;
}
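Because the plugin now reads its geometry from inputDesc at enqueue time rather than from dimensions captured when the engine was built, the output size follows directly from the runtime dims. A worked example with illustrative numbers:

```cpp
// Illustrative values only: block bh_ = 2, crops ph0_ = 0, ph1_ = 1, pw0_ = 0, pw1_ = 1,
// and a runtime input of NCHW = [4, 3, 5, 5] taken from inputDesc[0].dims.
int on = 4 / (2 * 2);          // 1
int oc = 3;                    // channels pass through unchanged
int oh = 5 * 2 - 0 - 1;        // 9
int ow = 5 * 2 - 0 - 1;        // 9
int size = on * oc * oh * ow;  // 243 elements written by CalBatchToSpace
```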
@ -115,7 +116,7 @@ nvinfer1::IPluginV2DynamicExt *BatchToSpacePlugin::clone() const noexcept {
return plugin;
}
size_t BatchToSpacePlugin::getSerializationSize() const noexcept { return sizeof(int) * 9; }
size_t BatchToSpacePlugin::getSerializationSize() const noexcept { return sizeof(int) * 5; }
nvinfer1::DimsExprs BatchToSpacePlugin::getOutputDimensions(int index, const nvinfer1::DimsExprs *inputs,
int nbInputDims,
@ -127,19 +128,15 @@ nvinfer1::DimsExprs BatchToSpacePlugin::getOutputDimensions(int index, const nvi
auto bh = exprBuilder.constant(bh_);
dims.d[1] = inputs[0].d[1];
auto ph_sum = exprBuilder.constant(ph0_ + ph1_);
auto pod0 = exprBuilder.operation(nvinfer1::DimensionOperation::kPROD, *inputs[0].d[2], *bh);
dims.d[2] = exprBuilder.operation(nvinfer1::DimensionOperation::kSUB, *pod0, *ph_sum);
auto pod0 = exprBuilder.operation(nvinfer1::DimensionOperation::kPROD, *inputs[0].d[INPUT_SIZE2], *bh);
dims.d[INPUT_SIZE2] = exprBuilder.operation(nvinfer1::DimensionOperation::kSUB, *pod0, *ph_sum);
auto pw_sum = exprBuilder.constant(pw0_ + pw1_);
auto pod1 = exprBuilder.operation(nvinfer1::DimensionOperation::kPROD, *inputs[0].d[3], *bh);
dims.d[3] = exprBuilder.operation(nvinfer1::DimensionOperation::kSUB, *pod1, *pw_sum);
auto pod1 = exprBuilder.operation(nvinfer1::DimensionOperation::kPROD, *inputs[0].d[INPUT_SIZE3], *bh);
dims.d[INPUT_SIZE3] = exprBuilder.operation(nvinfer1::DimensionOperation::kSUB, *pod1, *pw_sum);
return dims;
}
void BatchToSpacePlugin::serialize(void *buffer) const noexcept {
SerializeValue(&buffer, &in_, sizeof(int));
SerializeValue(&buffer, &ic_, sizeof(int));
SerializeValue(&buffer, &ih_, sizeof(int));
SerializeValue(&buffer, &iw_, sizeof(int));
SerializeValue(&buffer, &bh_, sizeof(int));
SerializeValue(&buffer, &ph0_, sizeof(int));
SerializeValue(&buffer, &ph1_, sizeof(int));

View File

@ -40,13 +40,8 @@ class BatchToSpaceTensorRT : public TensorRTOp {
constexpr auto BATCHTOSPACETENSORRT_PLUGIN_NAME{"BatchToSpacePlugin"};
class BatchToSpacePlugin : public TensorRTPlugin {
public:
BatchToSpacePlugin(const std::string name, int in, int ic, int ih, int iw, int bh, int ph0, int ph1, int pw0, int pw1,
uint32_t device_id)
BatchToSpacePlugin(const std::string name, int bh, int ph0, int ph1, int pw0, int pw1, uint32_t device_id)
: TensorRTPlugin(name, std::string(BATCHTOSPACETENSORRT_PLUGIN_NAME), device_id),
in_(in),
ic_(ic),
ih_(ih),
iw_(iw),
bh_(bh),
ph0_(ph0),
ph1_(ph1),
@ -56,23 +51,15 @@ class BatchToSpacePlugin : public TensorRTPlugin {
BatchToSpacePlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
: TensorRTPlugin(std::string(name), std::string(BATCHTOSPACETENSORRT_PLUGIN_NAME)) {
const nvinfer1::PluginField *fields = fc->fields;
in_ = static_cast<const int *>(fields[0].data)[0];
ic_ = static_cast<const int *>(fields[1].data)[0];
ih_ = static_cast<const int *>(fields[2].data)[0];
iw_ = static_cast<const int *>(fields[3].data)[0];
bh_ = static_cast<const int *>(fields[4].data)[0];
ph0_ = static_cast<const int *>(fields[5].data)[0];
ph1_ = static_cast<const int *>(fields[6].data)[0];
pw0_ = static_cast<const int *>(fields[7].data)[0];
pw1_ = static_cast<const int *>(fields[8].data)[0];
bh_ = static_cast<const int *>(fields[0].data)[0];
ph0_ = static_cast<const int *>(fields[1].data)[0];
ph1_ = static_cast<const int *>(fields[2].data)[0];
pw0_ = static_cast<const int *>(fields[3].data)[0];
pw1_ = static_cast<const int *>(fields[4].data)[0];
}
BatchToSpacePlugin(const char *name, const void *serialData, size_t serialLength)
: TensorRTPlugin(std::string(name), std::string(BATCHTOSPACETENSORRT_PLUGIN_NAME)) {
DeserializeValue(&serialData, &serialLength, &in_, sizeof(int));
DeserializeValue(&serialData, &serialLength, &ic_, sizeof(int));
DeserializeValue(&serialData, &serialLength, &ih_, sizeof(int));
DeserializeValue(&serialData, &serialLength, &iw_, sizeof(int));
DeserializeValue(&serialData, &serialLength, &bh_, sizeof(int));
DeserializeValue(&serialData, &serialLength, &ph0_, sizeof(int));
DeserializeValue(&serialData, &serialLength, &ph1_, sizeof(int));
@ -93,15 +80,11 @@ class BatchToSpacePlugin : public TensorRTPlugin {
private:
int RunCudaBatchToSpace(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs, void *const *outputs,
cudaStream_t stream);
size_t in_;
size_t ic_;
size_t ih_;
size_t iw_;
size_t bh_;
size_t ph0_;
size_t ph1_;
size_t pw0_;
size_t pw1_;
int bh_;
int ph0_;
int ph1_;
int pw0_;
int pw1_;
const std::string layer_name_;
std::string name_space_;
};

View File

@ -40,7 +40,7 @@ int ConcateTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::
return RET_OK;
}
int ConcateTensorRT::AddInnerOp(TensorRTContext *ctx) {
int ConcateTensorRT::CheckParams(TensorRTContext *ctx) {
if (ctx == nullptr || ctx->network() == nullptr) {
MS_LOG(ERROR) << "context or network is invalid";
return RET_ERROR;
@ -59,6 +59,14 @@ int ConcateTensorRT::AddInnerOp(TensorRTContext *ctx) {
<< ", but origin ms tensor has " << in_tensors_.size();
return RET_ERROR;
}
return RET_OK;
}
int ConcateTensorRT::AddInnerOp(TensorRTContext *ctx) {
if (CheckParams(ctx) != RET_OK) {
MS_LOG(ERROR) << "Check input tensors failed: " << op_name_;
return RET_ERROR;
}
nvinfer1::ITensor *trt_input_tensors[in_tensors_.size()];
int ret = PreProcessInputs(ctx, trt_input_tensors);
@ -67,8 +75,15 @@ int ConcateTensorRT::AddInnerOp(TensorRTContext *ctx) {
return ret;
}
if (type_ == ops::kNameStack) {
for (size_t i = 0; i != in_tensors_.size(); ++i) {
bool has_rank_0 = false;
for (size_t i = 0; i < in_tensors_.size(); ++i) {
if (!input(ctx, i).is_tensor) {
has_rank_0 = true;
break;
}
}
if (type_ == ops::kNameStack && !has_rank_0) {
for (size_t i = 0; i < in_tensors_.size(); ++i) {
auto shuffle_layer = ctx->network()->addShuffle(*trt_input_tensors[i]);
if (shuffle_layer == nullptr) {
MS_LOG(ERROR) << "addShuffle failed for TensorRT.";
@ -95,7 +110,7 @@ int ConcateTensorRT::AddInnerOp(TensorRTContext *ctx) {
}
concate_layer->setName(op_name_.c_str());
auto concat_output = concate_layer->getOutput(0);
ctx->RegisterTensor(ITensorHelper{concat_output, out_format_, true}, out_tensors_[0].Name());
ctx->RegisterTensor(ITensorHelper{concat_output, NCHW, true}, out_tensors_[0].Name());
this->layer_ = concate_layer;
return RET_OK;
}

View File

@ -43,6 +43,7 @@ class ConcateTensorRT : public TensorRTOp {
private:
int PreProcessInputs(TensorRTContext *ctx, nvinfer1::ITensor *trt_input_tensors[]);
int CheckParams(TensorRTContext *ctx);
Format out_format_{Format::NCHW};
bool same_format_{true};

View File

@ -0,0 +1,81 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/extendrt/delegate/tensorrt/op/constantofshape_tensorrt.h"
#include <numeric>
#include "src/extendrt/delegate/tensorrt/tensorrt_utils.h"
#include "ops/constant_of_shape.h"
namespace mindspore::lite {
int ConstantOfShapeTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors) {
if (in_tensors.size() != 1) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size() << " : " << op_name_;
return RET_ERROR;
}
if (out_tensors.size() != 1) {
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size() << " : " << op_name_;
return RET_ERROR;
}
auto constofshape_op = AsOps<ops::ConstantOfShape>();
if (constofshape_op == nullptr) {
MS_LOG(ERROR) << "Failed to as operator ConstantOfShape: " << op_name_;
return RET_ERROR;
}
DataType data_type = static_cast<DataType>(constofshape_op->get_data_type());
if (data_type != DataType::kNumberTypeInt32 && data_type != DataType::kNumberTypeFloat32) {
MS_LOG(ERROR) << "Unsupported data type for " << op_name_;
return RET_ERROR;
}
return RET_OK;
}
int ConstantOfShapeTensorRT::AddInnerOp(TensorRTContext *ctx) {
auto constofshape_op = AsOps<ops::ConstantOfShape>();
if (constofshape_op == nullptr) {
MS_LOG(ERROR) << "Failed to as operator ConstantOfShape: " << op_name_;
return RET_ERROR;
}
auto &&value_vector = constofshape_op->get_value();
auto value_tensor = ctx->ConvertTo1DTensor(value_vector);
CHECK_NULL_RETURN(value_tensor);
auto unsqueeze_layer = ctx->network()->addShuffle(*value_tensor);
CHECK_NULL_RETURN(unsqueeze_layer);
auto shape = input(ctx, 0).trt_tensor_;
int rank = shape->getDimensions().d[0];
nvinfer1::Dims unsqueeze{rank};
std::fill(unsqueeze.d, unsqueeze.d + rank, 1);
unsqueeze_layer->setReshapeDimensions(unsqueeze);
unsqueeze_layer->setZeroIsPlaceholder(false);
value_tensor = unsqueeze_layer->getOutput(0);
CHECK_NULL_RETURN(value_tensor);
auto out_tensor = Broadcast(ctx, value_tensor, shape);
if (static_cast<DataType>(constofshape_op->get_data_type()) == DataType::kNumberTypeInt32) {
out_tensor = TRTTensorCast(ctx, out_tensor, nvinfer1::DataType::kINT32, op_name_ + "_cast_out");
}
auto output_helper = ITensorHelper{out_tensor, Format::NCHW, true};
ctx->RegisterTensor(output_helper, out_tensors_[0].Name());
MS_LOG(DEBUG) << "output " << GetTensorFormat(output_helper);
this->layer_ = unsqueeze_layer;
return RET_OK;
}
REGISTER_TENSORRT_CREATOR(ops::kNameConstantOfShape, ConstantOfShapeTensorRT)
} // namespace mindspore::lite
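ConstantOfShape fills a tensor whose shape is only known at runtime: the value is reshaped to a rank-matching run of 1s and then expanded to the shape tensor by the Broadcast helper, with an optional cast back to int32. Conceptually (illustrative values, not TensorRT code):

```cpp
#include <vector>

// What the layer sequence above computes for value = 1.5f and a shape input holding {2, 3}.
std::vector<int> runtime_shape = {2, 3};   // contents of the shape input tensor at runtime
float value = 1.5f;                        // ops::ConstantOfShape value attribute
std::vector<float> output(2 * 3, value);   // [[1.5, 1.5, 1.5], [1.5, 1.5, 1.5]]
```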

View File

@ -0,0 +1,38 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_OP_CONSTANT_OF_SHAPE_TENSORRT_H_
#define MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_OP_CONSTANT_OF_SHAPE_TENSORRT_H_
#include <string>
#include <vector>
#include <algorithm>
#include "src/extendrt/delegate/tensorrt/op/tensorrt_op.h"
namespace mindspore::lite {
class ConstantOfShapeTensorRT : public TensorRTOp {
public:
ConstantOfShapeTensorRT(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors, std::string name)
: TensorRTOp(base_operator, in_tensors, out_tensors, name) {}
~ConstantOfShapeTensorRT() override = default;
int AddInnerOp(TensorRTContext *ctx) override;
int IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors) override;
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_OP_CONSTANT_OF_SHAPE_TENSORRT_H_

View File

@ -34,6 +34,7 @@
#include "ops/equal.h"
#include "ops/less.h"
#include "ops/greater.h"
#include "ops/floor_mod.h"
namespace mindspore::lite {
namespace {
@ -118,8 +119,12 @@ int ElementWiseTensorRT::AddInnerOp(TensorRTContext *ctx) {
MS_LOG(ERROR) << "PreprocessInputTensors failed.";
return RET_ERROR;
}
nvinfer1::IElementWiseLayer *cal_layer =
ctx->network()->addElementWise(*x_input.trt_tensor_, *y_input.trt_tensor_, element_wise_op_);
nvinfer1::IElementWiseLayer *cal_layer;
if (type_ == ops::kNameFloorMod) {
cal_layer = AddFoorMod(ctx, x_input.trt_tensor_, y_input.trt_tensor_);
} else {
cal_layer = ctx->network()->addElementWise(*x_input.trt_tensor_, *y_input.trt_tensor_, element_wise_op_);
}
if (cal_layer == nullptr) {
MS_LOG(ERROR) << "addElementWise failed for TensorRT.";
@ -164,7 +169,8 @@ int ElementWiseTensorRT::AddInnerOp(TensorRTContext *ctx) {
MS_LOG(INFO) << "bool result cast to int32" << op_name_;
}
#endif
auto output_helper = ITensorHelper{op_out_tensor, x_input.format_, x_input.same_format_};
auto is_tensor = x_input.is_tensor || y_input.is_tensor;
auto output_helper = ITensorHelper{op_out_tensor, x_input.format_, x_input.same_format_, is_tensor};
ctx->RegisterTensor(output_helper, out_tensors_[0].Name());
MS_LOG(DEBUG) << "output " << GetTensorFormat(output_helper);
return RET_OK;
@ -198,6 +204,25 @@ int ElementWiseTensorRT::PreprocessInputTensors(TensorRTContext *ctx, ITensorHel
return RET_OK;
}
nvinfer1::IElementWiseLayer *ElementWiseTensorRT::AddFoorMod(TensorRTContext *ctx, nvinfer1::ITensor *x0_trt,
nvinfer1::ITensor *x1_trt) {
nvinfer1::IElementWiseLayer *layer_0 =
ctx->network()->addElementWise(*x0_trt, *x1_trt, nvinfer1::ElementWiseOperation::kFLOOR_DIV);
layer_0->setName((op_name_ + "_floor_div").c_str());
auto result_0 = layer_0->getOutput(0);
nvinfer1::IElementWiseLayer *layer_1 =
ctx->network()->addElementWise(*result_0, *x1_trt, nvinfer1::ElementWiseOperation::kPROD);
layer_1->setName((op_name_ + "_prod").c_str());
auto result_1 = layer_1->getOutput(0);
nvinfer1::IElementWiseLayer *layer_2 =
ctx->network()->addElementWise(*x0_trt, *result_1, nvinfer1::ElementWiseOperation::kSUB);
layer_2->setName((op_name_ + "_sub").c_str());
return layer_2;
}
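TensorRT has no native floor-mod, so AddFoorMod composes it from three elementwise layers. A worked example of the identity they implement, showing why kFLOOR_DIV rather than truncating division is needed:

```cpp
#include <cmath>
// Identity implemented by the kFLOOR_DIV -> kPROD -> kSUB chain: floormod(x, y) = x - floor(x / y) * y.
double x = -7.0, y = 3.0;
double floor_mod = x - std::floor(x / y) * y;  //  2.0: sign follows the divisor, like kFLOOR_DIV
double trunc_rem = x - std::trunc(x / y) * y;  // -1.0: what truncating division would give instead
```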
nvinfer1::ITensor *ElementWiseTensorRT::AddActivation(TensorRTContext *ctx, nvinfer1::ITensor *in_tensor) {
ActivationType activation = ActivationType::NO_ACTIVATION;
if (type_ == ops::kNameAddFusion) {
@ -255,11 +280,13 @@ nvinfer1::ITensor *ElementWiseTensorRT::AddActivation(TensorRTContext *ctx, nvin
int ElementWiseTensorRT::AddConstTensor(TensorRTContext *ctx) {
int const_tensor_index = in_tensors_[0].IsConst() ? 0 : 1;
auto expect_shape = ConvertMSShape(input(ctx, 1 - const_tensor_index).trt_tensor_->getDimensions());
nvinfer1::ITensor *constant_input =
ConvertConstantTensorWithDims(ctx, in_tensors_[const_tensor_index], expect_shape, op_name_);
auto &const_tensor = in_tensors_[const_tensor_index];
nvinfer1::ITensor *constant_input = ConvertConstantTensorWithDims(ctx, const_tensor, expect_shape, op_name_);
CHECK_NULL_RETURN(constant_input);
auto const_helper = ITensorHelper{constant_input, input(ctx, 1 - const_tensor_index).format_, true};
ctx->RegisterTensor(const_helper, in_tensors_[const_tensor_index].Name());
auto const_shape = const_tensor.Shape();
auto is_scalar = const_shape.empty() || (const_shape.size() == 1 && const_shape[0] == 1);
auto const_helper = ITensorHelper{constant_input, input(ctx, 1 - const_tensor_index).format_, true, !is_scalar};
ctx->RegisterTensor(const_helper, const_tensor.Name());
return RET_OK;
}
@ -273,6 +300,7 @@ REGISTER_TENSORRT_CREATOR(ops::kNameEltwise, ElementWiseTensorRT)
REGISTER_TENSORRT_CREATOR(ops::kNameMinimum, ElementWiseTensorRT)
REGISTER_TENSORRT_CREATOR(ops::kNameMaximum, ElementWiseTensorRT)
REGISTER_TENSORRT_CREATOR(ops::kNameBiasAdd, ElementWiseTensorRT)
REGISTER_TENSORRT_CREATOR(ops::kNameFloorMod, ElementWiseTensorRT)
#if TRT_VERSION_GE(7, 2)
REGISTER_TENSORRT_CREATOR(ops::kNameEqual, ElementWiseTensorRT)
REGISTER_TENSORRT_CREATOR(ops::kNameLess, ElementWiseTensorRT)

View File

@ -37,6 +37,8 @@ class ElementWiseTensorRT : public TensorRTOp {
private:
nvinfer1::ITensor *AddActivation(TensorRTContext *ctx, nvinfer1::ITensor *in_tensor);
nvinfer1::IElementWiseLayer *AddFoorMod(TensorRTContext *ctx, nvinfer1::ITensor *x0_trt, nvinfer1::ITensor *x1_trt);
int AddConstTensor(TensorRTContext *ctx);
bool SameTensor(nvinfer1::ITensor *trt_tensor, TensorInfo *ms_tensor);

View File

@ -0,0 +1,79 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/extendrt/delegate/tensorrt/op/fill_tensorrt.h"
#include <numeric>
#include "src/extendrt/delegate/tensorrt/tensorrt_utils.h"
#include "ops/fill.h"
namespace mindspore::lite {
int FillTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors) {
#if TRT_VERSION_GE(8, 2)
if (in_tensors.size() != INPUT_SIZE2) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size() << " : " << op_name_;
return RET_ERROR;
}
if (out_tensors.size() != 1) {
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size() << " : " << op_name_;
return RET_ERROR;
}
return RET_OK;
#else
MS_LOG(WARNING) << "low TensorRT version don't support fill op, please upgrade TensorRT version to 8.2 or higher";
return RET_ERROR;
#endif
}
int FillTensorRT::AddInnerOp(TensorRTContext *ctx) {
#if TRT_VERSION_GE(8, 2)
ITensorHelper fill_input;
nvinfer1::FillOperation op = nvinfer1::FillOperation::kLINSPACE;
auto *fill_layer = ctx->network()->addFill({}, op);
if (fill_layer == nullptr) {
MS_LOG(ERROR) << "addFill failed for TensorRT : " << op_name_;
return RET_ERROR;
}
fill_layer->setInput(0, *input(ctx, 1).trt_tensor_);
nvinfer1::ITensor *alpha_tensor = nullptr;
if (!in_tensors_[0].IsConst()) {
alpha_tensor = input(ctx, 0).trt_tensor_;
} else {
alpha_tensor =
ConvertScalarToITensor(ctx, 0, in_tensors_[0].Data(), in_tensors_[0].DataType(), op_name_ + "_alpha");
}
fill_layer->setInput(1, *alpha_tensor);
int nbdims = input(ctx, 1).trt_tensor_->getDimensions().d[0];
nvinfer1::ITensor *beta_tensor = nullptr;
if (in_tensors_[0].DataType() == DataType::kNumberTypeInt32) {
beta_tensor = ctx->ConvertTo1DTensor(std::vector<int>(nbdims, 0));
} else {
beta_tensor = ctx->ConvertTo1DTensor(std::vector<float>(nbdims, 0.f));
}
fill_layer->setInput(INPUT_SIZE2, *beta_tensor);
nvinfer1::ITensor *out_tensor = fill_layer->getOutput(0);
ctx->RegisterTensor(ITensorHelper{out_tensor, Format::NCHW, true}, out_tensors_[0].Name());
this->layer_ = fill_layer;
return RET_OK;
#else
MS_LOG(WARNING) << "low TensorRT version don't support fill op, please upgrade TensorRT version to 8.2 or higher";
return RET_ERROR;
#endif
}
REGISTER_TENSORRT_CREATOR(ops::kNameFill, FillTensorRT)
} // namespace mindspore::lite
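Fill is mapped to an IFillLayer in kLINSPACE mode whose per-axis deltas (input 2) are all zero, so the linspace degenerates into a constant fill over a runtime shape. A scalar sketch of what the configured layer produces (values illustrative):

```cpp
#include <vector>

// Inputs as wired above: shape tensor (fill input 0), alpha start value (fill input 1), zero deltas (fill input 2).
std::vector<int> runtime_shape = {2, 4};                // contents of the op's shape input at runtime
float alpha = 7.0f;                                     // scalar taken from in_tensors_[0]
std::vector<float> deltas(runtime_shape.size(), 0.0f);  // zero increment per axis
std::vector<float> output(2 * 4, alpha);                // every element equals alpha
```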

View File

@ -0,0 +1,41 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_OP_FILL_TENSORRT_H_
#define MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_OP_FILL_TENSORRT_H_
#include <string>
#include <vector>
#include <algorithm>
#include "src/extendrt/delegate/tensorrt/op/tensorrt_op.h"
namespace mindspore::lite {
class FillTensorRT : public TensorRTOp {
public:
FillTensorRT(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors, std::string name)
: TensorRTOp(base_operator, in_tensors, out_tensors, name) {}
~FillTensorRT() override = default;
int AddInnerOp(TensorRTContext *ctx) override;
int IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors) override;
private:
std::vector<float> zeros_;
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_OP_FILL_TENSORRT_H_

View File

@ -88,7 +88,7 @@ int FullyConnectedTensorRT::AddInnerOp(TensorRTContext *ctx) {
}
int FullyConnectedTensorRT::PreprocessInputs(TensorRTContext *ctx, ITensorHelper *fc_input) {
auto ret = PreprocessInputs2SameDim(ctx, input(ctx, !in_tensors_[1].IsConst()), fc_input);
auto ret = PreprocessInputs2SameDim(ctx, input(ctx, in_tensors_[1].IsConst() ? 0 : 1), fc_input);
if (ret != RET_OK) {
MS_LOG(ERROR) << "PreprocessInputs2SameDim failed for " << op_name_;
return ret;

View File

@ -85,7 +85,7 @@ int GatherTensorRT::AddInnerOp(TensorRTContext *ctx) {
nvinfer1::ITensor *op_output = gather_layer->getOutput(0);
auto old_shape = ConvertMSShape(op_output->getDimensions());
// keep shape
if (in_tensors_[1].Shape().empty() && old_shape.size() > 1) {
if (indices_tensor.trt_tensor_->getDimensions().nbDims == 0 && old_shape.size() > 1) {
auto squeeze = ctx->network()->addShuffle(*op_output);
if (squeeze == nullptr) {
MS_LOG(ERROR) << "add output squeeze failed for " << op_name_;

View File

@ -0,0 +1,111 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/extendrt/delegate/tensorrt/op/greaterorequal_tensorrt.h"
#include <cuda_runtime.h>
#include <numeric>
#include <memory>
#include <vector>
#include <functional>
#include <unordered_map>
#include "NvInferRuntimeCommon.h"
#include "src/extendrt/delegate/tensorrt/tensorrt_utils.h"
#include "src/extendrt/delegate/tensorrt/cuda_impl/logical.cuh"
#include "ops/greater_equal.h"
namespace mindspore::lite {
int GreaterorequalTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors) {
if (in_tensors.size() != INPUT_SIZE2) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
return RET_ERROR;
}
if (out_tensors.size() != 1) {
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
return RET_ERROR;
}
return RET_OK;
}
int GreaterorequalTensorRT::AddInnerOp(TensorRTContext *ctx) {
if (ctx == nullptr || ctx->network() == nullptr) {
MS_LOG(ERROR) << "network or input tensor is invalid";
return RET_ERROR;
}
nvinfer1::ITensor *inputTensors[] = {input(ctx, 0).trt_tensor_, input(ctx, 1).trt_tensor_};
auto plugin = std::make_shared<GreaterorequalPlugin>(op_name_, schema::PrimitiveType_GreaterEqual);
if (plugin == nullptr) {
MS_LOG(ERROR) << "create GreaterorequalPlugin failed for " << op_name_;
return RET_ERROR;
}
nvinfer1::IPluginV2Layer *greaterorequal_layer = ctx->network()->addPluginV2(inputTensors, 2, *plugin);
this->layer_ = greaterorequal_layer;
nvinfer1::ITensor *op_out_tensor = greaterorequal_layer->getOutput(0);
if (op_out_tensor == nullptr) {
MS_LOG(ERROR) << "greaterorequal out tensor is nullptr.";
return RET_ERROR;
}
ctx->RegisterTensor(ITensorHelper{op_out_tensor, input(ctx, 0).format_, input(ctx, 0).same_format_},
out_tensors_[0].Name());
return RET_OK;
}
REGISTER_TENSORRT_PLUGIN(GreaterorequalPluginCreater);
template class TensorRTPluginCreater<GreaterorequalPlugin>;
template <class T>
nvinfer1::PluginFieldCollection TensorRTPluginCreater<T>::field_collection_{};
template <class T>
std::vector<nvinfer1::PluginField> TensorRTPluginCreater<T>::fields_;
int GreaterorequalPlugin::enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs,
void *const *outputs, void *workspace, cudaStream_t stream) noexcept {
return RunCudaGreaterorequal(inputDesc, inputs, outputs, stream);
}
int GreaterorequalPlugin::RunCudaGreaterorequal(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs,
void *const *outputs, cudaStream_t stream) {
if (inputDesc->type == nvinfer1::DataType::kINT32) {
GreaterOrEqual(static_cast<const int *>(inputs[0]), static_cast<const int *>(inputs[1]),
static_cast<int *>(outputs[0]), GetDimsVolume(inputDesc[0].dims), stream);
} else if (inputDesc->type == nvinfer1::DataType::kFLOAT) {
GreaterOrEqual(static_cast<const float *>(inputs[0]), static_cast<const float *>(inputs[1]),
static_cast<float *>(outputs[0]), GetDimsVolume(inputDesc[0].dims), stream);
} else {
MS_LOG(ERROR) << "unsupported equal data type";
return RET_ERROR;
}
return RET_OK;
}
nvinfer1::IPluginV2DynamicExt *GreaterorequalPlugin::clone() const noexcept {
auto *plugin = new (std::nothrow) GreaterorequalPlugin(*this);
if (plugin == nullptr) {
MS_LOG(ERROR) << "malloc greaterorequal plugin failed";
return nullptr;
}
plugin->setPluginNamespace(name_space_.c_str());
return plugin;
}
size_t GreaterorequalPlugin::getSerializationSize() const noexcept { return sizeof(schema::PrimitiveType); }
void GreaterorequalPlugin::serialize(void *buffer) const noexcept {
SerializeValue(&buffer, &primitive_type_, sizeof(schema::PrimitiveType));
}
REGISTER_TENSORRT_CREATOR(ops::kNameGreaterEqual, GreaterorequalTensorRT)
} // namespace mindspore::lite
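The plugin forwards the comparison to a CUDA kernel selected by element type; the per-element semantics are simple. A host-side reference of what the kernel computes (illustrative only, the real work lives in logical.cuh on the GPU):
#include <cstddef>
// CPU reference for the element-wise comparison the GreaterorequalPlugin
// launches on the GPU: out[i] = (a[i] >= b[i]) ? 1 : 0, written back in the
// input element type, matching the kINT32/kFLOAT branches above. Both inputs
// are assumed to have the same element count.
template <typename T>
void GreaterOrEqualRef(const T *a, const T *b, T *out, size_t count) {
  for (size_t i = 0; i < count; ++i) {
    out[i] = static_cast<T>(a[i] >= b[i]);
  }
}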

View File

@ -0,0 +1,77 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_GREATEROREQUAL_PLUGIN_H_
#define MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_GREATEROREQUAL_PLUGIN_H_
#include <string>
#include <vector>
#include "src/extendrt/delegate/tensorrt/op/tensorrt_op.h"
#include "src/extendrt/delegate/tensorrt/op/tensorrt_plugin.h"
namespace mindspore::lite {
class GreaterorequalTensorRT : public TensorRTOp {
public:
GreaterorequalTensorRT(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors, std::string name)
: TensorRTOp(base_operator, in_tensors, out_tensors, name) {}
~GreaterorequalTensorRT() override = default;
int AddInnerOp(TensorRTContext *ctx) override;
int IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors) override;
};
constexpr auto GREATEROREQUAL_PLUGIN_NAME{"GreaterorequalPlugin"};
class GreaterorequalPlugin : public TensorRTPlugin {
public:
GreaterorequalPlugin(const std::string name, schema::PrimitiveType primitive_type)
: TensorRTPlugin(name, std::string(GREATEROREQUAL_PLUGIN_NAME)), primitive_type_(primitive_type) {}
GreaterorequalPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
: TensorRTPlugin(std::string(name), std::string(GREATEROREQUAL_PLUGIN_NAME)) {
const nvinfer1::PluginField *fields = fc->fields;
primitive_type_ = static_cast<const schema::PrimitiveType *>(fields[0].data)[0];
}
GreaterorequalPlugin(const char *name, const void *serialData, size_t serialLength)
: TensorRTPlugin(std::string(name), std::string(GREATEROREQUAL_PLUGIN_NAME)) {
DeserializeValue(&serialData, &serialLength, &primitive_type_, sizeof(schema::PrimitiveType));
}
GreaterorequalPlugin() = delete;
nvinfer1::IPluginV2DynamicExt *clone() const noexcept override;
int enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept override;
size_t getSerializationSize() const noexcept override;
void serialize(void *buffer) const noexcept override;
private:
int RunCudaGreaterorequal(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs,
void *const *outputs, cudaStream_t stream);
const std::string layer_name_;
std::string name_space_;
schema::PrimitiveType primitive_type_;
};
class GreaterorequalPluginCreater : public TensorRTPluginCreater<GreaterorequalPlugin> {
public:
GreaterorequalPluginCreater() : TensorRTPluginCreater(std::string(GREATEROREQUAL_PLUGIN_NAME)) {}
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_GREATEROREQUAL_PLUGIN_H_
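The plugin's serialize/getSerializationSize/deserialize trio above round-trips a single primitive_type_ value through a raw byte buffer. A minimal sketch of that SerializeValue/DeserializeValue pattern, assuming POD payloads (helper names here are illustrative, not the delegate's API):
#include <cstddef>
#include <cstring>
// Write a POD into the buffer and advance the cursor.
template <typename T>
void WriteValue(void **buffer, const T &value) {
  std::memcpy(*buffer, &value, sizeof(T));
  *buffer = static_cast<char *>(*buffer) + sizeof(T);
}
// Read a POD back, advancing the cursor and shrinking the remaining length.
template <typename T>
bool ReadValue(const void **data, size_t *length, T *value) {
  if (*length < sizeof(T)) {
    return false;  // truncated or corrupted serialized blob
  }
  std::memcpy(value, *data, sizeof(T));
  *data = static_cast<const char *>(*data) + sizeof(T);
  *length -= sizeof(T);
  return true;
}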

View File

@ -0,0 +1,110 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/extendrt/delegate/tensorrt/op/lessorequal_tensorrt.h"
#include <cuda_runtime.h>
#include <numeric>
#include <memory>
#include <vector>
#include <functional>
#include <unordered_map>
#include "src/extendrt/delegate/tensorrt/tensorrt_utils.h"
#include "NvInferRuntimeCommon.h"
#include "src/extendrt/delegate/tensorrt/cuda_impl/logical.cuh"
#include "ops/less_equal.h"
namespace mindspore::lite {
int LessorequalTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors) {
if (in_tensors.size() != INPUT_SIZE2) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
return RET_ERROR;
}
if (out_tensors.size() != 1) {
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
return RET_ERROR;
}
return RET_OK;
}
int LessorequalTensorRT::AddInnerOp(TensorRTContext *ctx) {
if (ctx == nullptr || ctx->network() == nullptr) {
MS_LOG(ERROR) << "network or input tensor is invalid";
return RET_ERROR;
}
nvinfer1::ITensor *inputTensors[] = {input(ctx, 0).trt_tensor_, input(ctx, 1).trt_tensor_};
auto plugin = std::make_shared<LessorequalPlugin>(op_name_, schema::PrimitiveType_LessEqual);
if (plugin == nullptr) {
MS_LOG(ERROR) << "create LessorequalPlugin failed for " << op_name_;
return RET_ERROR;
}
nvinfer1::IPluginV2Layer *lessorequal_layer = ctx->network()->addPluginV2(inputTensors, 2, *plugin);
this->layer_ = lessorequal_layer;
nvinfer1::ITensor *op_out_tensor = lessorequal_layer->getOutput(0);
if (op_out_tensor == nullptr) {
MS_LOG(ERROR) << "lessorequal out tensor is nullptr.";
return RET_ERROR;
}
ctx->RegisterTensor(ITensorHelper{op_out_tensor, input(ctx, 0).format_, input(ctx, 0).same_format_},
out_tensors_[0].Name());
return RET_OK;
}
REGISTER_TENSORRT_PLUGIN(LessorequalPluginCreater);
template class TensorRTPluginCreater<LessorequalPlugin>;
template <class T>
nvinfer1::PluginFieldCollection TensorRTPluginCreater<T>::field_collection_{};
template <class T>
std::vector<nvinfer1::PluginField> TensorRTPluginCreater<T>::fields_;
int LessorequalPlugin::enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs,
void *const *outputs, void *workspace, cudaStream_t stream) noexcept {
return RunCudaLessorequal(inputDesc, inputs, outputs, stream);
}
int LessorequalPlugin::RunCudaLessorequal(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs,
void *const *outputs, cudaStream_t stream) {
if (inputDesc->type == nvinfer1::DataType::kINT32) {
LessOrEqual(static_cast<const int *>(inputs[0]), static_cast<const int *>(inputs[1]),
static_cast<int *>(outputs[0]), GetDimsVolume(inputDesc[0].dims), stream);
} else if (inputDesc->type == nvinfer1::DataType::kFLOAT) {
LessOrEqual(static_cast<const float *>(inputs[0]), static_cast<const float *>(inputs[1]),
static_cast<float *>(outputs[0]), GetDimsVolume(inputDesc[0].dims), stream);
} else {
MS_LOG(ERROR) << "unsupported equal data type";
return RET_ERROR;
}
return RET_OK;
}
nvinfer1::IPluginV2DynamicExt *LessorequalPlugin::clone() const noexcept {
auto *plugin = new (std::nothrow) LessorequalPlugin(*this);
if (plugin == nullptr) {
MS_LOG(ERROR) << "malloc lessorequal plugin failed";
return nullptr;
}
plugin->setPluginNamespace(name_space_.c_str());
return plugin;
}
size_t LessorequalPlugin::getSerializationSize() const noexcept { return sizeof(schema::PrimitiveType); }
void LessorequalPlugin::serialize(void *buffer) const noexcept {
SerializeValue(&buffer, &primitive_type_, sizeof(schema::PrimitiveType));
}
REGISTER_TENSORRT_CREATOR(ops::kNameLessEqual, LessorequalTensorRT)
} // namespace mindspore::lite

View File

@ -0,0 +1,77 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_LESSOREQUAL_PLUGIN_H_
#define MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_LESSOREQUAL_PLUGIN_H_
#include <string>
#include <vector>
#include "src/extendrt/delegate/tensorrt/op/tensorrt_op.h"
#include "src/extendrt/delegate/tensorrt/op/tensorrt_plugin.h"
namespace mindspore::lite {
class LessorequalTensorRT : public TensorRTOp {
public:
LessorequalTensorRT(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors, std::string name)
: TensorRTOp(base_operator, in_tensors, out_tensors, name) {}
~LessorequalTensorRT() override = default;
int AddInnerOp(TensorRTContext *ctx) override;
int IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors) override;
};
constexpr auto LESSOREQUAL_PLUGIN_NAME{"LessorequalPlugin"};
class LessorequalPlugin : public TensorRTPlugin {
public:
LessorequalPlugin(const std::string name, schema::PrimitiveType primitive_type)
: TensorRTPlugin(name, std::string(LESSOREQUAL_PLUGIN_NAME)), primitive_type_(primitive_type) {}
LessorequalPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
: TensorRTPlugin(std::string(name), std::string(LESSOREQUAL_PLUGIN_NAME)) {
const nvinfer1::PluginField *fields = fc->fields;
primitive_type_ = static_cast<const schema::PrimitiveType *>(fields[0].data)[0];
}
LessorequalPlugin(const char *name, const void *serialData, size_t serialLength)
: TensorRTPlugin(std::string(name), std::string(LESSOREQUAL_PLUGIN_NAME)) {
DeserializeValue(&serialData, &serialLength, &primitive_type_, sizeof(schema::PrimitiveType));
}
LessorequalPlugin() = delete;
nvinfer1::IPluginV2DynamicExt *clone() const noexcept override;
int enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept override;
size_t getSerializationSize() const noexcept override;
void serialize(void *buffer) const noexcept override;
private:
int RunCudaLessorequal(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs, void *const *outputs,
cudaStream_t stream);
const std::string layer_name_;
std::string name_space_;
schema::PrimitiveType primitive_type_;
};
class LessorequalPluginCreater : public TensorRTPluginCreater<LessorequalPlugin> {
public:
LessorequalPluginCreater() : TensorRTPluginCreater(std::string(LESSOREQUAL_PLUGIN_NAME)) {}
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_LESSOREQUAL_PLUGIN_H_

View File

@ -281,6 +281,7 @@ nvinfer1::ITensor *LSTMTensorRT::ConcateAll(TensorRTContext *ctx, std::vector<nv
return nullptr;
}
concat->setAxis(axis);
this->layer_ = concat;
return concat->getOutput(0);
}

View File

@ -103,7 +103,6 @@ class LSTMTensorRT : public TensorRTOp {
int *state_weight_offset, int *bias_offset, LstmWeights *lstm_weights,
const LstmState &next_state);
// nvinfer1::INetworkDefinition *network_{nullptr};
nvinfer1::ITensor *input_data_{nullptr};
nvinfer1::ITensor *sequence_size_input_{nullptr};
nvinfer1::ITensor *op_data_out_{nullptr};

View File

@ -235,8 +235,8 @@ nvinfer1::ITensor *MatMulTensorRT::AddBias(TensorRTContext *ctx, nvinfer1::ITens
if (in_tensors_.size() == kBiasIndex + 1) {
nvinfer1::ITensor *bias = nullptr;
if (in_tensors_[kBiasIndex].Shape().size() < static_cast<size_t>(out_tensor->getDimensions().nbDims)) {
std::vector<int64_t> expect_dims(out_tensors_[0].Shape());
expect_dims[0] = out_tensor->getDimensions().d[0];
std::vector<int64_t> expect_dims(input_tensor->getDimensions().nbDims, 1);
expect_dims[expect_dims.size() - 1] = in_tensors_[kBiasIndex].Shape().back();
bias = ConvertTensorWithExpandDims(ctx, in_tensors_[kBiasIndex], expect_dims, op_name_);
} else if (in_tensors_[kBiasIndex].Shape().size() == static_cast<size_t>(out_tensor->getDimensions().nbDims)) {
bias = ConvertConstantTensor(ctx, in_tensors_[kBiasIndex], op_name_);
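The new bias path reshapes the bias to the rank of the matmul output, with every dimension set to 1 except the last, so TensorRT can broadcast it over the result. A small sketch of the expected-dims computation (plain C++, illustrative):
#include <cstdint>
#include <vector>
// Build the broadcastable bias shape used above: rank `nb_dims`, all ones,
// last dimension equal to the bias length.
std::vector<int64_t> ExpandBiasDims(int nb_dims, int64_t bias_len) {
  std::vector<int64_t> expect_dims(static_cast<size_t>(nb_dims), 1);
  expect_dims.back() = bias_len;
  return expect_dims;
}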

View File

@ -80,7 +80,6 @@ int PadTensorRT::AddInnerOp(TensorRTContext *ctx) {
int h_post;
int w_pre;
int w_post;
// MS_LOG(INFO) << in_tensors_[0].format();
if (SameDims(pad_input->getDimensions(), in_tensors_[0].Shape())) {
// NCHW: 0: N_pre, 1: N_post, 2: C_pre, 3: C_post, 4: H_pre, 5: H_post, 6: W_pre, 7: W_post
if (*padding_data != 0 || *(padding_data + 1) != 0 || *(padding_data + 2) != 0 || *(padding_data + 3) != 0) {

View File

@ -61,7 +61,7 @@ int ReduceTensorRT::AddInnerOp(TensorRTContext *ctx) {
CHECK_NULL_RETURN(reduce_input);
}
uint32_t reduceAxis = GetAxis();
uint32_t reduceAxis = GetAxis(ctx);
auto reduce_operation_opt = TryConvertTRTReduceMode(reduce_op->get_mode());
if (!reduce_operation_opt) {
MS_LOG(WARNING) << "invalid reduce for TensorRT, need check: " << static_cast<int>(reduce_op->get_mode());
@ -88,7 +88,7 @@ int ReduceTensorRT::AddInnerOp(TensorRTContext *ctx) {
return RET_OK;
}
uint32_t ReduceTensorRT::GetAxis() {
uint32_t ReduceTensorRT::GetAxis(TensorRTContext *ctx) {
// axis
uint32_t reduceAxis = 0;
auto axis_tensor = this->in_tensors_[1];
@ -102,7 +102,8 @@ uint32_t ReduceTensorRT::GetAxis() {
auto axis_data = reinterpret_cast<const int *>(axis_tensor.Data());
CHECK_NULL_RETURN(axis_data);
for (int i = 0; i < axis_tensor.ElementNum(); i++) {
int format_axis_data = (*axis_data == -1) ? in_tensors_[0].Shape().size() - 1 : *axis_data;
auto input_0 = input(ctx, 0).trt_tensor_;
int format_axis_data = (*axis_data == -1) ? input_0->getDimensions().nbDims - 1 : *axis_data;
MS_LOG(DEBUG) << op_name_ << " reduceAxis at index : " << *axis_data;
reduceAxis |= 1u << format_axis_data;
axis_data++;
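GetAxis now resolves axis -1 against the runtime tensor rank (nbDims) rather than the framework-side shape, then folds every axis into TensorRT's reduce-axes bitmask. A standalone sketch of that folding (plain C++, illustrative helper):
#include <cstdint>
#include <vector>
// Fold the axis list into the bitmask TensorRT's reduce layer expects:
// axis -1 maps to the last dimension of the runtime rank, and each axis
// contributes one set bit.
uint32_t BuildReduceAxisMask(const std::vector<int> &axes, int nb_dims) {
  uint32_t mask = 0;
  for (int axis : axes) {
    int resolved = (axis == -1) ? nb_dims - 1 : axis;
    mask |= 1u << resolved;
  }
  return mask;
}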

View File

@ -38,7 +38,7 @@ class ReduceTensorRT : public TensorRTOp {
const std::vector<TensorInfo> &out_tensors) override;
private:
uint32_t GetAxis();
uint32_t GetAxis(TensorRTContext *ctx);
Format out_format_;
};
} // namespace mindspore::lite

View File

@ -40,7 +40,16 @@ int ResizeTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::v
MS_LOG(ERROR) << "convert failed " << op_name_;
return RET_ERROR;
}
dynamic_shape_params_.support_hw_dynamic_ = false;
if (resize_op_->get_coordinate_transform_mode() == CoordinateTransformMode::ALIGN_CORNERS &&
resize_op_->get_method() == ResizeMethod::LINEAR) {
MS_LOG(ERROR) << "Resize op do not support coordinate_transform_mode == ALIGN_CORNERS when method == LINEAR "
<< op_name_;
return RET_ERROR;
}
dynamic_shape_params_.support_hw_dynamic_ =
(resize_op_->get_new_height() > 0 && resize_op_->get_new_width() > 0) ? false : true;
dynamic_shape_params_.support_hw_dynamic_ &= resize_op_->get_method() != ResizeMethod::LINEAR;
return RET_OK;
}
@ -53,16 +62,7 @@ int ResizeTensorRT::AddInnerOp(TensorRTContext *ctx) {
nvinfer1::ITensor *resize_in_tensor = input(ctx, 0).trt_tensor_;
MS_LOG(DEBUG) << "origin input " << GetTensorFormat(input(ctx, 0));
MS_LOG(DEBUG) << "after transpose input " << GetTensorFormat(resize_in_tensor, Format::NCHW, true);
auto method = resize_op_->get_method();
nvinfer1::ITensor *output_tensor = nullptr;
if (method == ResizeMethod::LINEAR) {
MS_LOG(INFO) << "using plugin for resize";
output_tensor = RunPlugin(ctx, resize_in_tensor);
} else {
output_tensor = RunTensorRT(ctx, resize_in_tensor);
}
nvinfer1::ITensor *output_tensor = RunTensorRT(ctx, resize_in_tensor);
if (output_tensor == nullptr) {
return RET_ERROR;
}
@ -73,26 +73,35 @@ int ResizeTensorRT::AddInnerOp(TensorRTContext *ctx) {
}
nvinfer1::ITensor *ResizeTensorRT::RunPlugin(TensorRTContext *ctx, nvinfer1::ITensor *resize_in_tensor) {
constexpr int non_const_resize_input_count = 2;
if (ReadyInputsNumber(ctx) == non_const_resize_input_count) {
std::vector<int> resize_shape;
if (ReadyInputsNumber(ctx) == INPUT_SIZE2) {
MS_LOG(ERROR) << "dynamic input is not support!";
return nullptr;
} else {
const int *resize_ptr = reinterpret_cast<const int *>(in_tensors_[1].Data());
std::vector<int> resize_shape;
for (int i = 0; i != in_tensors_[1].ElementNum(); ++i) {
resize_shape.push_back(*(resize_ptr + i));
if (in_tensors_.size() == 1) {
resize_shape.push_back(static_cast<float>(resize_op_->get_new_height()));
resize_shape.push_back(static_cast<float>(resize_op_->get_new_width()));
} else {
const int *resize_ptr = reinterpret_cast<const int *>(in_tensors_[1].Data());
for (int i = 0; i != in_tensors_[1].ElementNum(); ++i) {
resize_shape.push_back(*(resize_ptr + i));
}
if (resize_shape.size() != INPUT_SIZE2) {
MS_LOG(ERROR) << "Do not support resize number more than 2";
return nullptr;
}
}
constexpr int resize_hw_dims_count = 2;
if (resize_shape.size() != resize_hw_dims_count) {
MS_LOG(ERROR) << "Do not support resize number more than 2";
bool using_half_pixel = (resize_op_->get_coordinate_transform_mode() == CoordinateTransformMode::HALF_PIXEL);
auto plugin = std::make_shared<ResizeLinear2DPlugin>(resize_in_tensor->getName(), resize_shape[0], resize_shape[1],
using_half_pixel, device_id_);
if (plugin == nullptr) {
MS_LOG(ERROR) << "add ResizeLinear2D plugin failed for " << op_name_;
return nullptr;
}
auto plugin =
std::make_shared<ResizeLinear2DPlugin>(resize_in_tensor->getName(), resize_shape[0], resize_shape[1], device_id_);
nvinfer1::ITensor *inputTensors[] = {resize_in_tensor};
nvinfer1::IPluginV2Layer *resize_layer = ctx->network()->addPluginV2(inputTensors, 1, *plugin);
if (resize_layer == nullptr) {
MS_LOG(ERROR) << "add spacetobatch op failed for TensorRT.";
MS_LOG(ERROR) << "add resize op failed for TensorRT.";
return nullptr;
}
resize_layer->setName(op_name_.c_str());
@ -124,18 +133,38 @@ nvinfer1::ITensor *ResizeTensorRT::RunTensorRT(TensorRTContext *ctx, nvinfer1::I
int ResizeTensorRT::SetOutputDims(TensorRTContext *ctx, nvinfer1::ITensor *resize_in_tensor,
nvinfer1::IResizeLayer *resize_layer) {
auto in_dims = resize_in_tensor->getDimensions();
nvinfer1::Dims in_dims = resize_in_tensor->getDimensions();
if (in_tensors_.size() == 1 && in_dims.nbDims == DIMENSION_4D) {
// hw is static, but has dynamic batch size
float scales[DIMENSION_4D]{1, 1, 1, 1};
scales[kNCHW_H] = static_cast<float>(resize_op_->get_new_height()) / static_cast<float>(in_dims.d[kNCHW_H]);
scales[kNCHW_W] = static_cast<float>(resize_op_->get_new_width()) / static_cast<float>(in_dims.d[kNCHW_W]);
resize_layer->setScales(scales, DIMENSION_4D);
nvinfer1::Dims4 new_dims(in_dims.d[0], in_dims.d[1], resize_op_->get_new_height(),
resize_op_->get_new_width()); // nchw
resize_layer->setOutputDimensions(new_dims); // static shape
} else {
auto shape_value_tensor = in_tensors_[1];
if (!shape_value_tensor.IsConst() && in_tensors_.size() >= INPUT_SIZE2) {
// dynamic output shape
resize_layer->setInput(1, *input(ctx, 1).trt_tensor_);
auto shape_tensor = input(ctx, 1).trt_tensor_;
if (shape_tensor->getDimensions().d[0] == INPUT_SIZE4) {
resize_layer->setInput(1, *shape_tensor);
} else {
auto in_tensor_shape = ctx->network()->addShape(*resize_in_tensor)->getOutput(0);
CHECK_NULL_RETURN(in_tensor_shape);
nvinfer1::Dims start_dims{1, {0}};
nvinfer1::Dims size_dims{1, {2}};
nvinfer1::Dims stride_dims{1, {1}};
auto nc = ctx->network()->addSlice(*in_tensor_shape, start_dims, size_dims, stride_dims)->getOutput(0);
CHECK_NULL_RETURN(nc);
nvinfer1::ITensor *trt_input_tensors[INPUT_SIZE2];
trt_input_tensors[0] = nc;
trt_input_tensors[1] = shape_tensor;
auto concat_layer = ctx->network()->addConcatenation(trt_input_tensors, INPUT_SIZE2);
concat_layer->setAxis(0);
auto nchw = concat_layer->getOutput(0);
CHECK_NULL_RETURN(nchw);
nchw = TRTTensorCast(ctx, nchw, nvinfer1::DataType::kINT32, op_name_ + "_input_nchw_to_int32");
resize_layer->setInput(1, *nchw);
}
} else {
std::vector<float> out_shape;
ParseValueFromShapeTensor(ctx, shape_value_tensor, &out_shape);
@ -170,7 +199,6 @@ int ResizeTensorRT::SetOutputDims(TensorRTContext *ctx, nvinfer1::ITensor *resiz
float scales[DIMENSION_4D]{1, 1, 1, 1};
scales[kNCHW_H] = out_shape[kNCHW_H];
scales[kNCHW_W] = out_shape[kNCHW_W];
MS_LOG(WARNING) << scales[0] << " " << scales[1] << " " << scales[2] << " " << scales[3];
resize_layer->setScales(scales, DIMENSION_4D);
}
}
@ -237,12 +265,25 @@ int ResizeTensorRT::SetParams(nvinfer1::IResizeLayer *resize_layer) {
}
resize_layer->setResizeMode(method_map.at(method));
// unsupported for trt6, but support setCoordinateTransformation() in version8
auto coordinate_transform_mode = resize_op_->get_coordinate_transform_mode();
// unsupported for trt6, but support setCoordinateTransformation() in version8
#if TRT_VERSION_GE(8, 0)
std::map<CoordinateTransformMode, nvinfer1::ResizeCoordinateTransformation> transform_map = {
{CoordinateTransformMode::ASYMMETRIC, nvinfer1::ResizeCoordinateTransformation::kASYMMETRIC},
{CoordinateTransformMode::ALIGN_CORNERS, nvinfer1::ResizeCoordinateTransformation::kALIGN_CORNERS},
{CoordinateTransformMode::HALF_PIXEL, nvinfer1::ResizeCoordinateTransformation::kHALF_PIXEL}};
auto transform_it = transform_map.find(coordinate_transform_mode);
if (transform_it == transform_map.end()) {
MS_LOG(ERROR) << op_name_ << " not support resize coordinate transform mode " << coordinate_transform_mode;
return RET_ERROR;
}
resize_layer->setCoordinateTransformation(transform_it->second);
#else
if (coordinate_transform_mode != CoordinateTransformMode::ASYMMETRIC) {
MS_LOG(WARNING) << op_name_ << " has coordinate_transform_mode may not supported: "
<< static_cast<int>(coordinate_transform_mode);
}
#endif
return RET_OK;
}
@ -265,8 +306,8 @@ int ResizeLinear2DPlugin::RunCudaResizeLinear2D(const nvinfer1::PluginTensorDesc
float h_scale = dims.d[kNCHW_H] / static_cast<float>(resize_h_);
float w_scale = dims.d[kNCHW_W] / static_cast<float>(resize_w_);
CalResizeBilinear(static_cast<const float *>(inputs[0]), dims.d[0], dims.d[1], dims.d[kNCHW_H], dims.d[kNCHW_W],
resize_h_, resize_w_, h_scale, w_scale, false, static_cast<float *>(outputs[0]), device_id_,
stream);
resize_h_, resize_w_, h_scale, w_scale, using_half_pixel_, static_cast<float *>(outputs[0]),
device_id_, stream);
return RET_OK;
}
@ -280,7 +321,7 @@ nvinfer1::IPluginV2DynamicExt *ResizeLinear2DPlugin::clone() const noexcept {
return plugin;
}
size_t ResizeLinear2DPlugin::getSerializationSize() const noexcept { return sizeof(int) * 2; }
size_t ResizeLinear2DPlugin::getSerializationSize() const noexcept { return sizeof(int) * INPUT_SIZE2 + sizeof(bool); }
nvinfer1::DimsExprs ResizeLinear2DPlugin::getOutputDimensions(int32_t index, const nvinfer1::DimsExprs *inputs,
int nbInputDims,
@ -299,6 +340,7 @@ nvinfer1::DimsExprs ResizeLinear2DPlugin::getOutputDimensions(int32_t index, con
void ResizeLinear2DPlugin::serialize(void *buffer) const noexcept {
SerializeValue(&buffer, &resize_h_, sizeof(int));
SerializeValue(&buffer, &resize_w_, sizeof(int));
SerializeValue(&buffer, &using_half_pixel_, sizeof(bool));
}
REGISTER_TENSORRT_CREATOR(ops::kNameResize, ResizeTensorRT)
} // namespace mindspore::lite
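The resize changes hinge on the coordinate transformation mode: setCoordinateTransformation() is used on TensorRT 8+, and the ResizeLinear2D plugin now carries a using_half_pixel_ flag into its bilinear kernel. A sketch of the two source-coordinate formulas involved (standard resize conventions, restated as plain C++ for reference):
// For an output index `dst` and scale = in_size / out_size:
//   asymmetric : src = dst * scale
//   half_pixel : src = (dst + 0.5) * scale - 0.5
// The serialized using_half_pixel_ flag selects the second formula.
float SourceCoordinate(int dst, float scale, bool using_half_pixel) {
  return using_half_pixel ? (static_cast<float>(dst) + 0.5f) * scale - 0.5f
                          : static_cast<float>(dst) * scale;
}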

View File

@ -41,10 +41,11 @@ class ResizeTensorRT : public TensorRTOp {
const std::vector<TensorInfo> &out_tensors) override;
private:
int SetOutputDims(TensorRTContext *ctx, nvinfer1::ITensor *resize_in_tensor, nvinfer1::IResizeLayer *resize_layer);
nvinfer1::ITensor *RunPlugin(TensorRTContext *ctx, nvinfer1::ITensor *resize_in_tensor);
nvinfer1::ITensor *RunTensorRT(TensorRTContext *ctx, nvinfer1::ITensor *resize_in_tensor);
int SetOutputDims(TensorRTContext *ctx, nvinfer1::ITensor *resize_in_tensor, nvinfer1::IResizeLayer *resize_layer);
void ParseValueFromShapeTensor(TensorRTContext *ctx, const TensorInfo &shape_value_tensor,
std::vector<float> *out_shape);
@ -61,22 +62,25 @@ class ResizeTensorRT : public TensorRTOp {
constexpr auto RESIZELINEAR2D_PLUGIN_NAME{"ResizeLinear2DPlugin"};
class ResizeLinear2DPlugin : public TensorRTPlugin {
public:
ResizeLinear2DPlugin(const std::string name, int resize_h, int resize_w, uint32_t device_id)
ResizeLinear2DPlugin(const std::string name, int resize_h, int resize_w, bool using_half_pixel, uint32_t device_id)
: TensorRTPlugin(name, std::string(RESIZELINEAR2D_PLUGIN_NAME), device_id),
resize_h_(resize_h),
resize_w_(resize_w) {}
resize_w_(resize_w),
using_half_pixel_(using_half_pixel) {}
ResizeLinear2DPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
: TensorRTPlugin(std::string(name), std::string(RESIZELINEAR2D_PLUGIN_NAME)) {
const nvinfer1::PluginField *fields = fc->fields;
resize_h_ = static_cast<const int *>(fields[0].data)[0];
resize_w_ = static_cast<const int *>(fields[1].data)[0];
using_half_pixel_ = static_cast<const bool *>(fields[2].data)[0];
}
ResizeLinear2DPlugin(const char *name, const void *serialData, size_t serialLength)
: TensorRTPlugin(std::string(name), std::string(RESIZELINEAR2D_PLUGIN_NAME)) {
DeserializeValue(&serialData, &serialLength, &resize_h_, sizeof(int));
DeserializeValue(&serialData, &serialLength, &resize_w_, sizeof(int));
DeserializeValue(&serialData, &serialLength, &using_half_pixel_, sizeof(bool));
}
ResizeLinear2DPlugin() = delete;
@ -94,6 +98,7 @@ class ResizeLinear2DPlugin : public TensorRTPlugin {
void *const *outputs, cudaStream_t stream);
int resize_h_;
int resize_w_;
bool using_half_pixel_;
const std::string layer_name_;
std::string name_space_;
};

View File

@ -48,7 +48,7 @@ class ScaleTensorRT : public TensorRTOp {
Format out_format_;
bool out_same_format_{false};
bool out_same_format_{true};
nvinfer1::ScaleMode mode_;

View File

@ -16,6 +16,7 @@
#include "src/extendrt/delegate/tensorrt/op/shape_tensorrt.h"
#include "ops/shape.h"
#include "ops/dynamic_shape.h"
namespace mindspore::lite {
int ShapeTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
@ -32,8 +33,6 @@ int ShapeTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::ve
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
return RET_ERROR;
}
dynamic_shape_params_.support_dynamic_ = false;
dynamic_shape_params_.support_hw_dynamic_ = false;
return RET_OK;
}
int ShapeTensorRT::AddInnerOp(TensorRTContext *ctx) {
@ -55,4 +54,5 @@ int ShapeTensorRT::AddInnerOp(TensorRTContext *ctx) {
return RET_OK;
}
REGISTER_TENSORRT_CREATOR(ops::kNameShape, ShapeTensorRT)
REGISTER_TENSORRT_CREATOR(ops::kNameDynamicShape, ShapeTensorRT)
} // namespace mindspore::lite

View File

@ -72,6 +72,11 @@ int ShuffleTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::
MS_LOG(ERROR) << "Unsupported shape tensor of " << type_;
return RET_ERROR;
}
} else if (type_ == ops::kNameBroadcastTo) {
if (in_tensors.size() != INPUT_SIZE2) {
MS_LOG(ERROR) << "PrimitiveType_Transpose Unsupported in_tensors size: " << in_tensors.size();
return RET_ERROR;
}
} else {
MS_LOG(ERROR) << "Unsupported op type:" << type_;
return RET_ERROR;
@ -123,12 +128,19 @@ int ShuffleTensorRT::AddInnerOp(TensorRTContext *ctx) {
MS_LOG(ERROR) << "Unsupported op type for " << op_name_;
return RET_ERROR;
}
if (ret == RET_OK) {
auto output_helper = ITensorHelper{shuffler_output_, out_format_, true};
ctx->RegisterTensor(output_helper, out_tensors_[0].Name());
MS_LOG(DEBUG) << "output " << GetTensorFormat(output_helper);
if (ret != RET_OK) {
MS_LOG(ERROR) << "AddOp failed for " << op_name_;
return ret;
}
return ret;
if (shuffler_output_ == nullptr) {
MS_LOG(ERROR) << "output tensor create failed for " << op_name_;
return RET_ERROR;
}
auto output_helper = ITensorHelper{shuffler_output_, out_format_, true};
ctx->RegisterTensor(output_helper, out_tensors_[0].Name());
MS_LOG(DEBUG) << "output " << GetTensorFormat(output_helper);
return RET_OK;
}
int ShuffleTensorRT::InputTensorPreprocess(TensorRTContext *ctx) {
@ -189,7 +201,7 @@ int ShuffleTensorRT::AddUnsqueezeOp(nvinfer1::IShuffleLayer *shuffle_layer) {
nvinfer1::ITensor *expand_input = shuffler_input_;
if (input(ctx_, 0).is_tensor == true) {
for (size_t i = 0; i < param_axis_.size(); i++) {
expand_input = ExpandDim(shuffle_layer, expand_input, param_axis_[i]);
expand_input = ExpandDim(ctx_, expand_input, param_axis_[i]);
}
}
shuffler_output_ = expand_input;
@ -242,8 +254,12 @@ int ShuffleTensorRT::AddReshapeOp(nvinfer1::IShuffleLayer *shuffle_layer) {
auto &shape_tensor = in_tensors_[1];
if (shape_tensor.IsConst()) {
// static shuffle layer
shuffle_layer->setReshapeDimensions(
InferReshapeDims(shuffler_input_->getDimensions(), in_tensors_[0].Shape(), out_tensors_[0].Shape()));
nvinfer1::Dims reshape_dims{static_cast<int>(shape_tensor.ElementNum())};
const int *shape_ptr = reinterpret_cast<const int *>(shape_tensor.Data());
for (int i = 0; i != shape_tensor.ElementNum(); ++i) {
reshape_dims.d[i] = *(shape_ptr + i);
}
shuffle_layer->setReshapeDimensions(reshape_dims);
} else {
if (in_tensors_.size() != INPUT_SIZE2) {
MS_LOG(ERROR) << "invalid shape tensor for reshape " << op_name_;
@ -270,6 +286,10 @@ int ShuffleTensorRT::AddFlattenOp(nvinfer1::IShuffleLayer *shuffle_layer) {
}
int ShuffleTensorRT::AddExpandDimsOp(nvinfer1::IShuffleLayer *shuffle_layer) {
if (!input(ctx_, 0).is_tensor) {
shuffler_output_ = shuffler_input_;
return RET_OK;
}
int axis;
if (in_tensors_[1].DataType() == DataType::kNumberTypeInt64) {
auto axis_data = static_cast<const int64_t *>(in_tensors_[1].Data());
@ -281,114 +301,44 @@ int ShuffleTensorRT::AddExpandDimsOp(nvinfer1::IShuffleLayer *shuffle_layer) {
MS_LOG(WARNING) << op_name_ << " axis tensor data type is " << static_cast<int>(in_tensors_[1].DataType());
return RET_ERROR;
}
shuffler_output_ = ExpandDim(shuffle_layer, shuffler_input_, axis);
shuffler_output_ = ExpandDim(ctx_, shuffler_input_, axis);
return shuffler_output_ == nullptr ? RET_ERROR : RET_OK;
}
int ShuffleTensorRT::AddBroadcastToOp(nvinfer1::IShuffleLayer *shuffle_layer) {
if (out_tensors_[0].ElementNum() != in_tensors_[0].ElementNum() &&
out_tensors_[0].Shape().size() == in_tensors_[0].Shape().size()) {
MS_LOG(WARNING) << "broadcast element cnt changes, ignore broadcast for " << op_name_;
shuffle_layer->setReshapeDimensions(shuffler_input_->getDimensions());
MS_LOG(WARNING) << "here " << op_name_;
} else if (out_tensors_[0].ElementNum() == in_tensors_[0].ElementNum()) {
nvinfer1::Dims new_dims = ConvertCudaDims(out_tensors_[0].Shape());
if (new_dims.nbDims == -1) {
MS_LOG(ERROR) << "ConvertCudaDims failed for " << op_name_;
return RET_ERROR;
}
new_dims.d[0] = shuffler_input_->getDimensions().d[0];
shuffle_layer->setReshapeDimensions(new_dims);
MS_LOG(WARNING) << "here " << op_name_;
if (!in_tensors_[1].IsConst()) {
auto input_shape_tensor = input(ctx_, 1).trt_tensor_;
shuffler_output_ = Broadcast(ctx_, shuffler_input_, input_shape_tensor);
} else {
MS_LOG(ERROR) << "broadcast needs check for " << op_name_;
std::vector<int> input_shape;
const int *shape_ptr = reinterpret_cast<const int *>(in_tensors_[1].Data());
for (int i = 0; i != in_tensors_[1].ElementNum(); ++i) {
input_shape.push_back(*(shape_ptr + i));
}
nvinfer1::Dims in_tensor_dims = shuffler_input_->getDimensions();
auto input_shape_tensor = ctx_->ConvertTo1DTensor(input_shape);
while (in_tensor_dims.nbDims < static_cast<int64_t>(input_shape.size())) {
shuffler_input_ = ExpandDim(ctx_, shuffler_input_, 0);
if (shuffler_input_->getDimensions().nbDims == -1) {
MS_LOG(ERROR) << "ConvertCudaDims failed for " << op_name_;
return RET_ERROR;
}
shuffle_layer->setReshapeDimensions(shuffler_input_->getDimensions());
shuffler_input_ = shuffle_layer->getOutput(0);
in_tensor_dims = shuffler_input_->getDimensions();
}
auto size_tensor = ctx_->network()->addShape(*shuffler_input_)->getOutput(0);
size_tensor = ctx_->network()
->addElementWise(*input_shape_tensor, *size_tensor, nvinfer1::ElementWiseOperation::kMAX)
->getOutput(0);
shuffler_output_ = Broadcast(ctx_, shuffler_input_, size_tensor);
}
shuffler_output_ = shuffle_layer->getOutput(0);
return shuffler_output_ == nullptr ? RET_ERROR : RET_OK;
}
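The rewritten AddBroadcastToOp expands the input to the requested rank and then derives the broadcast extents with an element-wise kMAX against the target shape tensor. A host-side sketch of the resulting shape rule (illustrative, assuming ordinary broadcasting of size-1 dimensions):
#include <algorithm>
#include <cstdint>
#include <vector>
// Left-pad the input shape with 1s to the requested rank, then take the
// element-wise maximum with the requested shape, mirroring the kMAX trick
// used above to build the broadcast size tensor.
std::vector<int64_t> BroadcastShape(std::vector<int64_t> in_shape,
                                    const std::vector<int64_t> &want_shape) {
  while (in_shape.size() < want_shape.size()) {
    in_shape.insert(in_shape.begin(), 1);
  }
  std::vector<int64_t> out(want_shape.size());
  for (size_t i = 0; i < want_shape.size(); ++i) {
    out[i] = std::max(in_shape[i], want_shape[i]);
  }
  return out;
}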
nvinfer1::ITensor *ShuffleTensorRT::ExpandDim(nvinfer1::IShuffleLayer *shuffle_layer, nvinfer1::ITensor *input_tensor,
int axis) {
auto input_dims = input_tensor->getDimensions();
// if expand dim not at last dim and shape is dynamic, change to expanddim at last dim and transpose
bool special_expand = false;
for (int i = 0; i < input_dims.nbDims; i++) {
special_expand = special_expand || input_dims.d[i] == -1;
}
special_expand = special_expand && (axis != -1 && axis != input_dims.nbDims - 1);
if (special_expand) {
std::vector<int64_t> new_shape;
for (int i = 0; i < input_dims.nbDims; i++) {
new_shape.push_back(input_dims.d[i] == -1 ? 0 : input_dims.d[i]);
}
new_shape.push_back(1);
nvinfer1::Dims new_dims = ConvertCudaDims(new_shape);
if (new_dims.nbDims == -1) {
MS_LOG(ERROR) << "ConvertCudaDims failed for " << op_name_;
return nullptr;
}
shuffle_layer->setReshapeDimensions(new_dims);
// transpose
nvinfer1::Permutation perm{};
for (int i = 0; i < new_dims.nbDims; i++) {
if (i < axis) {
perm.order[i] = i;
} else if (i == axis) {
perm.order[i] = new_dims.nbDims - 1;
} else {
perm.order[i] = i - 1;
}
}
nvinfer1::IShuffleLayer *trans_layer = ctx_->network()->addShuffle(*shuffle_layer->getOutput(0));
if (trans_layer == nullptr) {
MS_LOG(ERROR) << "add transpose layer failed for special expand dims op " << op_name_;
return nullptr;
}
trans_layer->setFirstTranspose(perm);
return trans_layer->getOutput(0);
} else {
std::vector<int64_t> new_shape;
for (int i = 0; i < input_dims.nbDims; i++) {
if (axis == i) {
new_shape.push_back(1);
}
new_shape.push_back(input_dims.d[i] == -1 ? 0 : input_dims.d[i]);
}
if (axis == -1 || axis == input_dims.nbDims) {
new_shape.push_back(1);
}
nvinfer1::Dims new_dims = ConvertCudaDims(new_shape);
if (new_dims.nbDims == -1) {
MS_LOG(ERROR) << "ConvertCudaDims failed for " << op_name_;
return nullptr;
}
shuffle_layer->setReshapeDimensions(new_dims);
return shuffle_layer->getOutput(0);
}
}
nvinfer1::Dims ShuffleTensorRT::InferReshapeDims(const nvinfer1::Dims &input_dims,
const std::vector<int64_t> &ms_input_shape,
const std::vector<int64_t> &ms_output_shape) {
// tensorrt support infer shape of 0 and -1
nvinfer1::Dims reshape_dims = ConvertCudaDims(ms_output_shape);
if (reshape_dims.nbDims == -1) {
MS_LOG(ERROR) << "ConvertCudaDims failed for " << op_name_;
return reshape_dims;
}
for (int i = 0; i < reshape_dims.nbDims; i++) {
if (input_dims.d[i] == -1) {
if (ms_input_shape[i] == ms_output_shape[i]) {
reshape_dims.d[i] = 0;
} else {
reshape_dims.d[i] = -1;
}
}
MS_LOG(DEBUG) << "reshape infer_index " << i << " value: " << reshape_dims.d[i];
}
return reshape_dims;
}
REGISTER_TENSORRT_CREATOR(ops::kNameUnsqueeze, ShuffleTensorRT)
REGISTER_TENSORRT_CREATOR(ops::kNameSqueeze, ShuffleTensorRT)
REGISTER_TENSORRT_CREATOR(ops::kNameReshape, ShuffleTensorRT)

View File

@ -42,9 +42,6 @@ class ShuffleTensorRT : public TensorRTOp {
int AddFlattenOp(nvinfer1::IShuffleLayer *shuffle_layer);
int AddExpandDimsOp(nvinfer1::IShuffleLayer *shuffle_layer);
int AddBroadcastToOp(nvinfer1::IShuffleLayer *shuffle_layer);
nvinfer1::ITensor *ExpandDim(nvinfer1::IShuffleLayer *shuffle_layer, nvinfer1::ITensor *input_tensor, int axis);
nvinfer1::Dims InferReshapeDims(const nvinfer1::Dims &input_dims, const std::vector<int64_t> &ms_input_shape,
const std::vector<int64_t> &ms_output_shape);
Format out_format_ = Format::NCHW;
nvinfer1::ITensor *shuffler_input_{nullptr};

View File

@ -71,24 +71,32 @@ class StrideSliceTensorRTUtil final : public SliceTensorRTUtil {
}
int axis_value = *(static_cast<const int *>(in_tensors.at(axis_index).Data()));
int start_value = *(static_cast<const int *>(begin.Data()));
int size_value = *(static_cast<const int *>(end.Data()));
int end_value = *(static_cast<const int *>(end.Data()));
int stride_value = *(static_cast<const int *>(stride.Data()));
auto input_dims = helper.trt_tensor_->getDimensions();
start_dims.nbDims = input_dims.nbDims;
size_dims.nbDims = input_dims.nbDims;
stride_dims = nvinfer1::Dims{size_dims.nbDims, {}};
std::fill(start_dims.d, start_dims.d + start_dims.nbDims, 0);
std::fill(stride_dims.d, stride_dims.d + stride_dims.nbDims, 1);
if (start_value < 0) {
start_value = input_dims.d[axis_value] + start_value;
}
for (int i = 0; i < start_dims.nbDims; i++) {
if (i == axis_value) {
start_dims.d[i] = start_value;
size_dims.d[i] = size_value >= 0 ? std::min(size_value, input_dims.d[i]) - start_dims.d[i]
: size_value + input_dims.d[i] - start_dims.d[i];
stride_dims.d[i] = stride_value;
if (end_value >= 0) {
size_dims.d[i] = std::min(end_value, input_dims.d[i]) - start_dims.d[i];
} else if (end_value >= -input_dims.d[i]) {
size_dims.d[i] = end_value + input_dims.d[i] - start_dims.d[i];
} else {
size_dims.d[i] = input_dims.d[i];
}
} else {
start_dims.d[i] = 0;
size_dims.d[i] = helper.trt_tensor_->getDimensions().d[i];
}
}
int stride_value = *(static_cast<const int *>(stride.Data()));
stride_dims = nvinfer1::Dims{size_dims.nbDims, {}};
std::fill(stride_dims.d, stride_dims.d + stride_dims.nbDims, stride_value);
}
return std::make_tuple(start_dims, size_dims, stride_dims);
}
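The updated strided-slice handling resolves negative begin/end values against the sliced dimension and converts them into TensorRT's start/size/stride form. A standalone sketch of the single-axis resolution performed above (plain C++, illustrative):
#include <algorithm>
struct AxisSlice {
  int start;
  int size;
};
// Resolve begin/end for one axis of extent `dim`: negative indices wrap
// around, end is clamped to the extent, and an out-of-range negative end
// falls back to the full dimension, matching the branches above.
AxisSlice ResolveAxisSlice(int dim, int begin, int end) {
  int start = begin < 0 ? begin + dim : begin;
  int size;
  if (end >= 0) {
    size = std::min(end, dim) - start;
  } else if (end >= -dim) {
    size = end + dim - start;
  } else {
    size = dim;
  }
  return AxisSlice{start, size};
}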
@ -103,7 +111,7 @@ class StrideSliceTensorRTUtil final : public SliceTensorRTUtil {
shape.erase(shape.begin() + i);
}
}
return Reshape(ctx, input, shape);
return shape.empty() ? nullptr : Reshape(ctx, input, shape);
}
return input;
}
@ -136,8 +144,8 @@ class SliceFusionTensorRTUtil final : public SliceTensorRTUtil {
const auto &begin = in_tensors.at(1);
const auto &size = in_tensors.at(SIZE_INDEX);
auto start_dims = lite::ConvertCudaDims(begin.Data(), begin.ElementNum());
auto size_dims = lite::ConvertCudaDims(size.Data(), size.ElementNum());
auto start_dims = lite::ConvertCudaDims(begin);
auto size_dims = lite::ConvertCudaDims(size);
for (int i = 0; i != size_dims.nbDims; ++i) {
if (size_dims.d[i] == -1) {
size_dims.d[i] = helper.trt_tensor_->getDimensions().d[i];
@ -278,17 +286,16 @@ int SliceTensorRT::AddInnerOp(TensorRTContext *ctx) {
this->layer_ = slice_layer;
slice_layer->setName(op_name_.c_str());
nvinfer1::ITensor *out_tensor = slice_layer->getOutput(0);
out_tensor = util_->PostProcess(ctx, out_tensor, in_tensors_, out_tensors_);
if (out_tensor == nullptr) {
MS_LOG(ERROR) << "output tensor create failed";
return RET_ERROR;
auto post_tensor = util_->PostProcess(ctx, out_tensor, in_tensors_, out_tensors_);
bool rank_0 = false;
if (post_tensor == nullptr) {
rank_0 = true;
post_tensor = out_tensor;
}
auto helper = ITensorHelper{out_tensor, slice_input.format_, slice_input.same_format_};
auto helper = ITensorHelper{post_tensor, slice_input.format_, slice_input.same_format_, !rank_0};
ctx->RegisterTensor(helper, out_tensors_[0].Name());
MS_LOG(DEBUG) << "slice output : " << GetTensorFormat(helper);
return RET_OK;
}
REGISTER_TENSORRT_CREATOR(ops::kNameStridedSlice, SliceTensorRT)
REGISTER_TENSORRT_CREATOR(ops::kNameSliceFusion, SliceTensorRT)
REGISTER_TENSORRT_CREATOR(ops::kNameCrop, SliceTensorRT)
} // namespace mindspore::lite

View File

@ -0,0 +1,131 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/extendrt/delegate/tensorrt/op/slicefusion_tensorrt.h"
#include <algorithm>
#include <utility>
#include "src/extendrt/delegate/tensorrt/tensorrt_utils.h"
#include "ops/fusion/slice_fusion.h"
namespace mindspore::lite {
nvinfer1::ITensor *SliceFusionTensorRT::GetDynamicSliceSize(TensorRTContext *ctx, nvinfer1::ITensor *input,
nvinfer1::Dims start_dims, nvinfer1::Dims size_dims) {
auto in_tensor_shape = ctx->network()->addShape(*input)->getOutput(0);
if (in_tensor_shape == nullptr) {
MS_LOG(ERROR) << "add shape layer of input failed!";
return nullptr;
}
std::vector<nvinfer1::ITensor *> shape_tensors;
auto input_dims = input->getDimensions();
std::vector<int> input_shape_vec;
for (int i = 0; i != input_dims.nbDims; ++i) {
if (input_dims.d[i] == -1) {
if (!input_shape_vec.empty()) {
shape_tensors.push_back(ctx->ConvertTo1DTensor(input_shape_vec));
input_shape_vec.clear();
}
auto starts = nvinfer1::Dims{1, {i}};
auto size = nvinfer1::Dims{1, {1}};
auto strides = nvinfer1::Dims{1, {1}};
auto slice_layer = ctx->network()->addSlice(*in_tensor_shape, starts, size, strides);
if (slice_layer == nullptr) {
MS_LOG(ERROR) << "add slice layer failed";
return nullptr;
}
auto start_tensor = ctx->ConvertTo1DTensor(start_dims.d[i]);
shape_tensors.push_back(
ctx->network()
->addElementWise(*slice_layer->getOutput(0), *start_tensor, nvinfer1::ElementWiseOperation::kSUB)
->getOutput(0));
} else {
input_shape_vec.push_back(size_dims.d[i]);
}
}
if (!input_shape_vec.empty()) {
shape_tensors.push_back(ctx->ConvertTo1DTensor(input_shape_vec));
}
auto concat_layer = ctx->network()->addConcatenation(shape_tensors.data(), static_cast<int>(shape_tensors.size()));
if (concat_layer == nullptr) {
MS_LOG(ERROR) << "add concat layer failed!";
return nullptr;
}
concat_layer->setAxis(0);
return concat_layer->getOutput(0);
}
int SliceFusionTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors) {
if (in_tensors.size() != SLICE_INPUT_SIZE) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
return RET_ERROR;
}
if (out_tensors.size() != 1) {
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
return RET_ERROR;
}
dynamic_shape_params_.support_hw_dynamic_ = false;
return RET_OK;
}
int SliceFusionTensorRT::AddInnerOp(TensorRTContext *ctx) {
ITensorHelper slice_input;
int ret = PreprocessInputs2SameDim(ctx, input(ctx, 0), &slice_input);
if (ret != RET_OK || slice_input.trt_tensor_ == nullptr) {
MS_LOG(ERROR) << "PreprocessInputs2SameDim input tensor failed for " << op_name_;
return RET_ERROR;
}
const auto &begin = in_tensors_.at(1);
const auto &size = in_tensors_.at(SIZE_INDEX);
auto start_dims = lite::ConvertCudaDims(begin);
auto size_dims = lite::ConvertCudaDims(size);
nvinfer1::ITensor *size_tensor = nullptr;
for (int i = 0; i != size_dims.nbDims; ++i) {
if (size_dims.d[i] == -1 && !IsDynamicInput(ctx, 0)) {
size_dims.d[i] = slice_input.trt_tensor_->getDimensions().d[i];
}
}
if (IsDynamicInput(ctx, 0)) {
size_tensor = GetDynamicSliceSize(ctx, slice_input.trt_tensor_, start_dims, size_dims);
size_dims = nvinfer1::Dims{-1};
}
auto stride_dims = lite::ConvertCudaDims(1, begin.ElementNum());
nvinfer1::ISliceLayer *slice_layer =
ctx->network()->addSlice(*slice_input.trt_tensor_, start_dims, size_dims, stride_dims);
if (slice_layer == nullptr) {
MS_LOG(ERROR) << "add Slice op failed for TensorRT: " << op_name_;
return RET_ERROR;
}
if (size_tensor != nullptr) {
slice_layer->setInput(INPUT_SIZE2, *size_tensor);
}
this->layer_ = slice_layer;
slice_layer->setName(op_name_.c_str());
nvinfer1::ITensor *out_tensor = slice_layer->getOutput(0);
auto helper = ITensorHelper{out_tensor, slice_input.format_, slice_input.same_format_};
ctx->RegisterTensor(helper, out_tensors_[0].Name());
MS_LOG(DEBUG) << "slice output : " << GetTensorFormat(helper);
return RET_OK;
}
REGISTER_TENSORRT_CREATOR(ops::kNameSliceFusion, SliceFusionTensorRT)
} // namespace mindspore::lite
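GetDynamicSliceSize stitches the effective slice sizes together on the network: dimensions that are dynamic at build time take runtime_dim - start from a shape tensor, while static dimensions keep their constant size. A host-side sketch of the same resolution (plain C++, illustrative):
#include <vector>
// build_time_dims uses -1 for dimensions unknown when the engine is built;
// runtime_dims holds the actual extents observed at execution time.
std::vector<int> ResolveSliceSizes(const std::vector<int> &build_time_dims,
                                   const std::vector<int> &runtime_dims,
                                   const std::vector<int> &start,
                                   const std::vector<int> &size) {
  std::vector<int> resolved(size.size());
  for (size_t i = 0; i < size.size(); ++i) {
    resolved[i] = (build_time_dims[i] == -1) ? runtime_dims[i] - start[i] : size[i];
  }
  return resolved;
}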

View File

@ -0,0 +1,44 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_SLICE_FUSION_TENSORRT_H_
#define MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_SLICE_FUSION_TENSORRT_H_
#include <string>
#include <vector>
#include <memory>
#include "src/extendrt/delegate/tensorrt/op/tensorrt_op.h"
namespace mindspore::lite {
constexpr int SIZE_INDEX = 2;
constexpr int SLICE_INPUT_SIZE = 3;
class SliceFusionTensorRT : public TensorRTOp {
public:
SliceFusionTensorRT(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors, std::string name)
: TensorRTOp(base_operator, in_tensors, out_tensors, name) {}
~SliceFusionTensorRT() override = default;
int AddInnerOp(TensorRTContext *ctx) override;
int IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors) override;
private:
nvinfer1::ITensor *GetDynamicSliceSize(TensorRTContext *ctx, nvinfer1::ITensor *input, nvinfer1::Dims,
nvinfer1::Dims);
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_SLICE_FUSION_TENSORRT_H_

View File

@ -29,7 +29,7 @@
namespace mindspore::lite {
int SpaceToBatchTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors) {
if (in_tensors.size() != 3) {
if (in_tensors.size() != INPUT_SIZE3) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
return RET_ERROR;
}
@ -38,8 +38,6 @@ int SpaceToBatchTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
return RET_ERROR;
}
dynamic_shape_params_.support_dynamic_ = false;
dynamic_shape_params_.support_hw_dynamic_ = false;
return RET_OK;
}
@ -52,19 +50,17 @@ int SpaceToBatchTensorRT::AddInnerOp(TensorRTContext *ctx) {
MS_LOG(ERROR) << "block_h not equal block_w " << op_name_;
return RET_ERROR;
}
const int *pad_ptr = reinterpret_cast<const int *>(in_tensors_[2].Data());
const int *pad_ptr = reinterpret_cast<const int *>(in_tensors_[INPUT_SIZE2].Data());
int ph0 = *(pad_ptr + 0);
int ph1 = *(pad_ptr + 1);
int pw0 = *(pad_ptr + 2);
int pw1 = *(pad_ptr + 3);
auto in_dims = input_tensor->getDimensions();
int in = in_dims.d[0];
int ic = in_dims.d[1];
int ih = in_dims.d[2];
int iw = in_dims.d[3];
int pw0 = *(pad_ptr + INPUT_SIZE2);
int pw1 = *(pad_ptr + INPUT_SIZE3);
auto plugin =
std::make_shared<SpaceToBatchPlugin>(input_tensor->getName(), in, ic, ih, iw, bh, ph0, ph1, pw0, pw1, device_id_);
auto plugin = std::make_shared<SpaceToBatchPlugin>(input_tensor->getName(), bh, ph0, ph1, pw0, pw1, device_id_);
if (plugin == nullptr) {
MS_LOG(ERROR) << "add spacetobatch plugin failed for" << op_name_;
return RET_ERROR;
}
nvinfer1::ITensor *inputTensors[] = {input_tensor};
nvinfer1::IPluginV2Layer *space2batch_opt_layer = ctx->network()->addPluginV2(inputTensors, 1, *plugin);
if (space2batch_opt_layer == nullptr) {
@ -93,15 +89,20 @@ int SpaceToBatchPlugin::enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
int SpaceToBatchPlugin::RunCudaSpaceToBatch(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs,
void *const *outputs, cudaStream_t stream) {
int on = in_ * bh_ * bh_;
int oc = ic_;
int oh = (ih_ + ph0_ + ph1_) / bh_;
int ow = (iw_ + pw0_ + pw1_) / bh_;
nvinfer1::Dims input_dims = inputDesc[0].dims;
int in = input_dims.d[0];
int ic = input_dims.d[1];
int ih = input_dims.d[2];
int iw = input_dims.d[3];
int on = in * bh_ * bh_;
int oc = ic;
int oh = (ih + ph0_ + ph1_) / bh_;
int ow = (iw + pw0_ + pw1_) / bh_;
int size = in_ * ic_ * ih_ * iw_;
int size = in * ic * ih * iw;
CalSpaceToBatch<float>(size, static_cast<const float *>(inputs[0]), in_, ih_, iw_, ic_, on, oh, ow, oc, ph0_, ph1_,
pw0_, pw1_, bh_, static_cast<float *>(outputs[0]), device_id_, stream);
CalSpaceToBatch<float>(size, static_cast<const float *>(inputs[0]), in, ih, iw, ic, on, oh, ow, oc, ph0_, ph1_, pw0_,
pw1_, bh_, static_cast<float *>(outputs[0]), device_id_, stream);
return RET_OK;
}
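With the input geometry now read from inputDesc at enqueue time, the plugin only needs the block size and paddings to derive the output extents. The formulas used above, restated as a small sketch (plain C++, illustrative):
// For block size bh and paddings ph0/ph1 (height), pw0/pw1 (width):
//   on = n * bh * bh,  oc = c,  oh = (h + ph0 + ph1) / bh,  ow = (w + pw0 + pw1) / bh
struct S2BDims {
  int n, c, h, w;
};
S2BDims SpaceToBatchOutDims(const S2BDims &in, int bh, int ph0, int ph1, int pw0, int pw1) {
  return S2BDims{in.n * bh * bh, in.c, (in.h + ph0 + ph1) / bh, (in.w + pw0 + pw1) / bh};
}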
@ -115,7 +116,7 @@ nvinfer1::IPluginV2DynamicExt *SpaceToBatchPlugin::clone() const noexcept {
return plugin;
}
size_t SpaceToBatchPlugin::getSerializationSize() const noexcept { return sizeof(int) * 9; }
size_t SpaceToBatchPlugin::getSerializationSize() const noexcept { return sizeof(int) * 5; }
nvinfer1::DimsExprs SpaceToBatchPlugin::getOutputDimensions(int32_t index, const nvinfer1::DimsExprs *inputs,
int nbInputDims,
@ -127,19 +128,15 @@ nvinfer1::DimsExprs SpaceToBatchPlugin::getOutputDimensions(int32_t index, const
dims.d[1] = inputs[0].d[1];
auto bh = exprBuilder.constant(bh_);
auto ph_sum = exprBuilder.constant(ph0_ + ph1_);
auto sum0 = exprBuilder.operation(nvinfer1::DimensionOperation::kSUM, *inputs[0].d[2], *ph_sum);
dims.d[2] = exprBuilder.operation(nvinfer1::DimensionOperation::kFLOOR_DIV, *sum0, *bh);
auto sum0 = exprBuilder.operation(nvinfer1::DimensionOperation::kSUM, *inputs[0].d[INPUT_SIZE2], *ph_sum);
dims.d[INPUT_SIZE2] = exprBuilder.operation(nvinfer1::DimensionOperation::kFLOOR_DIV, *sum0, *bh);
auto pw_sum = exprBuilder.constant(pw0_ + pw1_);
auto sum1 = exprBuilder.operation(nvinfer1::DimensionOperation::kSUM, *inputs[0].d[3], *pw_sum);
dims.d[3] = exprBuilder.operation(nvinfer1::DimensionOperation::kFLOOR_DIV, *sum1, *bh);
auto sum1 = exprBuilder.operation(nvinfer1::DimensionOperation::kSUM, *inputs[0].d[INPUT_SIZE3], *pw_sum);
dims.d[INPUT_SIZE3] = exprBuilder.operation(nvinfer1::DimensionOperation::kFLOOR_DIV, *sum1, *bh);
return dims;
}
void SpaceToBatchPlugin::serialize(void *buffer) const noexcept {
SerializeValue(&buffer, &in_, sizeof(int));
SerializeValue(&buffer, &ic_, sizeof(int));
SerializeValue(&buffer, &ih_, sizeof(int));
SerializeValue(&buffer, &iw_, sizeof(int));
SerializeValue(&buffer, &bh_, sizeof(int));
SerializeValue(&buffer, &ph0_, sizeof(int));
SerializeValue(&buffer, &ph1_, sizeof(int));

View File

@ -40,13 +40,8 @@ class SpaceToBatchTensorRT : public TensorRTOp {
constexpr auto SPACETOTENSORRT_PLUGIN_NAME{"SpaceToBatchPlugin"};
class SpaceToBatchPlugin : public TensorRTPlugin {
public:
SpaceToBatchPlugin(const std::string name, int in, int ic, int ih, int iw, int bh, int ph0, int ph1, int pw0, int pw1,
uint32_t device_id)
SpaceToBatchPlugin(const std::string name, int bh, int ph0, int ph1, int pw0, int pw1, uint32_t device_id)
: TensorRTPlugin(name, std::string(SPACETOTENSORRT_PLUGIN_NAME), device_id),
in_(in),
ic_(ic),
ih_(ih),
iw_(iw),
bh_(bh),
ph0_(ph0),
ph1_(ph1),
@ -56,23 +51,15 @@ class SpaceToBatchPlugin : public TensorRTPlugin {
SpaceToBatchPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
: TensorRTPlugin(std::string(name), std::string(SPACETOTENSORRT_PLUGIN_NAME)) {
const nvinfer1::PluginField *fields = fc->fields;
in_ = static_cast<const int *>(fields[0].data)[0];
ic_ = static_cast<const int *>(fields[1].data)[0];
ih_ = static_cast<const int *>(fields[2].data)[0];
iw_ = static_cast<const int *>(fields[3].data)[0];
bh_ = static_cast<const int *>(fields[4].data)[0];
ph0_ = static_cast<const int *>(fields[5].data)[0];
ph1_ = static_cast<const int *>(fields[6].data)[0];
pw0_ = static_cast<const int *>(fields[7].data)[0];
pw1_ = static_cast<const int *>(fields[8].data)[0];
bh_ = static_cast<const int *>(fields[0].data)[0];
ph0_ = static_cast<const int *>(fields[1].data)[0];
ph1_ = static_cast<const int *>(fields[2].data)[0];
pw0_ = static_cast<const int *>(fields[3].data)[0];
pw1_ = static_cast<const int *>(fields[4].data)[0];
}
SpaceToBatchPlugin(const char *name, const void *serialData, size_t serialLength)
: TensorRTPlugin(std::string(name), std::string(SPACETOTENSORRT_PLUGIN_NAME)) {
DeserializeValue(&serialData, &serialLength, &in_, sizeof(int));
DeserializeValue(&serialData, &serialLength, &ic_, sizeof(int));
DeserializeValue(&serialData, &serialLength, &ih_, sizeof(int));
DeserializeValue(&serialData, &serialLength, &iw_, sizeof(int));
DeserializeValue(&serialData, &serialLength, &bh_, sizeof(int));
DeserializeValue(&serialData, &serialLength, &ph0_, sizeof(int));
DeserializeValue(&serialData, &serialLength, &ph1_, sizeof(int));
@ -93,15 +80,11 @@ class SpaceToBatchPlugin : public TensorRTPlugin {
private:
int RunCudaSpaceToBatch(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs, void *const *outputs,
cudaStream_t stream);
size_t in_;
size_t ic_;
size_t ih_;
size_t iw_;
size_t bh_;
size_t ph0_;
size_t ph1_;
size_t pw0_;
size_t pw1_;
int bh_;
int ph0_;
int ph1_;
int pw0_;
int pw1_;
const std::string layer_name_;
std::string name_space_;
};

View File

@ -24,19 +24,53 @@
namespace mindspore::lite {
int SplitTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors) {
if (!IsShapeKnown()) {
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
return RET_ERROR;
}
if (in_tensors.size() != 1 && in_tensors.size() != INPUT_SIZE2) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
return RET_ERROR;
}
dynamic_shape_params_.support_dynamic_ = false;
dynamic_shape_params_.support_hw_dynamic_ = false;
return RET_OK;
}
nvinfer1::ITensor *SplitTensorRT::GetDynamicSliceSize(TensorRTContext *ctx, nvinfer1::ITensor *input, size_t i) {
auto in_tensor_shape = ctx->network()->addShape(*input)->getOutput(0);
if (in_tensor_shape == nullptr) {
MS_LOG(ERROR) << "add shape layer of input failed!";
return nullptr;
}
auto len_tensor = ctx->ConvertTo1DTensor(static_cast<int>(size_splits_[i]));
if (len_tensor == nullptr) {
MS_LOG(ERROR) << "convert 1d tensor failed!";
return nullptr;
}
nvinfer1::ITensor *concat_input_tensors[INPUT_SIZE2];
concat_input_tensors[0] = in_tensor_shape;
concat_input_tensors[1] = len_tensor;
auto concat_layer = ctx->network()->addConcatenation(concat_input_tensors, INPUT_SIZE2);
if (concat_layer == nullptr) {
MS_LOG(ERROR) << "add concat layer failed!";
return nullptr;
}
concat_layer->setAxis(0);
auto shape_and_len = concat_layer->getOutput(0);
if (shape_and_len == nullptr) {
MS_LOG(ERROR) << "get concat layer result failed!";
return nullptr;
}
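  // The output slice keeps the input extent on every axis except axis_, whose extent is
  // size_splits_[i]. gather_slices holds indices [0..nbDims-1] with the entry for axis_
  // redirected to position nbDims, where the concatenated tensor stores size_splits_[i].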
std::vector<int> gather_slices(input->getDimensions().nbDims);
std::iota(gather_slices.begin(), gather_slices.end(), 0);
gather_slices[axis_] = gather_slices.size();
auto gather_slices_tensor = ctx->ConvertTo1DTensor(gather_slices);
nvinfer1::IGatherLayer *gather_layer = ctx->network()->addGather(*shape_and_len, *gather_slices_tensor, 0);
if (gather_layer == nullptr) {
MS_LOG(ERROR) << "add gather layer failed!";
return nullptr;
}
return gather_layer->getOutput(0);
}
int SplitTensorRT::AddInnerOp(TensorRTContext *ctx) {
ITensorHelper split_input;
int ret = PreprocessInputs2SameDim(ctx, input(ctx, 0), &split_input);
@ -78,16 +112,24 @@ int SplitTensorRT::AddInnerOp(TensorRTContext *ctx) {
for (int i = 0; i != output_num_; ++i) {
nvinfer1::Dims start_dims = lite::ConvertCudaDims(0, input_nbdims);
start_dims.d[axis_] = axis_dim_index;
nvinfer1::Dims size_dims{-1};
nvinfer1::ITensor *size_tensor = nullptr;
if (!IsDynamicInput(ctx, 0)) {
size_dims = split_input.trt_tensor_->getDimensions();
size_dims.d[axis_] = size_splits_[i];
} else {
size_tensor = GetDynamicSliceSize(ctx, split_input.trt_tensor_, i);
}
axis_dim_index += size_splits_[i];
nvinfer1::Dims size_dims = split_input.trt_tensor_->getDimensions();
size_dims.d[axis_] = size_splits_[i];
slice_layer = ctx->network()->addSlice(*split_input.trt_tensor_, start_dims, size_dims, one_dims);
if (slice_layer == nullptr) {
MS_LOG(ERROR) << "add Slice op failed for TensorRT: " << op_name_;
return RET_ERROR;
}
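    // In the dynamic-shape path the statically computed size_dims is only a placeholder;
    // the gathered size tensor is attached below as slice input index 2 and takes precedence.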
if (size_tensor != nullptr) {
slice_layer->setInput(INPUT_SIZE2, *size_tensor);
}
nvinfer1::ITensor *out_tensor = slice_layer->getOutput(0);
if (type_ == ops::kNameUnstack) {

View File

@ -35,10 +35,11 @@ class SplitTensorRT : public TensorRTOp {
const std::vector<TensorInfo> &out_tensors) override;
private:
nvinfer1::ITensor *GetDynamicSliceSize(TensorRTContext *ctx, nvinfer1::ITensor *input, size_t i);
int ParseParams(const ITensorHelper &helper);
int64_t axis_;
int64_t output_num_;
std::vector<int64_t> size_splits_;
int output_num_;
std::vector<int> size_splits_;
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_SPLIT_TENSORRT_H_

View File

@ -0,0 +1,187 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/extendrt/delegate/tensorrt/op/strideslice_tensorrt.h"
#include <algorithm>
#include <numeric>
#include "src/extendrt/delegate/tensorrt/tensorrt_utils.h"
#include "ops/strided_slice.h"
namespace mindspore::lite {
nvinfer1::ITensor *StrideSliceTensorRT::GetDynamicSliceSize(TensorRTContext *ctx, nvinfer1::ITensor *input,
int size_dim, int axis) {
auto in_tensor_shape = ctx->network()->addShape(*input)->getOutput(0);
if (in_tensor_shape == nullptr) {
MS_LOG(ERROR) << "add shape layer of input failed!";
return nullptr;
}
auto len_tensor = ctx->ConvertTo1DTensor(static_cast<int>(size_dim));
if (len_tensor == nullptr) {
MS_LOG(ERROR) << "convert 1d tensor failed!";
return nullptr;
}
nvinfer1::ITensor *concat_input_tensors[INPUT_SIZE2];
concat_input_tensors[0] = in_tensor_shape;
concat_input_tensors[1] = len_tensor;
auto concat_layer = ctx->network()->addConcatenation(concat_input_tensors, INPUT_SIZE2);
if (concat_layer == nullptr) {
MS_LOG(ERROR) << "add concat layer failed!";
return nullptr;
}
concat_layer->setAxis(0);
auto shape_and_len = concat_layer->getOutput(0);
if (shape_and_len == nullptr) {
MS_LOG(ERROR) << "get concat layer result failed!";
return nullptr;
}
std::vector<int> gather_slices(input->getDimensions().nbDims);
std::iota(gather_slices.begin(), gather_slices.end(), 0);
gather_slices[axis] = gather_slices.size();
auto gather_slices_tensor = ctx->ConvertTo1DTensor(gather_slices);
nvinfer1::IGatherLayer *gather_layer = ctx->network()->addGather(*shape_and_len, *gather_slices_tensor, 0);
if (gather_layer == nullptr) {
MS_LOG(ERROR) << "add gather layer failed!";
return nullptr;
}
return gather_layer->getOutput(0);
}
int StrideSliceTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors) {
if (in_tensors.size() < HAS_AXIS - 1) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
return RET_ERROR;
}
if (out_tensors.size() != 1) {
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
return RET_ERROR;
}
if (!in_tensors.at(BEGINS_INDEX).IsConst() || !in_tensors.at(ENDS_INDEX).IsConst()) {
MS_LOG(ERROR) << "invalid input tensor for: " << op_name_;
return RET_ERROR;
}
return RET_OK;
}
nvinfer1::ILayer *StrideSliceTensorRT::MakeLayer(TensorRTContext *ctx, const ITensorHelper &slice_input) {
const auto &begin = in_tensors_.at(BEGINS_INDEX);
const auto &stride = in_tensors_.back();
const auto &end = in_tensors_.at(ENDS_INDEX);
auto input_dims = slice_input.trt_tensor_->getDimensions();
nvinfer1::Dims start_dims{input_dims};
nvinfer1::Dims size_dims{input_dims};
nvinfer1::Dims stride_dims{input_dims};
int64_t axis_index = in_tensors_.size() == HAS_AXIS ? AXIS_INDEX : -1;
nvinfer1::ITensor *size_tensor = nullptr;
if (begin.ElementNum() == input_dims.nbDims) {
start_dims = lite::ConvertCudaDims(begin);
auto end_dims = lite::ConvertCudaDims(end);
for (int i = 0; i < size_dims.nbDims; i++) {
size_dims.d[i] = end_dims.d[i] - start_dims.d[i];
}
stride_dims = lite::ConvertCudaDims(stride);
} else {
if (begin.ElementNum() != 1 || end.ElementNum() != 1 || stride.ElementNum() != 1) {
MS_LOG(ERROR)
<< "Only support element number of begin, end and stride to be 1 when this number < input dims number, op: "
<< op_name_;
return nullptr;
}
int axis_value = axis_index == -1 ? 0 : *(static_cast<const int *>(in_tensors_.at(axis_index).Data()));
int start_value = *(static_cast<const int *>(begin.Data()));
int end_value = *(static_cast<const int *>(end.Data()));
int stride_value = *(static_cast<const int *>(stride.Data()));
std::fill(start_dims.d, start_dims.d + start_dims.nbDims, 0);
std::fill(stride_dims.d, stride_dims.d + stride_dims.nbDims, 1);
size_dims = slice_input.trt_tensor_->getDimensions();
if (start_value < 0) {
start_value = input_dims.d[axis_value] + start_value;
}
for (int i = 0; i < start_dims.nbDims; i++) {
if (i == axis_value) {
start_dims.d[i] = start_value;
stride_dims.d[i] = stride_value;
if (end_value >= 0) {
size_dims.d[i] = std::min(end_value, input_dims.d[i]) - start_dims.d[i];
} else if (end_value >= -input_dims.d[i]) {
size_dims.d[i] = end_value + input_dims.d[i] - start_dims.d[i];
} else {
size_dims.d[i] = input_dims.d[i];
}
}
}
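    // With dynamic (-1) input dims the size derived from static dimensions is only a
    // fallback; the real size is gathered from the runtime shape below and size_dims is
    // reset to an invalid placeholder.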
if (IsDynamicInput(ctx, 0)) {
size_tensor = GetDynamicSliceSize(ctx, slice_input.trt_tensor_, size_dims.d[axis_value], axis_value);
size_dims = nvinfer1::Dims{-1};
}
}
nvinfer1::ISliceLayer *slice_layer =
ctx->network()->addSlice(*slice_input.trt_tensor_, start_dims, size_dims, stride_dims);
if (slice_layer == nullptr) {
MS_LOG(ERROR) << "add Slice op failed for TensorRT: " << op_name_;
return nullptr;
}
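  // ISliceLayer input index 2 is the size tensor; setting it overrides the static
  // size_dims when the slice size must be computed at runtime.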
if (size_tensor != nullptr) {
slice_layer->setInput(INPUT_SIZE2, *size_tensor);
}
return slice_layer;
}
int StrideSliceTensorRT::AddInnerOp(TensorRTContext *ctx) {
ITensorHelper slice_input;
int ret = PreprocessInputs2SameDim(ctx, input(ctx, 0), &slice_input);
if (ret != RET_OK || slice_input.trt_tensor_ == nullptr) {
MS_LOG(ERROR) << "PreprocessInputs2SameDim input tensor failed for " << op_name_;
return RET_ERROR;
}
auto slice_layer = MakeLayer(ctx, slice_input);
if (slice_layer == nullptr) {
return RET_ERROR;
}
this->layer_ = slice_layer;
slice_layer->setName(op_name_.c_str());
nvinfer1::ITensor *out_tensor = slice_layer->getOutput(0);
auto shape = ConvertMSShape(out_tensor->getDimensions());
bool rank_0 = false;
shrink_axis_ = AsOps<ops::StridedSlice>()->get_shrink_axis_mask();
if (shrink_axis_ != 0) {
for (int i = shape.size() - 1; i >= 0; --i) {
int mask = 1 << i;
if ((shrink_axis_ & mask) != 0) {
shape.erase(shape.begin() + i);
}
}
if (!shape.empty()) {
out_tensor = Reshape(ctx, out_tensor, shape);
} else {
rank_0 = true;
}
}
auto helper = ITensorHelper{out_tensor, slice_input.format_, slice_input.same_format_, !rank_0};
ctx->RegisterTensor(helper, out_tensors_[0].Name());
MS_LOG(DEBUG) << "slice output : " << GetTensorFormat(helper);
return RET_OK;
}
REGISTER_TENSORRT_CREATOR(ops::kNameStridedSlice, StrideSliceTensorRT)
} // namespace mindspore::lite

View File

@ -0,0 +1,48 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_STRIDE_SLICE_TENSORRT_H_
#define MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_STRIDE_SLICE_TENSORRT_H_
#include <string>
#include <vector>
#include <tuple>
#include <memory>
#include "src/extendrt/delegate/tensorrt/op/tensorrt_op.h"
namespace mindspore::lite {
constexpr int BEGINS_INDEX = 1;
constexpr int ENDS_INDEX = 2;
constexpr int HAS_AXIS = 5;
constexpr int AXIS_INDEX = 3;
class StrideSliceTensorRT : public TensorRTOp {
public:
StrideSliceTensorRT(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors, std::string name)
: TensorRTOp(base_operator, in_tensors, out_tensors, name) {}
~StrideSliceTensorRT() override = default;
int AddInnerOp(TensorRTContext *ctx) override;
int IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors) override;
private:
nvinfer1::ITensor *GetDynamicSliceSize(TensorRTContext *ctx, nvinfer1::ITensor *input, int, int);
nvinfer1::ILayer *MakeLayer(TensorRTContext *ctx, const ITensorHelper &slice_input);
size_t shrink_axis_;
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_STRIDE_SLICE_TENSORRT_H_

View File

@ -87,6 +87,11 @@ int TensorRTOp::ReadyInputsNumber(TensorRTContext *ctx) const {
bool TensorRTOp::IsShapeKnown() { return true; }
bool TensorRTOp::IsDynamicInput(TensorRTContext *ctx, size_t k) {
nvinfer1::Dims dims = input(ctx, k).trt_tensor_->getDimensions();
return std::any_of(dims.d, dims.d + dims.nbDims, [](int d) { return d == -1; });
}
int TensorRTOp::Prepare(void **network_tensor_bindings, nvinfer1::ICudaEngine *engine) {
if (op_binding_tensor_.size() != 0) {
MS_LOG(ERROR) << "need special op Prepare for " << op_name_;

View File

@ -40,6 +40,7 @@ namespace mindspore::lite {
constexpr int INPUT_SIZE2 = 2;
constexpr int INPUT_SIZE3 = 3;
constexpr int INPUT_SIZE4 = 4;
constexpr int INPUT_SIZE5 = 5;
struct BindingHelper {
std::string name_;
@ -115,6 +116,7 @@ class TensorRTOp {
nvinfer1::ILayer *layer() { return layer_; }
bool GetSupportInputBool();
bool IsDynamicInput(TensorRTContext *ctx, size_t k);
void SetSupportInputBool(bool support_input_bool);
template <class OpsT>

View File

@ -87,12 +87,12 @@ class TensorRTPluginCreater : public nvinfer1::IPluginCreator {
const char *getPluginNamespace() const noexcept override { return name_space_.c_str(); }
nvinfer1::IPluginV2 *createPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc) noexcept {
nvinfer1::IPluginV2 *createPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc) noexcept override {
return new (std::nothrow) T(name, fc);
}
nvinfer1::IPluginV2 *deserializePlugin(const char *name, const void *serialData, size_t serialLength) noexcept {
return new (std::nothrow) T(name, serialData, serialLength);
nvinfer1::IPluginV2 *deserializePlugin(const char *name, const void *data, size_t len) noexcept override {
return new (std::nothrow) T(name, data, len);
}
protected:

View File

@ -21,6 +21,7 @@
#include <functional>
#include "include/api/kernel.h"
#include "src/common/utils.h"
#include "src/extendrt/utils/tensor_default_impl.h"
namespace mindspore::lite {
class TensorInfoImpl {
@ -29,46 +30,24 @@ class TensorInfoImpl {
TensorInfoImpl(const std::string &name, mindspore::DataType type, const std::vector<int64_t> &shape,
mindspore::Format format, const void *data, size_t data_len,
const mindspore::tensor::TensorPtr &tensor_val)
: name_(name),
dType_(type),
shape_(shape),
format_(format),
data_(data),
data_len_(data_len),
tensor_val_(tensor_val) {
is_const_ = (data_ != nullptr);
if (data_ == nullptr || data_len_ == 0) {
auto ele_num = ElementNum();
auto type_size = item_size();
temp_data_.resize(ele_num * type_size);
data_ = temp_data_.data();
data_len_ = temp_data_.size();
}
}
void SetShape(const std::vector<int64_t> &shape) {
shape_ = shape;
auto new_elem_num = ElementNum();
auto type_size = item_size();
auto data_size = new_elem_num * type_size;
if (data_size != temp_data_.size() && data_ == temp_data_.data()) {
temp_data_.resize(data_size);
data_ = temp_data_.data();
data_len_ = data_size;
}
: tensor_impl_(name, type, shape), tensor_val_(tensor_val) {
tensor_impl_.SetFormat(format);
auto is_const = (data != nullptr);
tensor_impl_.SetIsConst(is_const);
SetData(data, data_len);
}
int64_t ElementNum() const { return std::accumulate(shape_.begin(), shape_.end(), 1, std::multiplies<int64_t>()); }
size_t item_size() const { return DataTypeSize(static_cast<enum TypeId>(dType_)); }
std::string name_;
mindspore::DataType dType_ = mindspore::DataType::kTypeUnknown;
std::vector<int64_t> shape_;
mindspore::Format format_ = DEFAULT_FORMAT;
const void *data_ = nullptr;
size_t data_len_ = 0;
bool is_const_ = false;
std::vector<uint8_t> temp_data_;
size_t item_size() const { return DataTypeSize(static_cast<enum TypeId>(tensor_impl_.DataType())); }
void SetData(const void *data, size_t data_len) {
if (data != nullptr && data_len != 0) {
if (tensor_impl_.DataSize() != data_len) {
MS_LOG_WARNING << "Tensor expect data size " << tensor_impl_.DataSize() << " != data len " << data_len
<< ", shape: " << tensor_impl_.Shape() << ", dtype: " << tensor_impl_.DataType();
}
tensor_impl_.SetData(const_cast<void *>(data), false);
}
}
TensorDefaultImpl tensor_impl_;
mindspore::tensor::TensorPtr tensor_val_ = nullptr;
};
@ -82,21 +61,21 @@ std::string TensorInfo::Name() const {
if (impl_ == nullptr) {
return "";
}
return impl_->name_;
return impl_->tensor_impl_.Name();
}
mindspore::DataType TensorInfo::DataType() const {
if (impl_ == nullptr) {
return mindspore::DataType::kTypeUnknown;
}
return impl_->dType_;
return impl_->tensor_impl_.DataType();
}
mindspore::Format TensorInfo::format() const {
if (impl_ == nullptr) {
return DEFAULT_FORMAT;
}
return impl_->format_;
return impl_->tensor_impl_.Format();
}
const std::vector<int64_t> &TensorInfo::Shape() const {
@ -104,21 +83,21 @@ const std::vector<int64_t> &TensorInfo::Shape() const {
if (impl_ == nullptr) {
return empty_shape;
}
return impl_->shape_;
return impl_->tensor_impl_.Shape();
}
const void *TensorInfo::Data() const {
if (impl_ == nullptr) {
return nullptr;
}
return impl_->data_;
return impl_->tensor_impl_.Data().get();
}
void *TensorInfo::MutableData() {
if (impl_ == nullptr) {
return nullptr;
}
return const_cast<void *>(impl_->data_);
return const_cast<void *>(impl_->tensor_impl_.MutableData());
}
size_t TensorInfo::DataSize() const {
@ -132,7 +111,7 @@ bool TensorInfo::IsConst() const {
if (impl_ == nullptr) {
return 0;
}
return impl_->is_const_ && impl_->data_ != nullptr;
return impl_->tensor_impl_.IsConst();
}
size_t TensorInfo::item_size() const {
@ -146,22 +125,21 @@ void TensorInfo::SetShape(const std::vector<int64_t> &shape) {
if (impl_ == nullptr) {
return;
}
impl_->SetShape(shape);
impl_->tensor_impl_.SetShape(shape);
}
void TensorInfo::SetData(const void *data, size_t data_len) {
if (impl_ == nullptr) {
return;
}
impl_->data_ = data;
impl_->data_len_ = data_len;
impl_->SetData(data, data_len);
}
int64_t TensorInfo::ElementNum() const {
if (impl_ == nullptr) {
return 0;
}
return impl_->ElementNum();
return impl_->tensor_impl_.ElementNum();
}
TensorInfo &TensorInfo::operator=(const TensorInfo &other) {

View File

@ -15,6 +15,7 @@
*/
#include "src/extendrt/delegate/tensorrt/tensorrt_context.h"
#include "src/extendrt/delegate/tensorrt/tensorrt_utils.h"
namespace mindspore::lite {
TensorRTContext::~TensorRTContext() {
@ -22,6 +23,9 @@ TensorRTContext::~TensorRTContext() {
network_->destroy();
network_ = nullptr;
}
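  // Buffers created by ConvertTo1DTensor back nvinfer1::Weights and must stay valid until
  // the engine is built, so they are owned by the context and released only here.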
for (auto ptr : owner_memorys_) {
free(ptr);
}
}
bool TensorRTContext::Init() {
@ -72,4 +76,31 @@ ITensorHelper TensorRTContext::MsName2Tensor(const std::string &ms_name) {
MS_LOG(WARNING) << "Get Tensorrt tensor by ms_tensor: " << ms_name << " fail!";
return {};
}
template <typename T>
nvinfer1::ITensor *TensorRTContext::ConvertTo1DTensor(T value) {
return ConvertTo1DTensor(std::vector<T>{value});
}
template <typename T>
nvinfer1::ITensor *TensorRTContext::ConvertTo1DTensor(const std::vector<T> &values) {
  void *ptr = malloc(values.size() * sizeof(T));
  if (ptr == nullptr) {
    MS_LOG(ERROR) << "malloc weights buffer failed.";
    return nullptr;
  }
  memcpy(ptr, reinterpret_cast<const void *>(values.data()), values.size() * sizeof(T));
owner_memorys_.push_back(ptr);
nvinfer1::Weights weights{GetNvinferDataType<T>(), ptr, static_cast<int64_t>(values.size())};
nvinfer1::Dims dims{1, {static_cast<int>(values.size())}};
nvinfer1::IConstantLayer *constant_tensor = network()->addConstant(dims, weights);
if (constant_tensor == nullptr) {
MS_LOG(ERROR) << "create constant_tensor failed.";
return nullptr;
}
return constant_tensor->getOutput(0);
}
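// Explicit instantiations keep the template definitions in this translation unit; int and
// float scalars/vectors cover the needs of the current TensorRT ops.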
template nvinfer1::ITensor *TensorRTContext::ConvertTo1DTensor(int value);
template nvinfer1::ITensor *TensorRTContext::ConvertTo1DTensor(float value);
template nvinfer1::ITensor *TensorRTContext::ConvertTo1DTensor(const std::vector<int> &values);
template nvinfer1::ITensor *TensorRTContext::ConvertTo1DTensor(const std::vector<float> &values);
} // namespace mindspore::lite

View File

@ -20,6 +20,7 @@
#include <NvInfer.h>
#include <string>
#include <unordered_map>
#include <vector>
#include "src/extendrt/delegate/tensorrt/tensorrt_runtime.h"
namespace mindspore::lite {
@ -42,11 +43,17 @@ class TensorRTContext {
bool HasTensor(const std::string &name) const;
ITensorHelper MsName2Tensor(const std::string &ms_name);
template <typename T>
nvinfer1::ITensor *ConvertTo1DTensor(T value);
template <typename T>
nvinfer1::ITensor *ConvertTo1DTensor(const std::vector<T> &values);
private:
int counter_{0};
nvinfer1::INetworkDefinition *network_{nullptr};
std::unordered_map<std::string, ITensorHelper> ms_name2trt_tensor_;
TensorRTRuntime *runtime_{nullptr};
std::vector<void *> owner_memorys_;
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_TENSORRT_CONTEXT_H_

View File

@ -22,43 +22,18 @@
#include <map>
#include <string>
#include <utility>
#include "ccsrc/kernel/kernel.h"
#include <algorithm>
#include "src/extendrt/delegate/delegate_utils.h"
#include "ccsrc/kernel/common_utils.h"
#include "ccsrc/backend/common/optimizer/helper.h"
#include "ccsrc/include/common/utils/convert_utils.h"
#include "src/extendrt/utils/kernel_graph_utils.h"
#include "common/config_infos.h"
namespace mindspore::lite {
namespace {
const char tensorrt_provider[] = "tensorrt";
std::vector<std::string> Split(const std::string &str, const std::string &delim) {
auto start = 0U;
auto end = str.find(delim);
std::vector<std::string> substrs;
while (end != std::string::npos) {
substrs.push_back(str.substr(start, end - start));
start = end + delim.length();
end = str.find(delim, start);
}
substrs.push_back(str.substr(start, end));
return substrs;
}
std::vector<nvinfer1::Dims> StringToDims(const std::string &str) {
std::vector<nvinfer1::Dims> all_dims;
std::vector<std::string> all_str_dims = Split(str, ";");
for (const std::string &str_dims : all_str_dims) {
std::vector<std::string> str_dims_vec = Split(str_dims, ",");
nvinfer1::Dims dims;
dims.nbDims = str_dims_vec.size();
for (size_t i = 0; i != str_dims_vec.size(); ++i) {
dims.d[i] = std::stoi(str_dims_vec[i]);
}
all_dims.push_back(dims);
}
return all_dims;
}
struct NodeWithOutputIndex {
NodeWithOutputIndex() = default;
NodeWithOutputIndex(session::KernelWithIndex kernel_index, TensorInfo tensor_info)
@ -308,7 +283,7 @@ Status GetModelOutputsInfo(KernelGraphPtr kernel_graph, std::vector<NodeWithOutp
MS_EXCEPTION_IF_NULL(kernel_graph);
MS_EXCEPTION_IF_NULL(tensor_info_list_ptr);
auto &tensor_info_list = *tensor_info_list_ptr;
auto outputs = kernel_graph->outputs();
auto outputs = KernelGraphUtils::GetKernelGraphOutputs(kernel_graph);
// find parameters of graph inputs
for (size_t i = 0; i < outputs.size(); ++i) {
auto output = outputs[i];
@ -335,31 +310,8 @@ Status GetModelOutputsInfo(KernelGraphPtr kernel_graph, std::vector<NodeWithOutp
return kSuccess;
}
} // namespace
TensorRTExecutor::TensorRTExecutor(const std::shared_ptr<mindspore::Context> &context,
const std::string &cache_model_path, size_t vocab_size, size_t device_cache_size,
const std::map<std::string, std::string> &ms_cache)
: context_(context),
cache_model_path_(cache_model_path),
vocab_size_(vocab_size),
device_cache_size_(device_cache_size) {
if (ms_cache.find("serialize_path") != ms_cache.end()) {
serialize_path_ = ms_cache.at("serialize_path");
}
if (ms_cache.find("min_dims") != ms_cache.end()) {
min_dims_ = StringToDims(ms_cache.at("min_dims"));
}
if (ms_cache.find("opt_dims") != ms_cache.end()) {
opt_dims_ = StringToDims(ms_cache.at("opt_dims"));
}
if (ms_cache.find("max_dims") != ms_cache.end()) {
max_dims_ = StringToDims(ms_cache.at("max_dims"));
}
if (min_dims_.size() != opt_dims_.size() || min_dims_.size() != max_dims_.size()) {
MS_LOG(WARNING) << "number of min_dims, opt_dims, max_dims are not equal";
}
}
TensorRTExecutor::TensorRTExecutor(const std::shared_ptr<mindspore::Context> &context) : context_(context) {}
TensorRTExecutor::TensorRTExecutor(const std::shared_ptr<mindspore::Context> &context, const ConfigInfos &config_infos)
: context_(context), config_infos_(config_infos) {}
TensorRTExecutor::~TensorRTExecutor() {
if (runtime_ != nullptr) {
@ -387,12 +339,13 @@ bool IsHardwareSupport() {
return true;
}
Status TensorRTExecutor::Init() {
bool TensorRTExecutor::Init() {
if (!IsHardwareSupport()) {
return mindspore::kLiteNotSupport;
return false;
}
if (context_ == nullptr) {
MS_LOG_ERROR << "Context cannot be nullptr";
return false;
}
std::vector<std::shared_ptr<DeviceInfoContext>> device_list = context_->MutableDeviceInfo();
@ -401,50 +354,71 @@ Status TensorRTExecutor::Init() {
});
if (iter == device_list.end()) {
MS_LOG(ERROR) << "no gpu device info found for TensorRT.";
return mindspore::kLiteError;
return false;
}
auto gpu_info = (*iter)->Cast<GPUDeviceInfo>();
if (gpu_info == nullptr) {
MS_LOG(ERROR) << "no gpu device info found for TensorRT.";
return mindspore::kLiteError;
return false;
}
device_info_ = gpu_info;
int ret = lite::SetCudaDevice(device_info_);
if (ret != RET_OK) {
return mindspore::kLiteError;
return false;
}
if (runtime_ == nullptr) {
runtime_ = new (std::nothrow) TensorRTRuntime();
if (runtime_ == nullptr) {
MS_LOG(ERROR) << "create TensorRTRuntime failed.";
return mindspore::kLiteError;
return false;
}
}
if (runtime_->Init() != RET_OK) {
MS_LOG(ERROR) << "TensorRTRuntime init failed.";
return mindspore::kLiteError;
return false;
}
runtime_->SetDeviceID(device_info_->GetDeviceID());
auto cuda_ret = cudaStreamCreate(&stream_);
if (cuda_ret != cudaSuccess) {
MS_LOG(ERROR) << "Cuda create stream failed";
return mindspore::kLiteError;
return false;
}
auto cublas_ret = cublasCreate(&cublas_handle_);
if (cublas_ret != CUBLAS_STATUS_SUCCESS) {
MS_LOG(ERROR) << "Cuda create cublas handle failed";
return mindspore::kLiteError;
return false;
}
auto cublaslt_ret = cublasLtCreate(&cublaslt_handle_);
if (cublaslt_ret != CUBLAS_STATUS_SUCCESS) {
MS_LOG(ERROR) << "Cuda create cublaslt handle failed";
return mindspore::kLiteError;
return false;
}
return mindspore::kSuccess;
ret = ParseOptimizationProfile();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Parse input ranges failed.";
return false;
}
return true;
}
int TensorRTExecutor::ParseOptimizationProfile() {
auto gpu_context_it = config_infos_.find(kGPUContext);
if (gpu_context_it == config_infos_.end()) {
MS_LOG(INFO) << "do not have input ranges config.";
return RET_OK;
}
auto &gpu_context = gpu_context_it->second;
ProfileConfigs profile_configs;
if (!ProfileParser::Parse(gpu_context, true, &profile_configs)) {
MS_LOG_WARNING << "Failed to parse profile info from '" << kGPUContext << "'";
return RET_FAILED;
}
trt_profile_configs_ = profile_configs;
return RET_OK;
}
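Below is a hedged sketch, not part of this commit, of the structure ParseOptimizationProfile is expected to fill. It uses only the ProfileConfigs/ProfileItem fields referenced elsewhere in this diff (input_infos with name/input_shape, profiles with per-input min/opt/max dims); the input name and shape values are placeholders.
  ProfileConfigs configs;
  configs.input_infos.resize(1);
  configs.input_infos[0].name = "x";                       // placeholder input name
  configs.input_infos[0].input_shape = {-1, 3, 224, 224};  // -1 marks the dynamic axis
  ProfileItem profile_item;
  profile_item.inputs.resize(1);
  profile_item.inputs[0].min_dims = {1, 3, 224, 224};
  profile_item.inputs[0].opt_dims = {8, 3, 224, 224};
  profile_item.inputs[0].max_dims = {16, 3, 224, 224};
  configs.profiles.push_back(profile_item);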
Status TensorRTExecutor::BuildSubGraph(const KernelGraphPtr &kernel_graph) {
@ -490,57 +464,10 @@ Status TensorRTExecutor::BuildSubGraph(const KernelGraphPtr &kernel_graph) {
if (status != kSuccess) {
return status;
}
tensorrt_graph_list_.push_back(TrtGraphContext{tensorrt_ops, inputs_, outputs_, nullptr});
status = UpdateTrtSubGraphInputsDepend();
if (status != kSuccess) {
return status;
}
for (size_t i = 0; i < tensorrt_graph_list_.size(); i++) {
auto &sub_graph = tensorrt_graph_list_[i];
sub_graph.sub_graph = CreateTensorRTGraph(sub_graph.tensorrt_ops, kernel_graph, tensorrt_subgraph_index,
sub_graph.inputs, sub_graph.outputs);
if (!sub_graph.sub_graph) {
MS_LOG(ERROR) << "Create tensorrt graph failed";
return mindspore::kLiteError;
}
}
return mindspore::kSuccess;
}
Status TensorRTExecutor::UpdateTrtSubGraphInputsDepend() {
std::set<TensorInfo> val_set;
for (auto &item : inputs_) {
val_set.emplace(item);
}
std::vector<std::set<TensorInfo>> sub_graphs_outputs;
for (size_t i = 0; i < tensorrt_graph_list_.size(); i++) {
auto &sub_graph = tensorrt_graph_list_[i];
std::set<TensorInfo> sub_graph_tensor_infos;
for (auto &op : sub_graph.tensorrt_ops) {
for (auto &item : op->outputs()) {
sub_graph_tensor_infos.emplace(item);
}
}
sub_graphs_outputs.emplace_back(std::move(sub_graph_tensor_infos));
for (auto &input : sub_graph.inputs) {
if (!val_set.count(input)) {
size_t k = 0;
for (; k < i; k++) {
if (sub_graphs_outputs[k].count(input)) {
tensorrt_graph_list_[k].outputs.push_back(input);
break;
}
}
if (k >= i) {
MS_LOG(ERROR) << "Cannot found input " << input.Name();
return mindspore::kLiteError;
}
}
}
for (auto &item : sub_graph.outputs) {
val_set.emplace(item);
}
tensorrt_graph_ = CreateTensorRTGraph(tensorrt_ops, kernel_graph, tensorrt_subgraph_index, inputs_, outputs_);
if (!tensorrt_graph_) {
MS_LOG(ERROR) << "Create tensorrt graph failed";
return mindspore::kLiteError;
}
return mindspore::kSuccess;
}
@ -583,9 +510,18 @@ std::shared_ptr<TensorRTSubGraph> TensorRTExecutor::CreateTensorRTGraph(const st
const KernelGraphPtr &graph, int index,
const std::vector<TensorInfo> &inputs,
const std::vector<TensorInfo> &outputs) {
auto tensorrt_graph =
std::make_shared<TensorRTSubGraph>(ops, inputs, outputs, context_.get(), device_info_, runtime_, support_resize_,
support_hw_resize_, min_dims_, opt_dims_, max_dims_);
if (!trt_profile_configs_.input_infos.empty()) {
std::vector<std::string> input_names;
std::transform(inputs.begin(), inputs.end(), std::back_inserter(input_names),
[](auto &item) { return item.Name(); });
if (!ProfileParser::ReorderByInputNames(input_names, &trt_profile_configs_)) {
MS_LOG_ERROR << "Reorder profile by input names failed, input names: " << input_names;
return nullptr;
}
}
auto tensorrt_graph = std::make_shared<TensorRTSubGraph>(ops, inputs, outputs, context_.get(), device_info_, runtime_,
support_resize_, support_hw_resize_, trt_profile_configs_);
if (tensorrt_graph == nullptr) {
MS_LOG(ERROR) << "new tensorrt_graph failed.";
return nullptr;
@ -635,10 +571,6 @@ bool TensorRTExecutor::CompileGraph(const FuncGraphPtr &graph, const std::map<st
bool TensorRTExecutor::RunGraph(const FuncGraphPtr &graph, const std::vector<tensor::Tensor> &inputs,
std::vector<tensor::Tensor> *outputs, const std::map<string, string> &compile_options) {
if (tensorrt_graph_list_.empty()) {
MS_LOG(ERROR) << "TensorRTGraph is nullptr.";
return false;
}
if (inputs.size() != inputs_.size()) {
MS_LOG(ERROR) << "Graph inputs size " << inputs_.size() << " != execute outputs size " << inputs.size();
return false;
@ -647,70 +579,20 @@ bool TensorRTExecutor::RunGraph(const FuncGraphPtr &graph, const std::vector<ten
MS_LOG(ERROR) << "Graph outputs size " << inputs_.size() << " != expected outputs size " << outputs->size();
return false;
}
if (tensorrt_graph_list_.size() == 1) {
auto &sub_graph = tensorrt_graph_list_[0];
if (sub_graph.sub_graph == nullptr) {
MS_LOG(ERROR) << "TensorRT subgraph is nullptr.";
return false;
}
return sub_graph.sub_graph->Execute(inputs, outputs) == RET_OK;
if (tensorrt_graph_ == nullptr) {
MS_LOG(ERROR) << "TensorRT subgraph is nullptr.";
return false;
}
std::map<TensorInfo, std::shared_ptr<tensor::Tensor>> tensor_val_map;
for (size_t i = 0; i < inputs.size(); i++) {
tensor_val_map[inputs_[i]] = std::make_shared<tensor::Tensor>(inputs[i]);
}
for (auto &sub_graph : tensorrt_graph_list_) {
std::vector<tensor::Tensor> sub_inputs;
std::vector<tensor::Tensor> sub_outputs;
for (auto &item : sub_graph.inputs) {
auto it = tensor_val_map.find(item);
if (it == tensor_val_map.end()) {
MS_LOG(ERROR) << "Cannot find input tensor " << item.Name() << " in tensor val map";
return false;
}
sub_inputs.push_back(*it->second);
}
if (sub_graph.sub_graph == nullptr) {
MS_LOG(ERROR) << "TensorRT subgraph is nullptr.";
return false;
}
auto ret = sub_graph.sub_graph->Execute(sub_inputs, &sub_outputs);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Failed to execute graph.";
return false;
}
if (sub_graph.outputs.size() != sub_outputs.size()) {
MS_LOG(ERROR) << "Subgraph outputs size " << sub_graph.outputs.size() << " != result outputs size "
<< sub_outputs.size();
return false;
}
for (size_t i = 0; i < sub_graph.outputs.size(); i++) {
tensor_val_map[sub_graph.outputs[i]] = std::make_shared<tensor::Tensor>(sub_outputs[i]);
}
}
outputs->clear();
for (auto &item : outputs_) {
auto it = tensor_val_map.find(item);
if (it == tensor_val_map.end()) {
MS_LOG(ERROR) << "Cannot find input tensor " << item.Name() << " in tensor val map";
return false;
}
outputs->push_back(*it->second);
}
return true;
return tensorrt_graph_->Execute(inputs, outputs) == RET_OK;
}
bool TensorRTExecutor::Resize(const FuncGraphPtr &, const std::vector<tensor::Tensor> &inputs,
const std::vector<std::vector<int64_t>> &new_shapes) {
if (tensorrt_graph_list_.size() == 1) {
auto &sub_graph = tensorrt_graph_list_[0];
if (sub_graph.sub_graph == nullptr) {
MS_LOG(ERROR) << "TensorRT subgraph is nullptr.";
return false;
}
return sub_graph.sub_graph->Resize(inputs, new_shapes) == RET_OK;
if (tensorrt_graph_ == nullptr) {
MS_LOG(ERROR) << "TensorRT subgraph is nullptr.";
return false;
}
return false;
return tensorrt_graph_->Resize(inputs, new_shapes) == RET_OK;
}
std::vector<tensor::Tensor> TensorRTExecutor::GetInputInfos(const FuncGraphPtr &) {
@ -733,9 +615,12 @@ std::vector<tensor::Tensor> TensorRTExecutor::GetOutputInfos(const FuncGraphPtr
return tensors;
}
static std::shared_ptr<device::GraphExecutor> TensorRTGraphExecutorCreator(const std::shared_ptr<Context> &ctx) {
auto executor = std::make_shared<TensorRTExecutor>(ctx);
executor->Init();
static std::shared_ptr<device::GraphExecutor> TensorRTGraphExecutorCreator(const std::shared_ptr<Context> &ctx,
const ConfigInfos &config_infos) {
auto executor = std::make_shared<TensorRTExecutor>(ctx, config_infos);
if (!executor->Init()) {
return nullptr;
}
return executor;
}

View File

@ -43,13 +43,11 @@ struct TrtGraphContext {
class TensorRTExecutor : public LiteGraphExecutor {
public:
TensorRTExecutor(const std::shared_ptr<mindspore::Context> &context, const std::string &cache_model_path,
size_t vocab_size, size_t device_cache_size, const std::map<std::string, std::string> &ms_cache);
explicit TensorRTExecutor(const std::shared_ptr<mindspore::Context> &context);
TensorRTExecutor(const std::shared_ptr<mindspore::Context> &context, const ConfigInfos &config_infos);
~TensorRTExecutor() override;
Status Init();
bool Init();
bool CompileGraph(const FuncGraphPtr &graph, const std::map<string, string> &compile_options) override;
bool RunGraph(const FuncGraphPtr &graph, const std::vector<tensor::Tensor> &inputs,
@ -61,8 +59,9 @@ class TensorRTExecutor : public LiteGraphExecutor {
std::vector<tensor::Tensor> GetOutputInfos(const FuncGraphPtr &) override;
private:
int ParseOptimizationProfile();
Status BuildSubGraph(const KernelGraphPtr &graph);
Status UpdateTrtSubGraphInputsDepend();
TensorRTOp *FindTensorRTOp(const CNodePtr &cnode, const BaseOperatorPtr &base_operator,
const std::vector<TensorInfo> &input_tensors,
@ -74,6 +73,7 @@ class TensorRTExecutor : public LiteGraphExecutor {
const std::vector<TensorInfo> &outputs);
std::shared_ptr<mindspore::Context> context_{nullptr};
ConfigInfos config_infos_;
std::shared_ptr<GPUDeviceInfo> device_info_{nullptr};
TensorRTRuntime *runtime_{nullptr};
bool support_hw_resize_{true};
@ -88,12 +88,9 @@ class TensorRTExecutor : public LiteGraphExecutor {
std::vector<kernel::Kernel> kernel_list_;
std::vector<TrtGraphContext> tensorrt_graph_list_;
std::vector<nvinfer1::Dims> min_dims_;
std::vector<nvinfer1::Dims> opt_dims_;
std::vector<nvinfer1::Dims> max_dims_;
ProfileConfigs trt_profile_configs_;
std::shared_ptr<TensorRTSubGraph> tensorrt_graph_ = nullptr;
std::vector<TensorInfo> inputs_;
std::vector<TensorInfo> outputs_;
};

View File

@ -14,13 +14,27 @@
* limitations under the License.
*/
#include "extendrt/delegate/tensorrt/tensorrt_plugin_impl.h"
#ifdef LITE_CUDA_DISTRIBUTION
#include "extendrt/delegate/tensorrt/distribution/distribution_base.h"
#include "plugin/device/gpu/hal/device/distribution/collective_wrapper.h"
#endif
namespace mindspore::lite {
int TensorRTPluginImpl::GetGPUGroupSize() const { return GetGroupSize(NCCL_WORLD_GROUP); }
int TensorRTPluginImpl::GetGPUGroupSize() const {
#ifdef LITE_CUDA_DISTRIBUTION
return GetGroupSize(NCCL_WORLD_GROUP);
#else
return 1;
#endif
}
int TensorRTPluginImpl::GetRankID() const { return GetRankIDByGroup(NCCL_WORLD_GROUP); }
int TensorRTPluginImpl::GetRankID() const {
#ifdef LITE_CUDA_DISTRIBUTION
return GetRankIDByGroup(NCCL_WORLD_GROUP);
#else
return 0;
#endif
}
} // namespace mindspore::lite
mindspore::lite::TensorRTExecutorPluginImplBase *CreateTensorRTPluginImpl() {

View File

@ -24,6 +24,8 @@
#include <numeric>
#include <functional>
#include <fstream>
#include <limits>
#include <unordered_map>
#include "src/extendrt/delegate/delegate_utils.h"
#include "src/common/utils.h"
@ -32,25 +34,23 @@
#include "ops/strided_slice.h"
#include "ops/expand_dims.h"
#include "ops/fusion/topk_fusion.h"
#include "ops/broadcast_to.h"
namespace mindspore::lite {
TensorRTSubGraph::TensorRTSubGraph(std::vector<TensorRTOp *> ops, const std::vector<TensorInfo> &inputs,
const std::vector<TensorInfo> &outputs, const mindspore::Context *ctx,
std::shared_ptr<GPUDeviceInfo> device_info, TensorRTRuntime *runtime,
bool support_resize, bool support_hw_resize,
const std::vector<nvinfer1::Dims> &min_dims,
const std::vector<nvinfer1::Dims> &opt_dims,
const std::vector<nvinfer1::Dims> &max_dims)
const ProfileConfigs &trt_profile_config)
: inputs_(inputs),
outputs_(outputs),
all_ops_(std::move(ops)),
device_info_(device_info),
runtime_(runtime),
min_dims_(min_dims),
opt_dims_(opt_dims),
max_dims_(max_dims) {
trt_profile_config_(trt_profile_config) {
trt_specific_weight_handled_inner_ = {
ops::kNameTranspose, ops::kNameReshape, ops::kNameStridedSlice, ops::kNameExpandDims, ops::kNameTopKFusion,
ops::kNameTranspose, ops::kNameReshape, ops::kNameStridedSlice,
ops::kNameExpandDims, ops::kNameTopKFusion, ops::kNameBroadcastTo,
};
if (!support_resize) {
input_batchsize_index_ = -1;
@ -86,26 +86,55 @@ TensorRTSubGraph::~TensorRTSubGraph() {
}
}
bool TensorRTSubGraph::IsValidProfileDims() const {
if (trt_profile_config_.profiles.empty()) {
MS_LOG(INFO) << "Number of profiles is 0.";
return false;
}
for (auto &profile : trt_profile_config_.profiles) {
if (profile.inputs.size() != trt_profile_config_.input_infos.size()) {
MS_LOG(WARNING) << "Profile input size " << profile.inputs.size() << " != input shape size "
<< trt_profile_config_.input_infos.size();
return false;
}
for (size_t i = 0; i < profile.inputs.size(); i++) {
const auto &profile_input = profile.inputs[i];
const auto &input_info = trt_profile_config_.input_infos[i];
if (profile_input.min_dims.size() != input_info.input_shape.size()) {
MS_LOG(WARNING) << "Profile input " << input_info.name << " min dims number " << profile_input.min_dims.size()
<< " != input shape dim number " << input_info.input_shape.size();
return false;
}
if (profile_input.max_dims.size() != input_info.input_shape.size()) {
MS_LOG(WARNING) << "Profile input " << input_info.name << " max dims number " << profile_input.max_dims.size()
<< " != input shape dim number " << input_info.input_shape.size();
return false;
}
if (profile_input.opt_dims.size() != input_info.input_shape.size()) {
MS_LOG(WARNING) << "Profile input " << input_info.name << " opt dims number " << profile_input.opt_dims.size()
<< " != input shape dim number " << input_info.input_shape.size();
return false;
}
}
}
return true;
}
int TensorRTSubGraph::Init(cudaStream_t stream, cublasHandle_t cublas_handle, cublasLtHandle_t cublaslt_handle) {
auto ret = GetGraphInOutOps(inputs_, outputs_, &in_ops_, &out_ops_, all_ops_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Get TensorRT subgraph input and output ops failed.";
return RET_ERROR;
}
profile_ = runtime_->GetBuilder()->createOptimizationProfile();
if (profile_ == nullptr) {
MS_LOG(ERROR) << "createOptimizationProfile failed.";
return RET_ERROR;
}
ctx_ = new (std::nothrow) TensorRTContext();
if (ctx_ == nullptr) {
MS_LOG(ERROR) << "New TensorRTContext failed.";
return RET_OK;
return RET_ERROR;
}
ctx_->SetRuntime(runtime_);
if (!ctx_->Init()) {
MS_LOG(ERROR) << "New TensorRTContext failed.";
return RET_OK;
return RET_ERROR;
}
if (SetDeviceConfig(stream, cublas_handle, cublaslt_handle) != RET_OK) {
MS_LOG(WARNING) << "set tensorrt config failed.";
@ -115,6 +144,20 @@ int TensorRTSubGraph::Init(cudaStream_t stream, cublasHandle_t cublas_handle, cu
MS_LOG(ERROR) << "create Serializer failed.";
return RET_ERROR;
}
using_input_ranges_ = IsValidProfileDims();
if (using_input_ranges_) {
for (size_t i = 0; i != trt_profile_config_.profiles.size(); ++i) {
profiles_.push_back(runtime_->GetBuilder()->createOptimizationProfile());
}
} else {
profiles_.push_back(runtime_->GetBuilder()->createOptimizationProfile());
}
for (size_t i = 0; i != profiles_.size(); ++i) {
if (profiles_[i] == nullptr) {
MS_LOG(ERROR) << "create optimization profile failed.";
return RET_ERROR;
}
}
engine_ = serializer_->GetSerializedEngine();
if (engine_ != nullptr) {
MS_LOG(INFO) << "using serialized engine " << serialize_file_path_;
@ -130,9 +173,11 @@ int TensorRTSubGraph::Init(cudaStream_t stream, cublasHandle_t cublas_handle, cu
int TensorRTSubGraph::BuildEngine() {
// print all network ops
if (this->config_->addOptimizationProfile(profile_) == -1) {
MS_LOG(ERROR) << "addOptimizationProfile failed.";
return RET_ERROR;
for (auto &profile : profiles_) {
if (this->config_->addOptimizationProfile(profile) == -1) {
MS_LOG(ERROR) << "addOptimizationProfile failed.";
return RET_ERROR;
}
}
MS_LOG(INFO) << "build engine for tensorrt network: " << ctx_->network()->getName();
for (int i = 0; i < ctx_->network()->getNbLayers(); i++) {
@ -184,7 +229,8 @@ int TensorRTSubGraph::SetDeviceConfig(cudaStream_t stream, cublasHandle_t cublas
MS_LOG(INFO) << GetRankID() << " tensorrt subgraph stream: " << stream_;
// config setMaxWorkspaceSize to 2047 MB for max limit
config_->setMaxWorkspaceSize(2047 * (1 << 20));
constexpr size_t kWorkspaceSize = 2047 * (1 << 20);
config_->setMaxWorkspaceSize(kWorkspaceSize);
return RET_OK;
}
@ -197,7 +243,10 @@ bool TensorRTSubGraph::IsInt8Mode() {
return false;
}
nvinfer1::ITensor *TensorRTSubGraph::SetTensorRTNetworkInput(const TensorInfo &in_tensor, size_t index) {
nvinfer1::ITensor *TensorRTSubGraph::SetTensorRTNetworkInput(const TensorInfo &in_tensor, int index) {
if (index < 0) {
return nullptr;
}
for (int i = 0; i < ctx_->network()->getNbInputs(); i++) {
if (in_tensor.Name().compare(ctx_->network()->getInput(i)->getName()) == 0) {
MS_LOG(INFO) << "input tensor is already added in network: " << in_tensor.Name();
@ -211,68 +260,48 @@ nvinfer1::ITensor *TensorRTSubGraph::SetTensorRTNetworkInput(const TensorInfo &i
return nullptr;
}
nvinfer1::Dims input_dims;
if (min_dims_.size() != 0 && min_dims_.size() == opt_dims_.size() && min_dims_.size() == max_dims_.size()) {
if (using_input_ranges_) {
input_dims = SetInputDimsProfile(in_tensor, index);
} else {
input_dims = ParseInputDimsProfile(in_tensor);
input_dims = ParseInputDimsProfile(in_tensor, index);
}
MS_LOG(INFO) << "add network input: " << in_tensor.Name();
return ctx_->network()->addInput(in_tensor.Name().c_str(), cuda_dtype, input_dims);
}
nvinfer1::Dims TensorRTSubGraph::SetInputDimsProfile(const TensorInfo &in_tensor, size_t index) {
nvinfer1::Dims input_dims;
input_dims.nbDims = min_dims_[index].nbDims;
for (int i = 0; i != input_dims.nbDims; ++i) {
input_dims.d[i] = (max_dims_[index].d[i] != min_dims_[index].d[i]) ? -1 : min_dims_[index].d[i];
nvinfer1::Dims TensorRTSubGraph::SetInputDimsProfile(const TensorInfo &in_tensor, int index) {
auto input_info = trt_profile_config_.input_infos[index];
auto input_dims = ConvertCudaDims(input_info.input_shape);
DebugDims("input dims", input_dims);
for (size_t i = 0; i < trt_profile_config_.profiles.size(); i++) {
auto &profile = trt_profile_config_.profiles[i];
auto min_dims = ConvertCudaDims(profile.inputs[index].min_dims);
if (!profiles_[i]->setDimensions(input_info.name.c_str(), nvinfer1::OptProfileSelector::kMIN, min_dims)) {
MS_LOG(ERROR) << "setDimensions of kMIN failed for " << input_info.name;
return input_dims;
}
auto opt_dims = ConvertCudaDims(profile.inputs[index].opt_dims);
if (!profiles_[i]->setDimensions(input_info.name.c_str(), nvinfer1::OptProfileSelector::kOPT, opt_dims)) {
MS_LOG(ERROR) << "setDimensions of kOPT failed for " << input_info.name;
return input_dims;
}
auto max_dims = ConvertCudaDims(profile.inputs[index].max_dims);
if (!profiles_[i]->setDimensions(input_info.name.c_str(), nvinfer1::OptProfileSelector::kMAX, max_dims)) {
MS_LOG(ERROR) << "setDimensions of kMAX failed for " << input_info.name;
return input_dims;
}
DebugDims("min dims", min_dims);
DebugDims("opt dims", opt_dims);
DebugDims("max dims", max_dims);
}
if (profile_ == nullptr) {
MS_LOG(ERROR) << "profile is null.";
return input_dims;
}
if (!profile_->setDimensions(in_tensor.Name().c_str(), nvinfer1::OptProfileSelector::kMIN, min_dims_[index])) {
MS_LOG(ERROR) << "setDimensions of kMIN failed for " << in_tensor.Name();
return input_dims;
}
if (!profile_->setDimensions(in_tensor.Name().c_str(), nvinfer1::OptProfileSelector::kOPT, opt_dims_[index])) {
MS_LOG(ERROR) << "setDimensions of kOPT failed for " << in_tensor.Name();
return input_dims;
}
if (!profile_->setDimensions(in_tensor.Name().c_str(), nvinfer1::OptProfileSelector::kMAX, max_dims_[index])) {
MS_LOG(ERROR) << "setDimensions of kMAX failed for " << in_tensor.Name();
return input_dims;
}
DebugDims(min_dims_[index]);
DebugDims(opt_dims_[index]);
DebugDims(max_dims_[index]);
return input_dims;
}
nvinfer1::Dims TensorRTSubGraph::ParseInputDimsProfile(const TensorInfo &in_tensor) {
nvinfer1::Dims TensorRTSubGraph::ParseInputDimsProfile(const TensorInfo &in_tensor, int index) {
nvinfer1::Dims input_dims = ConvertCudaDims(in_tensor.Shape());
if (profile_ == nullptr) {
MS_LOG(ERROR) << "profile is null.";
return input_dims;
}
if (runtime_->GetBatchSize() == 0) {
runtime_->SetBatchSize(input_dims.d[0]);
MS_LOG(INFO) << "batch size init as " << runtime_->GetBatchSize();
if (input_batchsize_index_ != -1) {
input_dims.d[0] = -1; // dynamic batch size with wildcard N, default batchsize is first dims
input_batchsize_index_ = 0;
}
} else {
if (input_batchsize_index_ != -1) {
for (int n = 0; n < input_dims.nbDims; n++) {
if (input_dims.d[n] == runtime_->GetBatchSize()) {
runtime_->SetBatchSize(std::max(input_dims.d[0], runtime_->GetBatchSize()));
// first dims equals to batchsize
input_dims.d[n] = -1;
input_batchsize_index_ = n;
break;
}
}
}
if (input_batchsize_index_ != -1) {
input_dims.d[0] = -1;
}
// only support NHWC HW dim resize
if (input_hw_index_ != -1) {
@ -281,42 +310,50 @@ nvinfer1::Dims TensorRTSubGraph::ParseInputDimsProfile(const TensorInfo &in_tens
input_dims.d[input_hw_index_] = -1;
input_dims.d[input_hw_index_ + 1] = -1;
}
auto shape = in_tensor.Shape();
// We do not need to check the return of setDimension and addOptimizationProfile here as all dims are explicitly set
nvinfer1::Dims input_dims_min = ConvertCudaDims(shape);
nvinfer1::Dims input_dims_min = ConvertCudaDims(in_tensor.Shape());
if (input_batchsize_index_ != -1) {
input_dims_min.d[input_batchsize_index_] = 1;
input_dims_min.d[0] = 1;
if (input_hw_index_ != -1) {
input_dims_min.d[input_hw_index_] = 1;
input_dims_min.d[input_hw_index_ + 1] = 1;
}
}
if (!profile_->setDimensions(in_tensor.Name().c_str(), nvinfer1::OptProfileSelector::kMIN, input_dims_min)) {
if (!profiles_.front()->setDimensions(in_tensor.Name().c_str(), nvinfer1::OptProfileSelector::kMIN, input_dims_min)) {
MS_LOG(ERROR) << "setDimensions of kMIN failed for " << in_tensor.Name();
return input_dims;
}
nvinfer1::Dims input_dims_opt = ConvertCudaDims(shape);
if (!profile_->setDimensions(in_tensor.Name().c_str(), nvinfer1::OptProfileSelector::kOPT, input_dims_opt)) {
nvinfer1::Dims input_dims_opt = ConvertCudaDims(in_tensor.Shape());
if (!profiles_.front()->setDimensions(in_tensor.Name().c_str(), nvinfer1::OptProfileSelector::kOPT, input_dims_opt)) {
MS_LOG(ERROR) << "setDimensions of kOPT failed for " << in_tensor.Name();
return input_dims;
}
nvinfer1::Dims input_dims_max = ConvertCudaDims(shape);
nvinfer1::Dims input_dims_max = ConvertCudaDims(in_tensor.Shape());
// input_dims_max should be the same with input network dims
if (!profile_->setDimensions(in_tensor.Name().c_str(), nvinfer1::OptProfileSelector::kMAX, input_dims_max)) {
if (!profiles_.front()->setDimensions(in_tensor.Name().c_str(), nvinfer1::OptProfileSelector::kMAX, input_dims_max)) {
MS_LOG(ERROR) << "setDimensions of kMAX failed for " << in_tensor.Name();
return input_dims;
}
DebugDims(input_dims);
DebugDims(input_dims_min);
DebugDims(input_dims_opt);
DebugDims(input_dims_max);
if (trt_profile_config_.profiles.empty()) {
ProfileItem profile_item;
profile_item.inputs.resize(inputs_.size());
trt_profile_config_.profiles.push_back(profile_item);
}
auto &profile_item = trt_profile_config_.profiles.back();
profile_item.inputs[index].min_dims = ConvertMSShape(input_dims_min);
profile_item.inputs[index].opt_dims = ConvertMSShape(input_dims_opt);
profile_item.inputs[index].max_dims = ConvertMSShape(input_dims_max);
DebugDims("input min dims", input_dims_min);
DebugDims("input opt dims", input_dims_opt);
DebugDims("input max dims", input_dims_max);
return input_dims;
}
int TensorRTSubGraph::ParseInputsProfile() {
MS_LOG(INFO) << "using serialied engine.";
for (auto in_tensor : inputs()) {
auto dim = ParseInputDimsProfile(in_tensor);
for (size_t i = 0; i < inputs_.size(); i++) {
auto dim = ParseInputDimsProfile(inputs_[i], i);
if (dim.nbDims <= 0) {
MS_LOG(ERROR) << "input dims is invalid.";
return RET_ERROR;
@ -325,6 +362,15 @@ int TensorRTSubGraph::ParseInputsProfile() {
return RET_OK;
}
int TensorRTSubGraph::GetInputIndexByName(const std::string &name) {
for (size_t i = 0; i != inputs().size(); ++i) {
if (inputs()[i].Name() == name) {
return i;
}
}
return -1;
}
int TensorRTSubGraph::BuildTensorRTGraph() {
MS_ASSERT(!all_ops_.empty());
int ret;
@ -338,7 +384,7 @@ int TensorRTSubGraph::BuildTensorRTGraph() {
// Data From CPU
auto in_tensor = cur_op->inputs()[i];
if (IsSubGraphInputTensor(this->inputs(), in_tensor)) {
nvinfer1::ITensor *trt_tensor = SetTensorRTNetworkInput(in_tensor, i);
nvinfer1::ITensor *trt_tensor = SetTensorRTNetworkInput(in_tensor, GetInputIndexByName(in_tensor.Name()));
if (trt_tensor == nullptr) {
MS_LOG(ERROR) << "SetTensorRTNetworkInput failed for " << in_tensor.Name();
return RET_ERROR;
@ -442,7 +488,7 @@ int TensorRTSubGraph::Prepare() {
return RET_ERROR;
}
int binding_num = this->engine_->getNbBindings();
if (binding_num < 0) {
if (binding_num <= 0) {
MS_LOG(ERROR) << "TensorRTSubGraph binding num < 0.";
return RET_ERROR;
}
@ -452,28 +498,36 @@ int TensorRTSubGraph::Prepare() {
return RET_ERROR;
}
for (auto tensor : inputs_) {
auto device_ptr = runtime_->GetAllocator()->MallocDeviceMem(tensor, tensor.DataSize());
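  // Device buffers for the inputs are sized for the profile with the largest max volume so
  // that later Resize calls within any profile's range can reuse the same allocation.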
profile_index_ = MaxVolumnProfileIndex();
if (this->trt_context_->setOptimizationProfile(profile_index_)) {
MS_LOG(INFO) << "setOptimizationProfile: " << profile_index_;
}
const auto &profile = trt_profile_config_.profiles[profile_index_];
for (size_t i = 0; i != inputs_.size(); ++i) {
auto &tensor = inputs_[i];
auto max_profile_dims = profile.inputs[i].max_dims;
tensor.SetShape(max_profile_dims);
int volumn = std::accumulate(max_profile_dims.begin(), max_profile_dims.end(), 1, std::multiplies<int>());
auto type_size = lite::DataTypeSize(static_cast<enum TypeId>(tensor.DataType()));
auto device_ptr = runtime_->GetAllocator()->MallocDeviceMem(tensor, volumn * type_size);
if (device_ptr == nullptr) {
MS_LOG(ERROR) << "malloc for inputs tensor device memory failed.";
return RET_ERROR;
}
runtime_->SetBatchSize(tensor.Shape()[0]);
int index = this->engine_->getBindingIndex(tensor.Name().c_str());
MS_LOG(INFO) << "device index " << index << " for tensor : " << tensor.Name() << " attr: " << device_ptr;
auto tensor_name = tensor.Name();
trt_in_tensor_name_.push_back(tensor_name);
int index = GetProfileBindingIndex(tensor_name, profile_index_);
MS_LOG(INFO) << "device index " << index << " for tensor : " << tensor_name << " attr: " << device_ptr;
tensor_bindings_[index] = device_ptr;
trt_in_tensor_name_.push_back(tensor.Name());
nvinfer1::Dims input_dims = ConvertCudaDims(tensor.Shape());
nvinfer1::Dims input_dims = ConvertCudaDims(profile.inputs[i].max_dims);
for (int od = 0; od < input_dims.nbDims; od++) {
MS_LOG(DEBUG) << "in tensor " << tensor.Name() << " dims at " << od << " is " << input_dims.d[od];
}
if (!this->trt_context_->setBindingDimensions(index, input_dims)) {
MS_LOG(ERROR) << "invalid input dims of " << tensor.Name();
return RET_ERROR;
}
}
if (!this->trt_context_->allInputDimensionsSpecified()) {
MS_LOG(ERROR) << "input dims need to be specified.";
return RET_ERROR;
@ -486,30 +540,91 @@ int TensorRTSubGraph::Prepare() {
}
}
for (auto &tensor : outputs_) {
int index = this->engine_->getBindingIndex(tensor.Name().c_str());
auto out_dims = trt_context_->getBindingDimensions(index);
int max_index = GetProfileBindingIndex(tensor.Name(), profile_index_);
auto out_dims = trt_context_->getBindingDimensions(max_index);
int elem_num = std::accumulate(out_dims.d, out_dims.d + out_dims.nbDims, 1, std::multiplies<int>());
DebugDims(out_dims);
auto new_shape = lite::ConvertMSShape(out_dims);
MS_LOG(INFO) << "Set output shape of " << tensor.Name() << " to " << new_shape << " by tensorrt binding output";
tensor.SetShape(new_shape);
auto type_size = DataTypeSize(static_cast<enum TypeId>(tensor.DataType()));
DebugDims("out dims", out_dims);
MS_LOG(INFO) << "Set output shape by tensorrt binding output";
tensor.SetShape(lite::ConvertMSShape(out_dims));
auto type_size = lite::DataTypeSize(static_cast<enum TypeId>(tensor.DataType()));
auto device_ptr = runtime_->GetAllocator()->MallocDeviceMem(tensor, elem_num * type_size);
if (device_ptr == nullptr) {
MS_LOG(ERROR) << "malloc for outputs tensor device memory failed.";
return RET_ERROR;
}
tensor_bindings_[index] = device_ptr;
for (size_t j = 0; j != profiles_.size(); ++j) {
int index = GetProfileBindingIndex(tensor.Name(), j);
tensor_bindings_[index] = device_ptr;
}
trt_out_tensor_name_.push_back(tensor.Name());
}
return RET_OK;
}
int TensorRTSubGraph::SelectProfile(const std::vector<ShapeVector> &new_shapes) const {
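  // Collect every profile whose [min_dims, max_dims] ranges cover all of the new input
  // shapes; the first match is used, and -1 signals that no profile fits.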
std::vector<int> profile_index;
for (size_t i = 0; i < profiles_.size(); ++i) {
const auto &profile = trt_profile_config_.profiles[i];
bool condition = true;
for (size_t j = 0; j < trt_in_tensor_name_.size(); ++j) {
auto new_shape = new_shapes[j];
auto profile_input = profile.inputs[j];
if (new_shape.size() != profile_input.max_dims.size()) {
condition = false;
} else {
for (size_t od = 0; od < new_shape.size(); od++) {
if (new_shape[od] < profile_input.min_dims[od] || new_shape[od] > profile_input.max_dims[od]) {
condition = false;
break;
}
}
}
}
if (condition) {
profile_index.push_back(i);
}
}
return profile_index.empty() ? -1 : profile_index.front();
}
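// Editorial sketch, not part of this commit: SelectProfile above returns the first
// optimization profile whose [min_dims, max_dims] range covers every new input shape,
// or -1 when none matches. The same per-dimension test in isolation, assuming ShapeVector
// is std::vector<int64_t> and <vector> is available from this file's includes:
bool ShapeInProfileRange(const std::vector<int64_t> &shape, const std::vector<int64_t> &min_dims,
                         const std::vector<int64_t> &max_dims) {
  if (shape.size() != min_dims.size() || shape.size() != max_dims.size()) {
    return false;  // rank mismatch can never fit the profile
  }
  for (size_t d = 0; d < shape.size(); ++d) {
    if (shape[d] < min_dims[d] || shape[d] > max_dims[d]) {
      return false;  // this dimension falls outside the profile's range
    }
  }
  return true;
}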
size_t TensorRTSubGraph::MaxVolumnProfileIndex() const {
int64_t max_volumn = std::numeric_limits<int64_t>::min();
size_t max_volumn_index = 0;
for (size_t i = 0; i < trt_profile_config_.profiles.size(); ++i) {
const auto &profile = trt_profile_config_.profiles[i];
// judged by the first input tensor only
int64_t volumn = std::accumulate(profile.inputs[0].max_dims.begin(), profile.inputs[0].max_dims.end(), 1,
std::multiplies<int64_t>());
if (volumn > max_volumn) {
max_volumn_index = i;
max_volumn = volumn;
}
}
return max_volumn_index;
}
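// Editorial note, not part of this commit: the max-volume profile (judged by the first
// input's max_dims only) is the one whose shapes size the device buffers in Prepare()
// above, so a single allocation can serve every profile. Worked example of the implied
// buffer size for one float input:
int64_t MaxProfileBytes(const std::vector<int64_t> &max_dims, size_t type_size) {
  int64_t volume = std::accumulate(max_dims.begin(), max_dims.end(), static_cast<int64_t>(1),
                                   std::multiplies<int64_t>());
  return volume * static_cast<int64_t>(type_size);
}
// MaxProfileBytes({8, 3, 640, 640}, sizeof(float)) == 8 * 3 * 640 * 640 * 4 == 39321600 bytes.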
int TensorRTSubGraph::GetProfileBindingIndex(const std::string &name, size_t profile_index) {
std::string binding_name = name;
if (profile_index != 0) {
binding_name += " [profile " + std::to_string(profile_index) + "]";
}
return this->engine_->getBindingIndex(binding_name.c_str());
}
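// Editorial sketch, not part of this commit: with several optimization profiles, the
// pre-8.5 TensorRT binding API duplicates every binding per profile; profile 0 keeps the
// plain tensor name and profile k > 0 is named "name [profile k]", which is the string
// GetProfileBindingIndex reconstructs above. A hypothetical helper listing all bindings
// of one tensor:
void DumpProfileBindings(nvinfer1::ICudaEngine *engine, const std::string &tensor_name, size_t num_profiles) {
  for (size_t k = 0; k < num_profiles; ++k) {
    std::string binding_name = tensor_name;
    if (k != 0) {
      binding_name += " [profile " + std::to_string(k) + "]";
    }
    MS_LOG(DEBUG) << binding_name << " -> binding index " << engine->getBindingIndex(binding_name.c_str());
  }
}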
int TensorRTSubGraph::OnNewInputShapes(const std::vector<ShapeVector> &new_shapes) {
if (inputs_.size() != new_shapes.size()) {
MS_LOG(ERROR) << "Graph inputs size " << inputs_.size() << " != resize input size " << new_shapes.size();
return RET_ERROR;
}
auto select_profile_index = SelectProfile(new_shapes);
if (select_profile_index < 0) {
MS_LOG(ERROR) << "Not support input shape " << new_shapes;
return RET_ERROR;
}
profile_index_ = static_cast<size_t>(select_profile_index);
if (!this->trt_context_->setOptimizationProfile(profile_index_)) {
MS_LOG(ERROR) << "setOptimizationProfile failed for profile " << profile_index_;
return RET_ERROR;
}
MS_LOG(INFO) << "setOptimizationProfile: " << profile_index_;
int batch_size = -1;
for (size_t i = 0; i < trt_in_tensor_name_.size(); i++) {
if (inputs_[i].Shape() == new_shapes[i]) {
@ -543,9 +658,8 @@ int TensorRTSubGraph::OnNewInputShapes(const std::vector<ShapeVector> &new_shape
return RET_ERROR;
}
batch_size = new_batch_size;
runtime_->SetBatchSize(batch_size);
int index = this->engine_->getBindingIndex(trt_in_tensor_name_[i].c_str());
int index = GetProfileBindingIndex(trt_in_tensor_name_[i], profile_index_);
// Set actual input size
nvinfer1::Dims input_dims = ConvertCudaDims(inputs_[i].Shape());
for (int od = 0; od < input_dims.nbDims; od++) {
@ -563,8 +677,9 @@ int TensorRTSubGraph::OnNewInputShapes(const std::vector<ShapeVector> &new_shape
}
if (batch_size != -1) {
for (size_t i = 0; i < trt_out_tensor_name_.size(); i++) {
int index = this->engine_->getBindingIndex(trt_out_tensor_name_[i].c_str());
auto index = GetProfileBindingIndex(trt_out_tensor_name_[i], profile_index_);
auto out_dims = trt_context_->getBindingDimensions(index);
DebugDims("out dims", out_dims);
auto new_shape = lite::ConvertMSShape(out_dims);
MS_LOG(INFO) << "Set output shape of " << trt_out_tensor_name_[i] << " to " << new_shape
<< " by tensorrt binding output";
@ -610,13 +725,13 @@ int TensorRTSubGraph::PreExecute(const std::vector<tensor::Tensor> &inputs,
}
runtime_->GetAllocator()->MarkMemValid(trt_tensor_name, true);
}
int index = this->engine_->getBindingIndex(trt_tensor_name.c_str());
int index = GetProfileBindingIndex(trt_tensor_name, profile_index_);
MS_LOG(INFO) << "device index " << index << " for tensor : " << trt_tensor_name << " attr: " << device_ptr;
tensor_bindings_[index] = device_ptr;
}
for (size_t i = 0; i < trt_out_tensor_name_.size(); i++) {
const auto &trt_out_tensor_name = trt_out_tensor_name_[i];
int index = this->engine_->getBindingIndex(trt_out_tensor_name.c_str());
int index = GetProfileBindingIndex(trt_out_tensor_name, profile_index_);
void *device_ptr = nullptr;
if (outputs.size() > i) {
auto &output = outputs[i];
@ -651,16 +766,10 @@ int TensorRTSubGraph::PostExecute(std::vector<tensor::Tensor> *outputs) {
auto has_outputs = !outputs->empty();
for (size_t i = 0; i < trt_out_tensor_name_.size(); i++) {
const auto &trt_out_tensor_name = trt_out_tensor_name_[i];
int index = this->engine_->getBindingIndex(trt_out_tensor_name.c_str());
auto index = GetProfileBindingIndex(trt_out_tensor_name, profile_index_);
// actual output tensor dims
auto out_dims = this->trt_context_->getBindingDimensions(index);
std::vector<int64_t> new_shape = lite::ConvertMSShape(out_dims);
// batchsize resize need set new batch size
if (input_batchsize_index_ != -1) {
if (runtime_->GetBatchSize() != new_shape[output_batchsize_index_]) {
new_shape[output_batchsize_index_] = runtime_->GetBatchSize();
}
}
outputs_[i].SetShape(new_shape);
for (int od = 0; od < out_dims.nbDims; od++) {
MS_LOG(DEBUG) << "out tensor " << trt_out_tensor_name << " dims at " << od << " is " << new_shape[od];
@ -712,20 +821,6 @@ bool TensorRTSubGraph::ValidInputResizeDims(const nvinfer1::Dims &construct_dims
MS_LOG(ERROR) << "invalid resize input.";
return false;
}
if (input_hw_index_ == -1) {
// only NHWC format support HW resize, otherwise only support batchsize resize
for (int d = 0; d < construct_dims.nbDims; d++) {
if (d != input_batchsize_index_ && construct_dims.d[d] != resize_input_shape[d]) {
MS_LOG(ERROR) << "only support dynamic batch size resize input.";
return false;
}
}
} else if ((input_hw_index_ == 1 && construct_dims.d[DIMENSION_3D] != resize_input_shape[DIMENSION_3D]) ||
(input_hw_index_ == DIMENSION_2D && construct_dims.d[1] != resize_input_shape[1])) {
// input may be nhwc || nchw
MS_LOG(ERROR) << "don't support dynamic channel resize input.";
return false;
}
return true;
}

View File

@ -28,6 +28,7 @@
#include "src/extendrt/delegate/tensorrt/op/tensorrt_op.h"
#include "src/extendrt/delegate/parameter_cache/embedding_cache_manager.h"
#include "include/api/context.h"
#include "common/config_infos.h"
namespace mindspore::lite {
using mindspore::lite::RET_ERROR;
@ -42,8 +43,7 @@ class TensorRTSubGraph {
TensorRTSubGraph(std::vector<TensorRTOp *> ops, const std::vector<TensorInfo> &inputs,
const std::vector<TensorInfo> &outputs, const mindspore::Context *ctx,
std::shared_ptr<GPUDeviceInfo> device_info, TensorRTRuntime *runtime, bool support_resize,
bool support_hw_resize, const std::vector<nvinfer1::Dims> &min_dims,
const std::vector<nvinfer1::Dims> &opt_dims, const std::vector<nvinfer1::Dims> &max_dims);
bool support_hw_resize, const ProfileConfigs &trt_profile_config);
~TensorRTSubGraph();
int Prepare();
@ -63,6 +63,7 @@ class TensorRTSubGraph {
std::vector<TensorInfo> &outputs() { return outputs_; }
private:
int GetInputIndexByName(const std::string &name);
int BuildEngine();
int SetDeviceConfig(cudaStream_t stream, cublasHandle_t cublas_handle, cublasLtHandle_t cublaslt_handle);
@ -71,7 +72,7 @@ class TensorRTSubGraph {
bool SupportFP16();
nvinfer1::ITensor *SetTensorRTNetworkInput(const TensorInfo &in_tensor, size_t index);
nvinfer1::ITensor *SetTensorRTNetworkInput(const TensorInfo &in_tensor, int index);
ITensorHelper FindTensorRTInputs(TensorRTOp *cur_op, const TensorInfo &in_tensor);
@ -85,17 +86,21 @@ class TensorRTSubGraph {
int HandleCacheTensor(TensorRTOp *cur_op, const TensorInfo &in_tensor);
nvinfer1::Dims ParseInputDimsProfile(const TensorInfo &in_tensor);
nvinfer1::Dims SetInputDimsProfile(const TensorInfo &in_tensor, size_t index);
nvinfer1::Dims ParseInputDimsProfile(const TensorInfo &in_tensor, int index);
nvinfer1::Dims SetInputDimsProfile(const TensorInfo &in_tensor, int index);
int ParseInputsProfile();
bool ValidInputResizeDims(const nvinfer1::Dims &construct_dims, const std::vector<int64_t> &resize_input_shape);
int PreExecute(const std::vector<tensor::Tensor> &inputs, const std::vector<tensor::Tensor> &outputs);
int PostExecute(std::vector<tensor::Tensor> *outputs);
int OnNewInputShapes(const std::vector<ShapeVector> &inputs);
size_t MaxVolumnProfileIndex() const;
int SelectProfile(const std::vector<ShapeVector> &new_shapes) const;
int GetProfileBindingIndex(const std::string &name, size_t profile_index);
bool ValidInputResizeDims(const nvinfer1::Dims &construct_dims, const std::vector<int64_t> &resize_input_shape);
bool IsValidProfileDims() const;
std::string name_;
std::vector<TensorInfo> inputs_;
std::vector<TensorInfo> outputs_;
@ -122,7 +127,6 @@ class TensorRTSubGraph {
nvinfer1::IBuilderConfig *config_{nullptr};
nvinfer1::ICudaEngine *engine_{nullptr};
nvinfer1::IExecutionContext *trt_context_{nullptr};
nvinfer1::IOptimizationProfile *profile_{nullptr};
TensorRTContext *ctx_;
@ -138,9 +142,10 @@ class TensorRTSubGraph {
std::string serialize_file_path_;
cudaStream_t stream_{nullptr};
std::vector<nvinfer1::Dims> min_dims_;
std::vector<nvinfer1::Dims> opt_dims_;
std::vector<nvinfer1::Dims> max_dims_;
std::vector<nvinfer1::IOptimizationProfile *> profiles_{};
bool using_input_ranges_{false};
ProfileConfigs trt_profile_config_;
size_t profile_index_{0};
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_TENSORRT_SUBGRAPH_H_

View File

@ -75,7 +75,6 @@ nvinfer1::Dims ConvertCudaDims(const TensorInfo &ms_tensor) {
}
return dims;
}
bool SameDims(nvinfer1::Dims dims, const std::vector<int64_t> &shape) {
if (dims.nbDims != static_cast<int>(shape.size())) {
return false;
@ -240,9 +239,9 @@ std::experimental::optional<ActivationParams> TryConvertActivationType(Activatio
{ActivationType::RELU6, ActivationParams{nvinfer1::ActivationType::kCLIP, true, 0, true, 6}},
{ActivationType::RELU1, ActivationParams{nvinfer1::ActivationType::kCLIP, true, 0, true, 1}},
{ActivationType::HARD_TANH, ActivationParams{nvinfer1::ActivationType::kCLIP, true, -1, true, 1}},
{ActivationType::SWISH, ActivationParams{nvinfer1::ActivationType::kSIGMOID, false, 0, false, 0}},
// using plugin
{ActivationType::GELU, ActivationParams{nvinfer1::ActivationType::kTHRESHOLDED_RELU, false, 0, false, 0}}};
{ActivationType::GELU, ActivationParams{nvinfer1::ActivationType::kTHRESHOLDED_RELU, false, 0, false, 0}},
{ActivationType::SWISH, ActivationParams{nvinfer1::ActivationType::kSIGMOID, false, 0, false, 0}}};
return action_map.find(activation_type) != action_map.end()
? std::experimental::optional<ActivationParams>(action_map[activation_type])
: std::experimental::nullopt;
@ -270,6 +269,10 @@ nvinfer1::ITensor *ConvertTensorWithExpandDims(TensorRTContext *ctx, const Tenso
MS_LOG(ERROR) << "network is null for ConvertTensorWithExpandDims";
return nullptr;
}
if (!ms_tensor.IsConst()) {
MS_LOG(ERROR) << "ConvertTensorWithExpandDims from a MSTensor with nullptr data";
return nullptr;
}
auto origin_shape = ms_tensor.Shape();
std::vector<int64_t> convert_shape(expect_shape);
AlignShapeRank(&origin_shape, convert_shape);
@ -295,10 +298,6 @@ nvinfer1::ITensor *ConvertTensorWithExpandDims(TensorRTContext *ctx, const Tenso
return nullptr;
}
nvinfer1::DataType data_type = ConvertDataType(ms_tensor.DataType());
if (!ms_tensor.IsConst()) {
MS_LOG(ERROR) << "ConvertTensorWithExpandDims from a MSTensor with nullptr data";
return nullptr;
}
nvinfer1::Weights weights{data_type, ms_tensor.Data(), ms_tensor.ElementNum()};
nvinfer1::IConstantLayer *constant_tensor = ctx->network()->addConstant(dims, weights);
if (constant_tensor == nullptr) {
@ -709,6 +708,87 @@ int ParseData2Vector(const TensorInfo &ms_tensor, std::vector<float> *dst) {
return RET_OK;
}
nvinfer1::ITensor *ExpandDim(TensorRTContext *ctx, nvinfer1::ITensor *input_tensor, int axis) {
// input has to be preprocessed to NCHW
auto input_dims = input_tensor->getDimensions();
nvinfer1::IShuffleLayer *shuffle_layer = ctx->network()->addShuffle(*input_tensor);
// if the expand axis is not the last dim and the shape is dynamic, expand at the last dim first and then transpose the new axis into place
bool special_expand = false;
for (int i = 0; i < input_dims.nbDims; i++) {
special_expand = special_expand || input_dims.d[i] == -1;
}
special_expand = special_expand && (axis != -1 && axis != input_dims.nbDims);
if (special_expand) {
std::vector<int64_t> new_shape;
for (int i = 0; i < input_dims.nbDims; i++) {
new_shape.push_back(input_dims.d[i] == -1 ? 0 : input_dims.d[i]);
}
new_shape.push_back(1);
nvinfer1::Dims new_dims = ConvertCudaDims(new_shape);
if (new_dims.nbDims == -1) {
return nullptr;
}
shuffle_layer->setReshapeDimensions(new_dims);
// transpose
nvinfer1::Permutation perm{};
for (int i = 0; i < new_dims.nbDims; i++) {
if (i < axis) {
perm.order[i] = i;
} else if (i == axis) {
perm.order[i] = new_dims.nbDims - 1;
} else {
perm.order[i] = i - 1;
}
}
nvinfer1::IShuffleLayer *trans_layer = ctx->network()->addShuffle(*shuffle_layer->getOutput(0));
if (trans_layer == nullptr) {
MS_LOG(ERROR) << "add transpose layer failed for special expand dims op ";
return nullptr;
}
trans_layer->setFirstTranspose(perm);
return trans_layer->getOutput(0);
} else {
std::vector<int64_t> new_shape;
for (int i = 0; i < input_dims.nbDims; i++) {
if (axis == i) {
new_shape.push_back(1);
}
new_shape.push_back(input_dims.d[i] == -1 ? 0 : input_dims.d[i]);
}
if (axis == -1 || axis == input_dims.nbDims) {
new_shape.push_back(1);
}
nvinfer1::Dims new_dims = ConvertCudaDims(new_shape);
if (new_dims.nbDims == -1) {
return nullptr;
}
shuffle_layer->setReshapeDimensions(new_dims);
return shuffle_layer->getOutput(0);
}
}
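// Editorial sketch, not part of this commit: in an IShuffleLayer reshape a 0 means "copy
// this dimension from the input", which is why dynamic (-1) dims are rewritten to 0 above.
// When the expand axis is not last and the shape is dynamic, the code appends the new unit
// dim at the end and transposes it into place; the permutation it builds is:
std::vector<int> ExpandDimPermutation(int new_rank, int axis) {
  std::vector<int> perm(new_rank);
  for (int i = 0; i < new_rank; ++i) {
    if (i < axis) {
      perm[i] = i;             // leading dims stay in place
    } else if (i == axis) {
      perm[i] = new_rank - 1;  // the appended unit dim moves to the expand axis
    } else {
      perm[i] = i - 1;         // the remaining dims shift right by one
    }
  }
  return perm;  // e.g. ExpandDimPermutation(4, 1) == {0, 3, 1, 2} for a rank-3 input, axis 1
}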
nvinfer1::ITensor *Broadcast(TensorRTContext *ctx, nvinfer1::ITensor *input, nvinfer1::ITensor *shape) {
int rank = shape->getDimensions().d[0];
nvinfer1::Dims starts{rank};
std::fill(starts.d, starts.d + rank, 0);
nvinfer1::Dims strides{rank};
std::fill(strides.d, strides.d + rank, 1);
auto slice_layer = ctx->network()->addSlice(*input, starts, {}, strides);
slice_layer->setMode(nvinfer1::SliceMode::kWRAP);
const int INPUT2 = 2;
slice_layer->setInput(INPUT2, *shape);
auto shuffler_output = slice_layer->getOutput(0);
if (shuffler_output == nullptr) {
MS_LOG(ERROR) << "add slice layer failed";
}
return shuffler_output;
}
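// Editorial note, not part of this commit: Broadcast relies on the wrap-mode slice idiom:
// start 0, stride 1, SliceMode::kWRAP, and the target shape fed at runtime as slice input 2
// tile the tensor up to that shape. A hypothetical call site, assuming the declarations in
// this file:
nvinfer1::ITensor *TileToShapeOf(TensorRTContext *ctx, nvinfer1::ITensor *small, nvinfer1::ITensor *reference) {
  auto shape_layer = ctx->network()->addShape(*reference);  // 1-D shape tensor of the reference
  if (shape_layer == nullptr) {
    MS_LOG(ERROR) << "add shape layer failed";
    return nullptr;
  }
  return Broadcast(ctx, small, shape_layer->getOutput(0));
}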
nvinfer1::ITensor *Reshape(TensorRTContext *ctx, nvinfer1::ITensor *input, const std::vector<int64_t> &shape) {
return Reshape(ctx, input, ConvertCudaDims(shape));
}
@ -723,10 +803,23 @@ nvinfer1::ITensor *Reshape(TensorRTContext *ctx, nvinfer1::ITensor *input, const
return reshape_layer->getOutput(0);
}
void DebugDims(const nvinfer1::Dims &dims) {
MS_LOG(DEBUG) << "nbdim : " << dims.nbDims;
void DebugDims(const std::string &key, const nvinfer1::Dims &dims) {
MS_LOG(DEBUG) << key << ":" << dims.nbDims;
for (int i = 0; i != dims.nbDims; ++i) {
MS_LOG(DEBUG) << dims.d[i];
}
}
template <>
nvinfer1::DataType GetNvinferDataType<float>() {
return nvinfer1::DataType::kFLOAT;
}
template <>
nvinfer1::DataType GetNvinferDataType<int>() {
return nvinfer1::DataType::kINT32;
}
template nvinfer1::DataType GetNvinferDataType<float>();
template nvinfer1::DataType GetNvinferDataType<int>();
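// Editorial sketch, not part of this commit: the specializations above map host element
// types to TensorRT dtypes. A hypothetical use when building a Weights descriptor from a
// typed host buffer:
template <typename T>
nvinfer1::Weights MakeWeights(const T *data, int64_t count) {
  return nvinfer1::Weights{GetNvinferDataType<T>(), data, count};
}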
} // namespace mindspore::lite

View File

@ -138,7 +138,14 @@ nvinfer1::ITensor *ConvertConstantTensor1D(TensorRTContext *ctx, int *weights_ve
int ParseData2Vector(const TensorInfo &ms_tensor, std::vector<float> *dst);
void DebugDims(const nvinfer1::Dims &dims);
void DebugDims(const std::string &key, const nvinfer1::Dims &dims);
nvinfer1::ITensor *ExpandDim(TensorRTContext *ctx, nvinfer1::ITensor *input_tensor, int axis);
nvinfer1::ITensor *Broadcast(TensorRTContext *ctx, nvinfer1::ITensor *input, nvinfer1::ITensor *shape);
template <typename T>
nvinfer1::DataType GetNvinferDataType();
template <typename T1, typename T2>
bool SameDims(const std::vector<T1> &shape1, const std::vector<T2> &shape2) {

View File

@ -76,11 +76,13 @@ std::vector<std::string> DefaultInferSession::GetOutputNames() { return std::vec
std::vector<std::string> DefaultInferSession::GetInputNames() { return std::vector<std::string>(); }
MutableTensorImplPtr DefaultInferSession::GetOutputByTensorName(const std::string &tensorName) { return nullptr; }
MutableTensorImplPtr DefaultInferSession::GetInputByTensorName(const std::string &name) { return nullptr; }
std::shared_ptr<InferSession> InferSession::CreateSession(const std::shared_ptr<Context> &context) {
std::shared_ptr<InferSession> InferSession::CreateSession(const std::shared_ptr<Context> &context,
const ConfigInfos &config_info) {
HandleContext(context);
auto session_type = SelectSession(context);
MS_LOG(DEBUG) << "Session type " << static_cast<int64_t>(session_type);
return SessionRegistry::GetInstance().GetSession(session_type, context);
return SessionRegistry::GetInstance().GetSession(session_type, context, config_info);
}
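// Editorial sketch, not part of this commit: a hypothetical call site showing that parsed
// config sections now flow into session creation. ConfigInfos is assumed to be the
// section -> key/value map read from the config file; the section and key names below are
// placeholders, not confirmed by this diff.
std::shared_ptr<InferSession> CreateSessionWithConfigExample() {
  ConfigInfos config_infos;
  config_infos["gpu_context"]["input_shape"] = "x:[1,3,-1,-1]";  // assumed section/key names
  auto context = std::make_shared<Context>();
  context->MutableDeviceInfo().push_back(std::make_shared<GPUDeviceInfo>());
  return InferSession::CreateSession(context, config_infos);
}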
void InferSession::HandleContext(const std::shared_ptr<Context> &context) {
@ -148,7 +150,8 @@ SessionType InferSession::SelectSession(const std::shared_ptr<Context> &context)
return kDefaultSession;
}
static std::shared_ptr<InferSession> DefaultSessionCreator(const std::shared_ptr<Context> &ctx) {
static std::shared_ptr<InferSession> DefaultSessionCreator(const std::shared_ptr<Context> &ctx,
const ConfigInfos &config_infos) {
auto session = std::make_shared<DefaultInferSession>(ctx);
session->Init(ctx);
return session;

View File

@ -29,12 +29,14 @@
#include "extendrt/session/type.h"
#include "common/mutable_tensor_impl.h"
#include "extendrt/utils/kernel_graph_utils.h"
#include "src/common/config_infos.h"
namespace mindspore {
class InferSession : public std::enable_shared_from_this<InferSession> {
public:
virtual ~InferSession() = default;
static std::shared_ptr<InferSession> CreateSession(const std::shared_ptr<Context> &context);
static std::shared_ptr<InferSession> CreateSession(const std::shared_ptr<Context> &context,
const ConfigInfos &config_info);
static SessionType SelectSession(const std::shared_ptr<Context> &context);
virtual Status Init(const std::shared_ptr<Context> &context) = 0;
virtual Status CompileGraph(FuncGraphPtr graph, const void *data = nullptr, size_t size = 0) = 0;

View File

@ -181,7 +181,8 @@ MutableTensorImplPtr GraphSinkSession::GetInputByTensorName(const std::string &n
}
return nullptr;
}
static std::shared_ptr<InferSession> DelegateSessionCreator(const std::shared_ptr<Context> &ctx) {
static std::shared_ptr<InferSession> DelegateSessionCreator(const std::shared_ptr<Context> &ctx,
const ConfigInfos &config_infos) {
auto &device_contexts = ctx->MutableDeviceInfo();
if (device_contexts.empty()) {
return nullptr;
@ -189,7 +190,10 @@ static std::shared_ptr<InferSession> DelegateSessionCreator(const std::shared_pt
auto device_type = device_contexts.at(0)->GetDeviceType();
auto provider = device_contexts.at(0)->GetProvider();
auto delegate = DelegateRegistry::GetInstance().GetDelegate(device_type, provider, ctx);
auto delegate = DelegateRegistry::GetInstance().GetDelegate(device_type, provider, ctx, config_infos);
if (delegate == nullptr) {
return nullptr;
}
auto session = std::make_shared<GraphSinkSession>(delegate);
session->Init(ctx);
return session;

View File

@ -26,18 +26,17 @@ SessionRegistry &SessionRegistry::GetInstance() {
return instance;
}
void SessionRegistry::RegSession(
const mindspore::SessionType &session_type,
const std::function<std::shared_ptr<InferSession>(const std::shared_ptr<Context> &)> &creator) {
void SessionRegistry::RegSession(const mindspore::SessionType &session_type, const InferSessionRegFunc &creator) {
session_map_[session_type] = creator;
}
std::shared_ptr<InferSession> SessionRegistry::GetSession(const mindspore::SessionType &session_type,
const std::shared_ptr<Context> &ctx) {
const std::shared_ptr<Context> &ctx,
const ConfigInfos &config_info) {
auto it = session_map_.find(session_type);
if (it == session_map_.end()) {
return nullptr;
}
return it->second(ctx);
return it->second(ctx, config_info);
}
} // namespace mindspore

View File

@ -23,6 +23,9 @@
#include "extendrt/infer_session.h"
#include "include/api/context.h"
namespace mindspore {
using InferSessionRegFunc =
std::function<std::shared_ptr<InferSession>(const std::shared_ptr<Context> &, const ConfigInfos &)>;
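// Editorial sketch, not part of this commit: a hypothetical creator matching
// InferSessionRegFunc. MySession stands in for a concrete InferSession subclass; creators
// that do not need the parsed config simply ignore config_infos, as the built-in creators
// in this change do.
static std::shared_ptr<InferSession> MySessionCreator(const std::shared_ptr<Context> &ctx,
                                                      const ConfigInfos &config_infos) {
  auto session = std::make_shared<MySession>();
  session->Init(ctx);
  return session;
}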
class SessionRegistry {
public:
SessionRegistry() = default;
@ -30,21 +33,18 @@ class SessionRegistry {
static SessionRegistry &GetInstance();
void RegSession(const mindspore::SessionType &session_type,
const std::function<std::shared_ptr<InferSession>(const std::shared_ptr<Context> &)> &creator);
void RegSession(const mindspore::SessionType &session_type, const InferSessionRegFunc &creator);
std::shared_ptr<InferSession> GetSession(const mindspore::SessionType &session_type,
const std::shared_ptr<Context> &);
std::shared_ptr<InferSession> GetSession(const mindspore::SessionType &session_type, const std::shared_ptr<Context> &,
const ConfigInfos &);
private:
mindspore::HashMap<SessionType, std::function<std::shared_ptr<InferSession>(const std::shared_ptr<Context> &)>>
session_map_;
mindspore::HashMap<SessionType, InferSessionRegFunc> session_map_;
};
class SessionRegistrar {
public:
SessionRegistrar(const mindspore::SessionType &session_type,
const std::function<std::shared_ptr<InferSession>(const std::shared_ptr<Context> &)> &creator) {
SessionRegistrar(const mindspore::SessionType &session_type, const InferSessionRegFunc &creator) {
SessionRegistry::GetInstance().RegSession(session_type, creator);
}
~SessionRegistrar() = default;

View File

@ -294,7 +294,8 @@ std::vector<tensor::TensorPtr> LiteInferSession::ConvertToTensors(
return tensors;
}
static std::shared_ptr<InferSession> LiteInferSessionCreator(const std::shared_ptr<Context> &ctx) {
static std::shared_ptr<InferSession> LiteInferSessionCreator(const std::shared_ptr<Context> &ctx,
const ConfigInfos &config_infos) {
auto session = std::make_shared<LiteInferSession>();
session->Init(ctx);
return session;

View File

@ -277,7 +277,8 @@ MutableTensorImplPtr SingleOpInferSession::GetInputByTensorName(const std::strin
return nullptr;
}
static std::shared_ptr<InferSession> SingleOpSessionCreator(const std::shared_ptr<Context> &ctx) {
static std::shared_ptr<InferSession> SingleOpSessionCreator(const std::shared_ptr<Context> &ctx,
const ConfigInfos &config_infos) {
auto session = std::make_shared<SingleOpInferSession>();
session->Init(ctx);
return session;

View File

@ -31,6 +31,44 @@
namespace mindspore {
const size_t max_depth = 128;
GraphId KernelGraphUtils::graph_sum_ = 0;
std::vector<AnfNodePtr> KernelGraphUtils::GetKernelGraphOutputs(const KernelGraphPtr &func_graph) {
if (!func_graph) {
return {};
}
std::vector<AnfNodePtr> outputs = func_graph->outputs();
while (true) {
auto has_replace = false;
for (auto it = outputs.begin(); it != outputs.end(); ++it) {
auto output = *it;
std::vector<AnfNodePtr> one_outputs;
if (IsPrimitiveCNode(output, prim::kPrimDepend)) {
auto depend = output->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(depend);
output = depend->input(kRealInputIndexInDepend);
}
if (IsPrimitiveCNode(output, prim::kPrimMakeTuple)) {
auto make_tuple = output->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(make_tuple);
auto &inputs = make_tuple->inputs();
one_outputs = std::vector<AnfNodePtr>(inputs.begin() + 1, inputs.end());
} else {
one_outputs = {output};
}
if (one_outputs.size() != 1 || one_outputs[0] != output) {
it = outputs.erase(it);
outputs.insert(it, one_outputs.begin(), one_outputs.end());
has_replace = true;
break;
}
}
if (!has_replace) {
break;
}
}
return outputs;
}
KernelGraphPtr KernelGraphUtils::ConstructKernelGraph(const FuncGraphPtr &func_graph,
std::vector<KernelGraphPtr> *all_out_graph,
mindspore::device::DeviceType device_target) {
@ -965,7 +1003,7 @@ void KernelGraphUtils::GetModelOutputsInfo(uint32_t graph_id, std::vector<tensor
VectorRef vector_outputs;
std::map<tensor::TensorPtr, session::KernelWithIndex> tensor_to_node;
session::KernelMapTensor node_to_tensor;
auto anf_outputs = kernel_graph->outputs();
auto anf_outputs = KernelGraphUtils::GetKernelGraphOutputs(kernel_graph);
for (auto &item : anf_outputs) {
MS_EXCEPTION_IF_NULL(item);
MS_LOG(INFO) << "Create node output[" << item->DebugString() << "]";

View File

@ -24,11 +24,12 @@
#include <string>
#include "backend/common/session/kernel_graph.h"
#include "include/api/visible.h"
namespace mindspore {
using GraphId = uint32_t;
using KernelGraph = mindspore::session::KernelGraph;
class KernelGraphUtils : public std::enable_shared_from_this<KernelGraphUtils> {
class MS_API KernelGraphUtils : public std::enable_shared_from_this<KernelGraphUtils> {
public:
KernelGraphUtils() = default;
virtual ~KernelGraphUtils() = default;
@ -37,6 +38,7 @@ class KernelGraphUtils : public std::enable_shared_from_this<KernelGraphUtils> {
static KernelGraphUtils instance;
return instance;
}
static std::vector<AnfNodePtr> GetKernelGraphOutputs(const KernelGraphPtr &func_graph);
KernelGraphPtr ConstructKernelGraph(const FuncGraphPtr &func_graph, std::vector<KernelGraphPtr> *all_out_graph,
mindspore::device::DeviceType device_target);

View File

@ -22,6 +22,7 @@
#include <memory>
#include "extendrt/utils/runtime_utils.h"
#include "extendrt/utils/kernel_graph_utils.h"
#include "src/extendrt/infer_device_address.h"
#include "include/common/utils/anfalgo.h"
@ -88,7 +89,7 @@ void RuntimeUtils::CopyInputTensorsToKernelGraph(const std::vector<tensor::Tenso
void RuntimeUtils::CopyOutputTensorsFromKernelGraph(std::vector<tensor::Tensor> *outputs, KernelGraphPtr kernel_graph) {
MS_EXCEPTION_IF_NULL(kernel_graph);
outputs->clear();
auto graph_outputs = kernel_graph->outputs();
auto graph_outputs = KernelGraphUtils::GetKernelGraphOutputs(kernel_graph);
for (auto graph_output : graph_outputs) {
auto real_output_with_index = common::AnfAlgo::VisitKernelWithReturnType(graph_output, 0);
auto real_output = real_output_with_index.first;

View File

@ -94,7 +94,8 @@ class TensorDefaultImpl : public MutableTensorImpl {
void SetDeviceData(void *data) override { device_data_ = data; }
void *GetDeviceData() override { return device_data_; }
bool IsConst() const override { return false; }
bool IsConst() const override { return is_const_; }
void SetIsConst(bool is_const) { is_const_ = is_const; }
bool IsDevice() const override { return device_data_ != nullptr; }
@ -138,6 +139,8 @@ class TensorDefaultImpl : public MutableTensorImpl {
mutable const void *data_ = nullptr;
bool own_data_ = false;
bool is_const_ = false;
void ResizeData() const {
if (data_ != nullptr && data_ != buffer_.Data()) {
return;