!41575 TensorRT opt

Merge pull request !41575 from 徐永飞/new_trt
i-robot 2022-09-20 11:53:59 +00:00 committed by Gitee
commit 1c8b861898
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
79 changed files with 2686 additions and 762 deletions

View File

@ -60,8 +60,7 @@ void ParseLine(const std::string &line, std::map<std::string, std::string> *sect
}
} // namespace
int GetAllSectionInfoFromConfigFile(const std::string &file,
std::map<std::string, std::map<std::string, std::string>> *config) {
int GetAllSectionInfoFromConfigFile(const std::string &file, ConfigInfos *config) {
if (file.empty() || config == nullptr) {
MS_LOG(ERROR) << "input Invalid!check file and config.";
return RET_ERROR;

View File

@ -30,13 +30,14 @@
#include <utility>
#include "src/common/utils.h"
#include "src/common/log_adapter.h"
#include "src/common/config_infos.h"
#include "ir/dtype/type_id.h"
namespace mindspore {
namespace lite {
constexpr int MAX_CONFIG_FILE_LENGTH = 1024;
int GetAllSectionInfoFromConfigFile(const std::string &file,
std::map<std::string, std::map<std::string, std::string>> *config);
int GetAllSectionInfoFromConfigFile(const std::string &file, ConfigInfos *config);
void ParserExecutionPlan(const std::map<std::string, std::string> *config_infos,
std::map<std::string, TypeId> *data_type_plan);

View File

@ -0,0 +1,383 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/common/config_infos.h"
#include <vector>
#include "src/common/log_adapter.h"
#include "src/common/common.h"
namespace mindspore {
std::vector<std::string> ProfileParser::Split(const std::string &str, const std::string &delim) {
auto start = 0U;
auto end = str.find(delim);
std::vector<std::string> substrs;
while (end != std::string::npos) {
substrs.push_back(str.substr(start, end - start));
start = end + delim.length();
end = str.find(delim, start);
}
substrs.push_back(str.substr(start, end));
return substrs;
}
bool ProfileParser::StrToInt64(const std::string &str, int64_t *val) {
if (str.empty()) {
return false;
}
int64_t symbol = 1;
size_t index = 0;
if (str[0] == '-') {
symbol = -1;
index++;
}
int64_t val_ret = 0;
constexpr int64_t decimal_times = 10;
for (; index < str.size(); index++) {
auto c = str[index];
if (c < '0' || c > '9') {
return false;
}
val_ret = val_ret * decimal_times + (c - '0');
if (val_ret >= INT32_MAX) {
return false;
}
}
*val = symbol * val_ret;
return true;
}
constexpr int KV_NUM = 2;
bool ProfileParser::ParseShapeStr(const std::vector<std::string> &str_dims, ShapeVector *shape_ptr) {
if (shape_ptr == nullptr) {
return false;
}
if (str_dims.empty()) {
MS_LOG_ERROR << "Invalid input shape dim, dims number cannot be 0";
return false;
}
auto &shape = *shape_ptr;
shape.resize(str_dims.size());
for (size_t i = 0; i != str_dims.size(); ++i) {
int64_t dim = 0;
if (!StrToInt64(str_dims[i], &dim)) {
MS_LOG_ERROR << "Invalid input shape dim, dim value range or format is invalid: " << str_dims[i];
return false;
}
if (dim <= 0 && dim != -1) {
MS_LOG_ERROR << "Invalid input shape dim, dim can only be -1 when dim < 0 " << str_dims[i];
return false;
}
shape[i] = dim;
}
return true;
}
bool ProfileParser::ParseRangeStr(const std::string &range_str, int64_t *min_ptr, int64_t *max_ptr) {
if (min_ptr == nullptr || max_ptr == nullptr) {
return false;
}
auto min_and_max = Split(range_str, "~");
auto &min = *min_ptr;
auto &max = *max_ptr;
constexpr size_t number_range_count = 1;
constexpr size_t min_max_range_count = 2;
if (min_and_max.size() == number_range_count) {
if (!StrToInt64(min_and_max[0], &min)) {
MS_LOG_ERROR << "Invalid dynamic dim value range, dim value range or format is invalid: " << range_str
<< ". It should be 'min~max' or a number.";
return false;
}
max = min;
} else if (min_and_max.size() == min_max_range_count) {
if (!StrToInt64(min_and_max[0], &min) || !StrToInt64(min_and_max[1], &max)) {
MS_LOG_ERROR << "Invalid dynamic dim value range, dim value range or format is invalid: " << range_str
<< ". It should be 'min~max' or a number.";
return false;
}
} else {
MS_LOG_ERROR << "Invalid dynamic dim value range, dim value range or format is invalid: " << range_str
<< ". It should be 'min~max' or a number.";
return false;
}
if (min > max || min <= 0) {
MS_LOG(ERROR) << "Invalid dimension range string format of '" << lite::kDynamicDims << "': " << range_str;
return false;
}
return true;
}
bool ProfileParser::ParseOptDimStr(const std::string &opt_dim_str, int64_t *opt_ptr) {
if (opt_ptr == nullptr) {
return false;
}
auto &opt = *opt_ptr;
if (!StrToInt64(opt_dim_str, &opt)) {
MS_LOG_ERROR << "Invalid opt dim value range, dim value range or format is invalid: " << opt_dim_str
<< ". It should be a number.";
return false;
}
if (opt <= 0) {
MS_LOG(ERROR) << "Invalid opt dim value range '" << lite::kOptimizeDims << "': " << opt_dim_str;
return false;
}
return true;
}
bool ProfileParser::ParseInputShape(const std::string &input_shapes_str, ProfileConfigs *profile_configs_ptr) {
auto &profile_configs = *profile_configs_ptr;
auto input_slices = Split(input_shapes_str, ";");
for (auto input_slice : input_slices) {
auto name_and_shape = Split(input_slice, ":");
if (name_and_shape.size() != KV_NUM) {
MS_LOG(ERROR) << "each tensor has name and shape [" << input_slice << "]";
return false;
}
std::string name = name_and_shape[0];
std::string shape_str = name_and_shape[1];
if (shape_str.front() != '[' || shape_str.back() != ']') {
MS_LOG(ERROR) << "shape format check fail.";
return false;
}
constexpr size_t trim_len = 2;
shape_str = shape_str.substr(1, shape_str.size() - trim_len);
auto str_dims = Split(shape_str, ",");
ProfileInputInfo info;
info.name = name;
ShapeVector &shape = info.input_shape;
if (!ParseShapeStr(str_dims, &shape)) {
MS_LOG(ERROR) << "Invalid input shape dims: " << shape_str;
return false;
}
info.is_dynamic_shape = std::any_of(shape.begin(), shape.end(), [](auto dim) { return dim < 0; });
profile_configs.input_infos.push_back(info);
}
return true;
}
bool ProfileParser::ParseDynamicDims(const std::string &dynamic_dims_str, ProfileConfigs *profile_configs_ptr) {
auto &profile_configs = *profile_configs_ptr;
auto inputs_of_str = Split(dynamic_dims_str, ";");
if (inputs_of_str.size() != profile_configs.input_infos.size()) {
MS_LOG(ERROR) << "The input count " << inputs_of_str.size() << " in '" << lite::kDynamicDims
<< "' != the input count " << profile_configs.input_infos.size() << " '"
<< " in '" << lite::kInputShape;
return false;
}
// for every input
for (size_t input_index = 0; input_index != inputs_of_str.size(); ++input_index) {
auto &info = profile_configs.input_infos[input_index];
auto one_input_str = inputs_of_str[input_index];
auto profiles_of_str = Split(one_input_str, "],[");
if (profiles_of_str.empty()) {
MS_LOG(ERROR) << "The profile count of " << input_index << "th input in '" << lite::kDynamicDims << "' is 0";
return false;
}
if (profile_configs.profiles.empty()) {
profile_configs.profiles.resize(profiles_of_str.size());
for (auto &profile : profile_configs.profiles) {
profile.inputs.resize(profile_configs.input_infos.size());
}
}
if (profiles_of_str.size() != profile_configs.profiles.size()) {
MS_LOG(ERROR) << "The profile count " << profiles_of_str.size() << " of " << input_index << "th input in '"
<< lite::kDynamicDims << "' != profile count " << profile_configs.profiles.size() << " of "
<< (input_index - 1) << " th input";
return false;
}
// for every profile in one input, parse input range: min, max
for (size_t profile_index = 0; profile_index != profiles_of_str.size(); ++profile_index) {
ProfileItem &profile_item = profile_configs.profiles[profile_index];
ProfileInputRange &input_range = profile_item.inputs[input_index];
auto one_profile_str = profiles_of_str[profile_index];
while (one_profile_str.front() == '[' || one_profile_str.front() == ' ') {
one_profile_str = one_profile_str.substr(1);
}
while (one_profile_str.back() == ']' || one_profile_str.back() == ' ') {
one_profile_str = one_profile_str.substr(0, one_profile_str.size() - 1);
}
auto dim_ranges = Split(one_profile_str, ",");
auto &input_shape = info.input_shape;
size_t dynamic_nbdims = std::count(input_shape.begin(), input_shape.end(), -1);
if (dim_ranges.size() != dynamic_nbdims) {
MS_LOG(ERROR) << "Number of dynamic dims in config '" << lite::kDynamicDims << "' " << dim_ranges.size()
<< " != that in '" << lite::kInputShape << "' " << dynamic_nbdims << ".";
return false;
}
size_t range_index = 0;
input_range.min_dims = input_shape;
input_range.max_dims = input_shape;
for (size_t i = 0; i < input_shape.size(); ++i) {
if (input_shape[i] != -1) {
continue;
}
if (!ParseRangeStr(dim_ranges[range_index++], &input_range.min_dims[i], &input_range.max_dims[i])) {
return false;
}
}
input_range.opt_dims = input_range.min_dims; // default
}
}
return true;
}
bool ProfileParser::ParseOptDims(const std::string &opt_dims_str, ProfileConfigs *profile_configs_ptr) {
auto &profile_configs = *profile_configs_ptr;
auto inputs_of_str = Split(opt_dims_str, ";");
if (inputs_of_str.size() != profile_configs.input_infos.size()) {
MS_LOG(ERROR) << "The input count " << inputs_of_str.size() << " in '" << lite::kOptimizeDims
<< "' != the input count " << profile_configs.input_infos.size() << " '"
<< " in '" << lite::kInputShape;
return false;
}
// for every input
for (size_t input_index = 0; input_index != inputs_of_str.size(); ++input_index) {
auto &info = profile_configs.input_infos[input_index];
auto one_input_str = inputs_of_str[input_index];
auto profiles_of_str = Split(one_input_str, "],[");
if (profiles_of_str.size() != profile_configs.profiles.size()) {
MS_LOG(ERROR) << "The profile count " << profiles_of_str.size() << " of " << input_index << "th input in '"
<< lite::kOptimizeDims << "' != profile count " << profile_configs.profiles.size() << " in '"
<< lite::kDynamicDims << "'";
return false;
}
// for every profile in one input, parse input range: min, max
for (size_t profile_index = 0; profile_index != profiles_of_str.size(); ++profile_index) {
ProfileItem &profile_item = profile_configs.profiles[profile_index];
ProfileInputRange &input_range = profile_item.inputs[input_index];
auto one_profile_str = profiles_of_str[profile_index];
while (one_profile_str.front() == '[' || one_profile_str.front() == ' ') {
one_profile_str = one_profile_str.substr(1);
}
while (one_profile_str.back() == ']' || one_profile_str.back() == ' ') {
one_profile_str = one_profile_str.substr(0, one_profile_str.size() - 1);
}
auto opt_dims_vec = Split(one_profile_str, ",");
auto &input_shape = info.input_shape;
size_t dynamic_nbdims = std::count(input_shape.begin(), input_shape.end(), -1);
if (opt_dims_vec.size() != dynamic_nbdims) {
MS_LOG(ERROR) << "Number of dynamic dims in config '" << lite::kOptimizeDims << "' " << opt_dims_vec.size()
<< " != that in '" << lite::kInputShape << "' " << dynamic_nbdims << ".";
return false;
}
size_t dynamic_index = 0;
input_range.opt_dims = input_shape;
for (size_t i = 0; i < input_shape.size(); ++i) {
if (input_shape[i] != -1) {
continue;
}
if (!ParseOptDimStr(opt_dims_vec[dynamic_index++], &input_range.opt_dims[i])) {
return false;
}
}
}
}
return true;
}
bool ProfileParser::Parse(const std::map<std::string, std::string> &context, bool require_opt_when_dym,
ProfileConfigs *profile_configs_ptr) {
if (profile_configs_ptr == nullptr) {
return false;
}
auto &profile_configs = *profile_configs_ptr;
auto get_item = [&context](const std::string &key) -> std::string {
auto it = context.find(key);
return it != context.end() ? it->second : "";
};
auto input_shapes = get_item(lite::kInputShape);
auto dynamic_dims = get_item(lite::kDynamicDims);
auto opt_dims = get_item(lite::kOptimizeDims);
if (input_shapes.empty() && dynamic_dims.empty() && opt_dims.empty()) {
MS_LOG(INFO) << "Do not found config of input range('" << lite::kInputShape << "')";
return true;
}
if (input_shapes.empty()) {
MS_LOG(ERROR) << "Config of '" << lite::kInputShape << " cannot be empty when '" << lite::kInputShape << "' or '"
<< lite::kOptimizeDims << "' is not empty";
return false;
}
if (!ParseInputShape(input_shapes, &profile_configs)) {
MS_LOG(ERROR) << "parse input shape failed.";
return false;
}
if (dynamic_dims.empty()) {
ProfileItem profile_item;
for (size_t i = 0; i < profile_configs.input_infos.size(); i++) {
if (profile_configs.input_infos[i].is_dynamic_shape) {
MS_LOG(ERROR) << "Config of '" << lite::kDynamicDims << "' cannot be empty when " << lite::kInputShape
<< " is dynamic";
return false;
}
auto &input_shape = profile_configs.input_infos[i].input_shape;
ProfileInputRange input_range;
input_range.min_dims = input_shape;
input_range.max_dims = input_shape;
input_range.opt_dims = input_shape;
profile_item.inputs.push_back(input_range);
}
profile_configs.profiles.push_back(profile_item);
return true;
}
if (!ParseDynamicDims(dynamic_dims, &profile_configs)) {
MS_LOG(ERROR) << "parse dynamic dims failed.";
return false;
}
if (require_opt_when_dym) {
if (!ParseOptDims(opt_dims, &profile_configs)) {
MS_LOG(ERROR) << "parse optimization dims failed.";
return false;
}
}
return true;
}
bool ProfileParser::ReorderByInputNames(const std::vector<std::string> &input_names, ProfileConfigs *profile_configs) {
if (input_names.size() != profile_configs->input_infos.size()) {
MS_LOG_ERROR << "Input name size " << input_names.size() << " != profile config input size "
<< profile_configs->input_infos.size();
return false;
}
ProfileConfigs new_profile_configs = *profile_configs;
auto &input_infos = profile_configs->input_infos;
auto &profiles = profile_configs->profiles;
for (size_t input_index = 0; input_index < input_names.size(); input_index++) {
auto input_name = input_names[input_index];
size_t i = 0;
for (; i < input_infos.size(); i++) {
if (input_infos[i].name == input_name) {
new_profile_configs.input_infos[input_index] = input_infos[i];
for (size_t profile_index = 0; profile_index < profiles.size(); profile_index++) {
new_profile_configs.profiles[profile_index].inputs[input_index] = profiles[profile_index].inputs[i];
}
break;
}
}
if (i >= input_infos.size()) {
MS_LOG_ERROR << "Cannot find input " << input_name << " in profile '" << lite::kInputShape << "' config";
return false;
}
}
*profile_configs = new_profile_configs;
return true;
}
} // namespace mindspore
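For orientation, here is a minimal usage sketch of the parser added above. The shape and range strings are made up and only mirror the formats that ParseInputShape, ParseDynamicDims and ParseOptDims accept (one input named x with two dynamic dims and two profiles):

```cpp
#include <map>
#include <string>
#include "src/common/config_infos.h"
#include "src/common/common.h"  // assumed location of lite::kInputShape etc., mirroring config_infos.cc above

void ExampleParse() {  // hypothetical helper, not part of the diff
  std::map<std::string, std::string> context = {
    {mindspore::lite::kInputShape, "x:[1,3,-1,-1]"},           // two dynamic dims
    {mindspore::lite::kDynamicDims, "[32~64,32~64],[96,96]"},  // two profiles for input x
    {mindspore::lite::kOptimizeDims, "[48,48],[96,96]"},       // one opt value per dynamic dim
  };
  mindspore::ProfileConfigs configs;
  if (!mindspore::ProfileParser::Parse(context, /*require_opt_when_dym=*/true, &configs)) {
    return;  // Parse already logged which string was rejected
  }
  // configs.profiles[p].inputs[0].{min_dims, opt_dims, max_dims} now hold full
  // [1,3,H,W] shapes per profile, e.g. min [1,3,32,32] / max [1,3,64,64] for p == 0.
}
```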

View File

@ -0,0 +1,70 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_COMMON_CONFIG_INFOS_H_
#define MINDSPORE_LITE_SRC_COMMON_CONFIG_INFOS_H_
#include <string>
#include <map>
#include <vector>
#include "include/api/visible.h"
#include "mindapi/base/shape_vector.h"
namespace mindspore {
using ConfigInfos = std::map<std::string, std::map<std::string, std::string>>;
struct ProfileInputInfo {
std::string name;
bool is_dynamic_shape = false;
ShapeVector input_shape;
};
struct ProfileInputRange {
ShapeVector min_dims;
ShapeVector opt_dims;
ShapeVector max_dims;
};
struct ProfileItem {
std::vector<ProfileInputRange> inputs;
};
struct ProfileConfigs {
std::vector<ProfileInputInfo> input_infos;
std::vector<ProfileItem> profiles;
};
class MS_API ProfileParser {
public:
static bool Parse(const std::map<std::string, std::string> &context, bool require_opt_when_dym,
ProfileConfigs *profile_configs);
static bool ReorderByInputNames(const std::vector<std::string> &input_names, ProfileConfigs *profile_configs);
private:
static bool ParseOptDims(const std::string &opt_dims_str, ProfileConfigs *profile_configs);
static bool ParseDynamicDims(const std::string &dynamic_dims_str, ProfileConfigs *profile_configs);
static bool ParseInputShape(const std::string &input_shapes_str, ProfileConfigs *profile_configs);
static bool StrToInt64(const std::string &str, int64_t *val);
static std::vector<std::string> Split(const std::string &str, const std::string &delim);
static bool ParseShapeStr(const std::vector<std::string> &str_dims, ShapeVector *shape_ptr);
static bool ParseRangeStr(const std::string &range_str, int64_t *min_ptr, int64_t *max_ptr);
static bool ParseOptDimStr(const std::string &opt_dim_str, int64_t *opt_ptr);
};
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_COMMON_CONFIG_INFOS_H_
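The containers above are indexed the same way the parser fills them: input_infos has one entry per model input, profiles has one entry per optimization profile, and each profile carries one ProfileInputRange per input. A small sketch of walking a parsed result (illustrative, not from the diff):

```cpp
#include "src/common/config_infos.h"

// After a successful ProfileParser::Parse with N inputs and P profiles:
//   configs.input_infos.size()        == N
//   configs.profiles.size()           == P
//   configs.profiles[p].inputs.size() == N   (min/opt/max dims are full shapes)
void DumpProfiles(const mindspore::ProfileConfigs &configs) {  // hypothetical helper
  for (const auto &profile : configs.profiles) {   // one entry per optimization profile
    for (const auto &range : profile.inputs) {     // one entry per model input
      (void)range.min_dims;  // e.g. feed min/opt/max into a TensorRT optimization profile
      (void)range.opt_dims;
      (void)range.max_dims;
    }
  }
}
```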

View File

@ -120,6 +120,7 @@ if(MSLITE_ENABLE_CLOUD_FUSION_INFERENCE)
${CCSRC_DIR}/utils/scoped_long_running.cc
${CCSRC_DIR}/utils/cse.cc
${CCSRC_DIR}/utils/comm_manager.cc
${CCSRC_DIR}/utils/signal_util.cc
${CORE_DIR}/utils/status.cc
)
add_library(mindspore-kernel-graph OBJECT ${KERNEL_GRAPH_SRC})

View File

@ -216,10 +216,37 @@ Status Model::BindGLTexture2DMemory(const std::map<std::string, unsigned int> &i
return kSuccess;
}
Status Model::LoadConfig(const std::vector<char> &config_path) { return kSuccess; }
Status Model::LoadConfig(const std::vector<char> &config_path) {
std::unique_lock<std::mutex> impl_lock(g_impl_init_lock);
if (impl_ != nullptr) {
MS_LOG(ERROR) << "impl_ illegal in LoadConfig.";
return Status(kLiteFileError, "Illegal operation.");
}
impl_ = std::make_shared<ModelImpl>();
if (impl_ == nullptr) {
MS_LOG(ERROR) << "Model implement is null.";
return Status(kLiteFileError, "Fail to load config file.");
}
auto ret = impl_->LoadConfig(CharToString(config_path));
if (ret != kSuccess) {
MS_LOG(ERROR) << "impl_ LoadConfig failed,";
return Status(kLiteFileError, "Invalid config file.");
}
return kSuccess;
}
Status Model::UpdateConfig(const std::vector<char> &section,
const std::pair<std::vector<char>, std::vector<char>> &config) {
return kSuccess;
std::unique_lock<std::mutex> impl_lock(g_impl_init_lock);
if (impl_ == nullptr) {
impl_ = std::make_shared<ModelImpl>();
}
if (impl_ != nullptr) {
return impl_->UpdateConfig(CharToString(section), {CharToString(config.first), CharToString(config.second)});
}
MS_LOG(ERROR) << "Model implement is null!";
return kLiteFileError;
}
} // namespace mindspore
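With the two stubs above filled in, a config file can now reach the session created later by Build. A hedged caller-side sketch, assuming the std::string convenience overloads of LoadConfig/UpdateConfig in include/api/model.h; the file name, section and key are placeholders:

```cpp
#include "include/api/model.h"
#include "include/api/status.h"

// Hypothetical flow: LoadConfig must run before Build, since the code above
// rejects it once impl_ already exists.
void ConfigureModel(mindspore::Model *model) {
  if (model->LoadConfig("trt_profiles.cfg") != mindspore::kSuccess) {  // placeholder path
    return;  // missing or malformed config file
  }
  // Override a single entry without editing the file; names are placeholders.
  (void)model->UpdateConfig("some_section", {"some_key", "some_value"});
  // ... then model->Build(...) and model->Predict(...) as usual.
}
```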

View File

@ -44,7 +44,7 @@ Status ModelImpl::BuildByBufferImpl(const void *model_data, size_t data_size, Mo
// user does not set mindir_path, convert from model_path
mindir_path = model_path.substr(0, model_path.rfind("/"));
}
session_ = InferSession::CreateSession(model_context);
session_ = InferSession::CreateSession(model_context, config_info_);
if (session_ == nullptr) {
MS_LOG(ERROR) << "Create session failed.";
return kLiteNullptr;
@ -254,7 +254,6 @@ Status ModelImpl::Predict() {
auto outputs = GetOutputs();
return Predict(inputs, &outputs);
}
bool ModelImpl::HasPreprocess() { return graph_->graph_data_->GetPreprocess().empty() ? false : true; }
Status ModelImpl::Preprocess(const std::vector<std::vector<MSTensor>> &inputs, std::vector<MSTensor> *outputs) {
@ -345,19 +344,13 @@ Status ModelImpl::PredictWithPreprocess(const std::vector<std::vector<MSTensor>>
}
Status ModelImpl::LoadConfig(const std::string &config_path) {
std::map<std::string, std::map<std::string, std::string>> all_config_info;
ConfigInfos all_config_info;
int ret = lite::GetAllSectionInfoFromConfigFile(config_path, &all_config_info);
if (ret != kSuccess) {
MS_LOG(ERROR) << "GetAllSectionInfoFromConfigFile fail!ret: " << ret;
return kLiteFileError;
}
config_info_ = all_config_info;
std::map<std::string, std::string> config_info = all_config_info[kExecutionPlan];
if (config_info.empty()) {
MS_LOG(WARNING) << "No valid execution plan info in config file.";
return kSuccess;
}
return kSuccess;
}

View File

@ -29,6 +29,8 @@
#include "include/common/utils/utils.h"
#include "ir/func_graph.h"
#include "extendrt/infer_session.h"
#include "src/common/config_infos.h"
#ifndef _WIN32
#include <dlfcn.h>
#endif
@ -73,10 +75,11 @@ class ModelImpl {
std::shared_ptr<Graph> graph_ = nullptr;
std::shared_ptr<InferSession> session_ = nullptr;
std::shared_ptr<Context> context_ = nullptr;
ConfigInfos config_info_;
std::map<std::string, TypeId> execution_plan_;
#ifndef _WIN32
void *handle_ = nullptr;
#endif
std::map<std::string, std::map<std::string, std::string>> config_info_;
};
} // namespace mindspore
#endif // MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_MODEL_MODEL_IMPL_H_

View File

@ -1096,7 +1096,7 @@ Status ModelPool::InitByPath(const std::string &model_path, const std::shared_pt
size_t size = 0;
graph_buf_ = lite::ReadFile(model_path.c_str(), &size);
if (graph_buf_ == nullptr) {
MS_LOG(ERROR) << "read ms model failed, model path: " << model_path;
MS_LOG(ERROR) << "read model failed, model path: " << model_path;
return kLiteNullptr;
}
auto status = Init(graph_buf_, size, runner_config);

View File

@ -24,11 +24,15 @@
#include "runtime/hardware/device_context.h"
#include "src/extendrt/delegate_graph_executor.h"
#include "include/api/context.h"
#include "src/common/config_infos.h"
namespace mindspore {
using mindspore::device::GraphExecutor;
// TODO(zhaizhiqiang): Wrap graph executor as delegate.
// typedef std::shared_ptr<GraphSinkDelegate> (*DelegateCreator)(const std::shared_ptr<Context> &);
typedef std::shared_ptr<GraphExecutor> (*DelegateCreator)(const std::shared_ptr<Context> &);
using DelegateCreator =
std::function<std::shared_ptr<GraphExecutor>(const std::shared_ptr<Context> &, const ConfigInfos &)>;
class MS_API DelegateRegistry {
public:
DelegateRegistry() = default;
@ -48,7 +52,7 @@ class MS_API DelegateRegistry {
}
std::shared_ptr<GraphExecutor> GetDelegate(const mindspore::DeviceType &device_type, const std::string &provider,
const std::shared_ptr<Context> &ctx) {
const std::shared_ptr<Context> &ctx, const ConfigInfos &config_infos) {
// find common delegate
auto it = creator_map_.find(device_type);
if (it == creator_map_.end()) {
@ -58,7 +62,7 @@ class MS_API DelegateRegistry {
if (creator_it == it->second.end()) {
return nullptr;
}
return creator_it->second(ctx);
return creator_it->second(ctx, config_infos);
}
private:

View File

@ -26,6 +26,7 @@ set(LITE_SRC
${LITE_DIR}/src/common/log.cc
${LITE_DIR}/src/common/prim_util.cc
${LITE_DIR}/src/common/tensor_util.cc
${LITE_DIR}/src/common/config_infos.cc
${LITE_DIR}/src/litert/allocator.cc
${LITE_DIR}/src/litert/inner_allocator.cc
${LITE_DIR}/src/litert/runtime_allocator.cc

View File

@ -41,7 +41,9 @@ const int64_t kBufferSize = 1024;
} // namespace
const char litert_provider[] = "litert";
LiteRTGraphExecutor::LiteRTGraphExecutor(const std::shared_ptr<mindspore::Context> &context) : context_(context) {
LiteRTGraphExecutor::LiteRTGraphExecutor(const std::shared_ptr<mindspore::Context> &context,
const ConfigInfos &config_infos)
: context_(context), config_infos_(config_infos) {
lite_session_ = CreateLiteSession(ContextUtils::Convert(context_.get()));
}
@ -355,8 +357,9 @@ bool LiteRTGraphExecutor::IsNeedExtractTensorData(mindspore::schema::MetaGraphT
return false;
}
static std::shared_ptr<device::GraphExecutor> LiteRTGraphExecutorCreator(const std::shared_ptr<Context> &ctx) {
return std::make_shared<LiteRTGraphExecutor>(ctx);
static std::shared_ptr<device::GraphExecutor> LiteRTGraphExecutorCreator(const std::shared_ptr<Context> &ctx,
const ConfigInfos &config_infos) {
return std::make_shared<LiteRTGraphExecutor>(ctx, config_infos);
}
REG_DELEGATE(kCPU, litert_provider, LiteRTGraphExecutorCreator);

View File

@ -28,12 +28,13 @@
#include "include/model.h"
#include "src/litert/lite_session.h"
#include "src/common/helper/infer_helpers.h"
#include "src/common/config_infos.h"
namespace mindspore {
class LiteRTGraphExecutor : public LiteGraphExecutor {
public:
LiteRTGraphExecutor() = default;
explicit LiteRTGraphExecutor(const std::shared_ptr<mindspore::Context> &context);
LiteRTGraphExecutor(const std::shared_ptr<mindspore::Context> &context, const ConfigInfos &config_infos);
~LiteRTGraphExecutor() = default;
bool CompileGraph(const FuncGraphPtr &graph, const std::map<string, string> &compile_options) override;
bool RunGraph(const FuncGraphPtr &graph, const std::vector<tensor::Tensor> &inputs,
@ -56,6 +57,7 @@ class LiteRTGraphExecutor : public LiteGraphExecutor {
private:
const std::shared_ptr<mindspore::Context> context_;
ConfigInfos config_infos_;
lite::LiteGraph lite_graph_;
std::shared_ptr<lite::LiteSession> lite_session_;
std::shared_ptr<mindspore::infer::helper::InferHelpers> helpers_ = nullptr;

View File

@ -39,6 +39,20 @@ __global__ void LogicalOrKernel(const T *input_addr1, const T *input_addr2, T *o
}
}
template <typename T>
__global__ void GreaterOrEqualKernal(const T *input1, const T *input2, T *output, int element_cnt) {
for (int pos = blockIdx.x * blockDim.x + threadIdx.x; pos < element_cnt; pos += blockDim.x * gridDim.x) {
output[pos] = (input1[pos] >= input2[pos]);
}
}
template <typename T>
__global__ void LessOrEqualKernal(const T *input1, const T *input2, T *output, int element_cnt) {
for (int pos = blockIdx.x * blockDim.x + threadIdx.x; pos < element_cnt; pos += blockDim.x * gridDim.x) {
output[pos] = (input1[pos] <= input2[pos]);
}
}
template <typename T>
void LogicalNot(const T *input1, T *output, int element_cnt, cudaStream_t stream) {
LogicalNotKernel<<<GET_BLOCKS(element_cnt), GET_THREADS, 0, stream>>>(input1, output, element_cnt);
@ -54,6 +68,26 @@ void LogicalOr(const T *input1, const T *input2, T *output, int element_cnt, cud
LogicalOrKernel<<<GET_BLOCKS(element_cnt), GET_THREADS, 0, stream>>>(input1, input2, output, element_cnt);
}
template <typename T>
void GreaterOrEqual(const T *input1, const T *input2, T *output, int element_cnt, cudaStream_t stream) {
GreaterOrEqualKernal<<<GET_BLOCKS(element_cnt), GET_THREADS, 0, stream>>>(input1, input2, output, element_cnt);
}
template <typename T>
void LessOrEqual(const T *input1, const T *input2, T *output, int element_cnt, cudaStream_t stream) {
LessOrEqualKernal<<<GET_BLOCKS(element_cnt), GET_THREADS, 0, stream>>>(input1, input2, output, element_cnt);
}
template void GreaterOrEqual(const float *input1, const float *input2, float *output, int element_cnt,
cudaStream_t stream);
template void GreaterOrEqual(const int *input1, const int *input2, int *output, int element_cnt, cudaStream_t stream);
template void LessOrEqual(const float *input1, const float *input2, float *output, int element_cnt,
cudaStream_t stream);
template void LessOrEqual(const int *input1, const int *input2, int *output, int element_cnt, cudaStream_t stream);
template void LogicalNot(const int32_t *input1, int32_t *output, int element_cnt, cudaStream_t stream);
template void LogicalAnd(const int32_t *input1, const int32_t *input2, int32_t *output, int element_cnt,

View File

@ -26,4 +26,10 @@ void LogicalOr(const T *input1, const T *input2, T *output, int element_cnt, cud
template <typename T>
void LogicalNot(const T *input1, T *output, int element_cnt, cudaStream_t stream);
template <typename T>
void GreaterOrEqual(const T *input1, const T *input2, T *output, int element_cnt, cudaStream_t stream);
template <typename T>
void LessOrEqual(const T *input1, const T *input2, T *output, int element_cnt, cudaStream_t stream);
#endif // MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_CDUA_IMPL_LOGICAL_H_
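The new comparison helpers follow the same grid-stride launch pattern as the existing logical kernels and are explicitly instantiated for float and int. A host-side sketch of calling the float version (buffers are assumed to be already-allocated device memory):

```cpp
#include <cuda_runtime.h>
// plus the logical.cuh header above for the GreaterOrEqual / LessOrEqual declarations

void CompareOnDevice(const float *d_a, const float *d_b, float *d_out, int n) {
  // d_a, d_b, d_out are assumed to be cudaMalloc'ed buffers of n floats.
  cudaStream_t stream;
  cudaStreamCreate(&stream);
  GreaterOrEqual(d_a, d_b, d_out, n, stream);  // d_out[i] = (d_a[i] >= d_b[i]) ? 1.0f : 0.0f
  LessOrEqual(d_a, d_b, d_out, n, stream);     // overwrites d_out with the <= result
  cudaStreamSynchronize(stream);
  cudaStreamDestroy(stream);
}
```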

View File

@ -90,8 +90,8 @@ int ActivationOptPlugin::RunCudaActivation(const nvinfer1::PluginTensorDesc *inp
break;
}
case (ActivationType::SWISH): {
CalSwish(GetDimsVolume(inputDesc[0].dims), static_cast<const float *>(inputs[0]),
static_cast<float *>(outputs[0]), stream, device_id_);
Swish(GetDimsVolume(inputDesc[0].dims), static_cast<const float *>(inputs[0]), static_cast<float *>(outputs[0]),
stream, device_id_);
break;
}
default: {

View File

@ -0,0 +1,100 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/extendrt/delegate/tensorrt/op/batchnorm_tensorrt.h"
#include <memory>
#include <numeric>
#include "ops/fused_batch_norm.h"
namespace mindspore::lite {
int BatchNormTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors) {
if (in_tensors.size() != INPUT_SIZE5 && in_tensors.size() != 1) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
return RET_ERROR;
}
if (out_tensors.size() != 1) {
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << in_tensors.size();
return RET_ERROR;
}
return RET_OK;
}
int BatchNormTensorRT::AddInnerOp(TensorRTContext *ctx) {
CHECK_NULL_RETURN(ctx->network());
auto norm_op = AsOps<ops::FusedBatchNorm>();
CHECK_NULL_RETURN(norm_op);
epsilon_ = norm_op->get_epsilon();
ITensorHelper norm_input;
int ret = PreprocessInputs2SameDim(ctx, input(ctx, 0), &norm_input);
if (ret != RET_OK || norm_input.trt_tensor_ == nullptr) {
MS_LOG(ERROR) << "PreprocessInputs2SameDim norm_input failed for " << op_name_;
return RET_ERROR;
}
auto expect_shape = ConvertMSShape(norm_input.trt_tensor_->getDimensions());
gamma_ = ConvertTensorWithExpandDims(ctx, in_tensors_[1], expect_shape, op_name_ + in_tensors_[1].Name());
CHECK_NULL_RETURN(gamma_);
beta_ =
ConvertTensorWithExpandDims(ctx, in_tensors_[BETA_INDEX], expect_shape, op_name_ + in_tensors_[BETA_INDEX].Name());
CHECK_NULL_RETURN(beta_);
mean_ =
ConvertTensorWithExpandDims(ctx, in_tensors_[MEAN_INDEX], expect_shape, op_name_ + in_tensors_[MEAN_INDEX].Name());
CHECK_NULL_RETURN(mean_);
var_ =
ConvertTensorWithExpandDims(ctx, in_tensors_[VAR_INDEX], expect_shape, op_name_ + in_tensors_[VAR_INDEX].Name());
CHECK_NULL_RETURN(var_);
return RunAsTrtOps(ctx, norm_input);
}
int BatchNormTensorRT::RunAsTrtOps(TensorRTContext *ctx, ITensorHelper norm_input) {
// var + min epsilon
auto const_epsilon = ConvertScalarToITensor(ctx, norm_input.trt_tensor_->getDimensions().nbDims, &epsilon_,
DataType::kNumberTypeFloat32, op_name_ + "_epsilon");
CHECK_NULL_RETURN(const_epsilon);
auto var_epsilon =
ctx->network()->addElementWise(*var_, *const_epsilon, nvinfer1::ElementWiseOperation::kSUM)->getOutput(0);
CHECK_NULL_RETURN(var_epsilon);
// standard deviation
auto std_dev = ctx->network()->addUnary(*var_epsilon, nvinfer1::UnaryOperation::kSQRT)->getOutput(0);
CHECK_NULL_RETURN(std_dev);
auto scale = ctx->network()->addElementWise(*gamma_, *std_dev, nvinfer1::ElementWiseOperation::kDIV)->getOutput(0);
CHECK_NULL_RETURN(scale);
auto mean_mul_scale =
ctx->network()->addElementWise(*mean_, *scale, nvinfer1::ElementWiseOperation::kPROD)->getOutput(0);
CHECK_NULL_RETURN(mean_mul_scale);
auto bias =
ctx->network()->addElementWise(*beta_, *mean_mul_scale, nvinfer1::ElementWiseOperation::kSUB)->getOutput(0);
CHECK_NULL_RETURN(bias);
// scale with bias
auto scale_layer =
ctx->network()->addElementWise(*norm_input.trt_tensor_, *scale, nvinfer1::ElementWiseOperation::kPROD);
this->layer_ = scale_layer;
auto scale_out = scale_layer->getOutput(0);
CHECK_NULL_RETURN(scale_out);
auto beta_out = ctx->network()->addElementWise(*scale_out, *bias, nvinfer1::ElementWiseOperation::kSUM)->getOutput(0);
CHECK_NULL_RETURN(beta_out);
ctx->RegisterTensor(ITensorHelper{beta_out, Format::NCHW, true}, out_tensors_[0].Name());
return RET_OK;
}
REGISTER_TENSORRT_CREATOR(ops::kNameFusedBatchNorm, BatchNormTensorRT)
} // namespace mindspore::lite
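RunAsTrtOps rebuilds inference-mode batch normalization from plain elementwise layers; the kSUM / kSQRT / kDIV / kPROD / kSUB chain is the usual factoring of the formula into a per-channel scale and bias (algebra sketch, matching the scale and bias tensors in the code above):

```latex
y = \gamma \cdot \frac{x - \mu}{\sqrt{\sigma^{2} + \epsilon}} + \beta
  = s \cdot x + b,
\qquad s = \frac{\gamma}{\sqrt{\sigma^{2} + \epsilon}} \;\;(\text{scale}),
\qquad b = \beta - \mu \cdot s \;\;(\text{bias})
```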

View File

@ -0,0 +1,50 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_BATCH_NORM_TENSORRT_H_
#define MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_BATCH_NORM_TENSORRT_H_
#include <string>
#include <vector>
#include "src/extendrt/delegate/tensorrt/op/tensorrt_op.h"
namespace mindspore::lite {
constexpr int BETA_INDEX = 2;
constexpr int MEAN_INDEX = 3;
constexpr int VAR_INDEX = 4;
class BatchNormTensorRT : public TensorRTOp {
public:
BatchNormTensorRT(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors, std::string name)
: TensorRTOp(base_operator, in_tensors, out_tensors, name) {}
~BatchNormTensorRT() override = default;
int AddInnerOp(TensorRTContext *ctx) override;
int IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors) override;
private:
int RunAsTrtOps(TensorRTContext *ctx, ITensorHelper helper);
float epsilon_{0.0f};
nvinfer1::ITensor *gamma_{nullptr};
nvinfer1::ITensor *beta_{nullptr};
nvinfer1::ITensor *mean_{nullptr};
nvinfer1::ITensor *var_{nullptr};
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_BATCH_NORM_TENSORRT_H_

View File

@ -29,7 +29,7 @@
namespace mindspore::lite {
int BatchToSpaceTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors) {
if (in_tensors.size() != 3) {
if (in_tensors.size() != INPUT_SIZE3) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
return RET_ERROR;
}
@ -38,8 +38,6 @@ int BatchToSpaceTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
return RET_ERROR;
}
dynamic_shape_params_.support_dynamic_ = false;
dynamic_shape_params_.support_hw_dynamic_ = false;
return RET_OK;
}
@ -52,19 +50,17 @@ int BatchToSpaceTensorRT::AddInnerOp(TensorRTContext *ctx) {
MS_LOG(ERROR) << "block_h not equal block_w " << op_name_;
return RET_ERROR;
}
const int *pad_ptr = reinterpret_cast<const int *>(in_tensors_[2].Data());
const int *pad_ptr = reinterpret_cast<const int *>(in_tensors_[INPUT_SIZE2].Data());
int ph0 = *(pad_ptr + 0);
int ph1 = *(pad_ptr + 1);
int pw0 = *(pad_ptr + 2);
int pw1 = *(pad_ptr + 3);
auto in_dims = input_tensor->getDimensions();
int in = in_dims.d[0];
int ic = in_dims.d[1];
int ih = in_dims.d[2];
int iw = in_dims.d[3];
int pw0 = *(pad_ptr + INPUT_SIZE2);
int pw1 = *(pad_ptr + INPUT_SIZE3);
auto plugin =
std::make_shared<BatchToSpacePlugin>(input_tensor->getName(), in, ic, ih, iw, bh, ph0, ph1, pw0, pw1, device_id_);
auto plugin = std::make_shared<BatchToSpacePlugin>(input_tensor->getName(), bh, ph0, ph1, pw0, pw1, device_id_);
if (plugin == nullptr) {
MS_LOG(ERROR) << "add batchtospace plugin failed for" << op_name_;
return RET_ERROR;
}
nvinfer1::ITensor *inputTensors[] = {input_tensor};
nvinfer1::IPluginV2Layer *space2batch_opt_layer = ctx->network()->addPluginV2(inputTensors, 1, *plugin);
if (space2batch_opt_layer == nullptr) {
@ -93,15 +89,20 @@ int BatchToSpacePlugin::enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
int BatchToSpacePlugin::RunCudaBatchToSpace(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs,
void *const *outputs, cudaStream_t stream) {
int on = in_ / (bh_ * bh_);
int oc = ic_;
int oh = ih_ * bh_ - ph0_ - ph1_;
int ow = iw_ * bh_ - pw0_ - pw1_;
nvinfer1::Dims input_dims = inputDesc[0].dims;
int in = input_dims.d[0];
int ic = input_dims.d[1];
int ih = input_dims.d[2];
int iw = input_dims.d[3];
int on = in / (bh_ * bh_);
int oc = ic;
int oh = ih * bh_ - ph0_ - ph1_;
int ow = iw * bh_ - pw0_ - pw1_;
int size = on * oc * oh * ow;
CalBatchToSpace<float>(size, static_cast<const float *>(inputs[0]), in_, ih_, iw_, ic_, on, oh, ow, oc, ph0_, ph1_,
pw0_, pw1_, bh_, static_cast<float *>(outputs[0]), device_id_, stream);
CalBatchToSpace<float>(size, static_cast<const float *>(inputs[0]), in, ih, iw, ic, on, oh, ow, oc, ph0_, ph1_, pw0_,
pw1_, bh_, static_cast<float *>(outputs[0]), device_id_, stream);
return RET_OK;
}
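Because the plugin now reads its geometry from inputDesc at enqueue time rather than from dimensions captured when the engine was built, the output size follows directly from the runtime dims. A worked example with illustrative numbers:

```cpp
// Illustrative values only: block bh_ = 2, crops ph0_ = 0, ph1_ = 1, pw0_ = 0, pw1_ = 1,
// and a runtime input of NCHW = [4, 3, 5, 5] taken from inputDesc[0].dims.
int on = 4 / (2 * 2);          // 1
int oc = 3;                    // channels pass through unchanged
int oh = 5 * 2 - 0 - 1;        // 9
int ow = 5 * 2 - 0 - 1;        // 9
int size = on * oc * oh * ow;  // 243 elements written by CalBatchToSpace
```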
@ -115,7 +116,7 @@ nvinfer1::IPluginV2DynamicExt *BatchToSpacePlugin::clone() const noexcept {
return plugin;
}
size_t BatchToSpacePlugin::getSerializationSize() const noexcept { return sizeof(int) * 9; }
size_t BatchToSpacePlugin::getSerializationSize() const noexcept { return sizeof(int) * 5; }
nvinfer1::DimsExprs BatchToSpacePlugin::getOutputDimensions(int index, const nvinfer1::DimsExprs *inputs,
int nbInputDims,
@ -127,19 +128,15 @@ nvinfer1::DimsExprs BatchToSpacePlugin::getOutputDimensions(int index, const nvi
auto bh = exprBuilder.constant(bh_);
dims.d[1] = inputs[0].d[1];
auto ph_sum = exprBuilder.constant(ph0_ + ph1_);
auto pod0 = exprBuilder.operation(nvinfer1::DimensionOperation::kPROD, *inputs[0].d[2], *bh);
dims.d[2] = exprBuilder.operation(nvinfer1::DimensionOperation::kSUB, *pod0, *ph_sum);
auto pod0 = exprBuilder.operation(nvinfer1::DimensionOperation::kPROD, *inputs[0].d[INPUT_SIZE2], *bh);
dims.d[INPUT_SIZE2] = exprBuilder.operation(nvinfer1::DimensionOperation::kSUB, *pod0, *ph_sum);
auto pw_sum = exprBuilder.constant(pw0_ + pw1_);
auto pod1 = exprBuilder.operation(nvinfer1::DimensionOperation::kPROD, *inputs[0].d[3], *bh);
dims.d[3] = exprBuilder.operation(nvinfer1::DimensionOperation::kSUB, *pod1, *pw_sum);
auto pod1 = exprBuilder.operation(nvinfer1::DimensionOperation::kPROD, *inputs[0].d[INPUT_SIZE3], *bh);
dims.d[INPUT_SIZE3] = exprBuilder.operation(nvinfer1::DimensionOperation::kSUB, *pod1, *pw_sum);
return dims;
}
void BatchToSpacePlugin::serialize(void *buffer) const noexcept {
SerializeValue(&buffer, &in_, sizeof(int));
SerializeValue(&buffer, &ic_, sizeof(int));
SerializeValue(&buffer, &ih_, sizeof(int));
SerializeValue(&buffer, &iw_, sizeof(int));
SerializeValue(&buffer, &bh_, sizeof(int));
SerializeValue(&buffer, &ph0_, sizeof(int));
SerializeValue(&buffer, &ph1_, sizeof(int));

View File

@ -40,13 +40,8 @@ class BatchToSpaceTensorRT : public TensorRTOp {
constexpr auto BATCHTOSPACETENSORRT_PLUGIN_NAME{"BatchToSpacePlugin"};
class BatchToSpacePlugin : public TensorRTPlugin {
public:
BatchToSpacePlugin(const std::string name, int in, int ic, int ih, int iw, int bh, int ph0, int ph1, int pw0, int pw1,
uint32_t device_id)
BatchToSpacePlugin(const std::string name, int bh, int ph0, int ph1, int pw0, int pw1, uint32_t device_id)
: TensorRTPlugin(name, std::string(BATCHTOSPACETENSORRT_PLUGIN_NAME), device_id),
in_(in),
ic_(ic),
ih_(ih),
iw_(iw),
bh_(bh),
ph0_(ph0),
ph1_(ph1),
@ -56,23 +51,15 @@ class BatchToSpacePlugin : public TensorRTPlugin {
BatchToSpacePlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
: TensorRTPlugin(std::string(name), std::string(BATCHTOSPACETENSORRT_PLUGIN_NAME)) {
const nvinfer1::PluginField *fields = fc->fields;
in_ = static_cast<const int *>(fields[0].data)[0];
ic_ = static_cast<const int *>(fields[1].data)[0];
ih_ = static_cast<const int *>(fields[2].data)[0];
iw_ = static_cast<const int *>(fields[3].data)[0];
bh_ = static_cast<const int *>(fields[4].data)[0];
ph0_ = static_cast<const int *>(fields[5].data)[0];
ph1_ = static_cast<const int *>(fields[6].data)[0];
pw0_ = static_cast<const int *>(fields[7].data)[0];
pw1_ = static_cast<const int *>(fields[8].data)[0];
bh_ = static_cast<const int *>(fields[0].data)[0];
ph0_ = static_cast<const int *>(fields[1].data)[0];
ph1_ = static_cast<const int *>(fields[2].data)[0];
pw0_ = static_cast<const int *>(fields[3].data)[0];
pw1_ = static_cast<const int *>(fields[4].data)[0];
}
BatchToSpacePlugin(const char *name, const void *serialData, size_t serialLength)
: TensorRTPlugin(std::string(name), std::string(BATCHTOSPACETENSORRT_PLUGIN_NAME)) {
DeserializeValue(&serialData, &serialLength, &in_, sizeof(int));
DeserializeValue(&serialData, &serialLength, &ic_, sizeof(int));
DeserializeValue(&serialData, &serialLength, &ih_, sizeof(int));
DeserializeValue(&serialData, &serialLength, &iw_, sizeof(int));
DeserializeValue(&serialData, &serialLength, &bh_, sizeof(int));
DeserializeValue(&serialData, &serialLength, &ph0_, sizeof(int));
DeserializeValue(&serialData, &serialLength, &ph1_, sizeof(int));
@ -93,15 +80,11 @@ class BatchToSpacePlugin : public TensorRTPlugin {
private:
int RunCudaBatchToSpace(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs, void *const *outputs,
cudaStream_t stream);
size_t in_;
size_t ic_;
size_t ih_;
size_t iw_;
size_t bh_;
size_t ph0_;
size_t ph1_;
size_t pw0_;
size_t pw1_;
int bh_;
int ph0_;
int ph1_;
int pw0_;
int pw1_;
const std::string layer_name_;
std::string name_space_;
};

View File

@ -40,7 +40,7 @@ int ConcateTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::
return RET_OK;
}
int ConcateTensorRT::AddInnerOp(TensorRTContext *ctx) {
int ConcateTensorRT::CheckParams(TensorRTContext *ctx) {
if (ctx == nullptr || ctx->network() == nullptr) {
MS_LOG(ERROR) << "context or network is invalid";
return RET_ERROR;
@ -59,6 +59,14 @@ int ConcateTensorRT::AddInnerOp(TensorRTContext *ctx) {
<< ", but origin ms tensor has " << in_tensors_.size();
return RET_ERROR;
}
return RET_OK;
}
int ConcateTensorRT::AddInnerOp(TensorRTContext *ctx) {
if (CheckParams(ctx) != RET_OK) {
MS_LOG(ERROR) << "Check input tensors failed: " << op_name_;
return RET_ERROR;
}
nvinfer1::ITensor *trt_input_tensors[in_tensors_.size()];
int ret = PreProcessInputs(ctx, trt_input_tensors);
@ -67,8 +75,15 @@ int ConcateTensorRT::AddInnerOp(TensorRTContext *ctx) {
return ret;
}
if (type_ == ops::kNameStack) {
for (size_t i = 0; i != in_tensors_.size(); ++i) {
bool has_rank_0 = false;
for (size_t i = 0; i < in_tensors_.size(); ++i) {
if (!input(ctx, i).is_tensor) {
has_rank_0 = true;
break;
}
}
if (type_ == ops::kNameStack && !has_rank_0) {
for (size_t i = 0; i < in_tensors_.size(); ++i) {
auto shuffle_layer = ctx->network()->addShuffle(*trt_input_tensors[i]);
if (shuffle_layer == nullptr) {
MS_LOG(ERROR) << "addShuffle failed for TensorRT.";
@ -95,7 +110,7 @@ int ConcateTensorRT::AddInnerOp(TensorRTContext *ctx) {
}
concate_layer->setName(op_name_.c_str());
auto concat_output = concate_layer->getOutput(0);
ctx->RegisterTensor(ITensorHelper{concat_output, out_format_, true}, out_tensors_[0].Name());
ctx->RegisterTensor(ITensorHelper{concat_output, NCHW, true}, out_tensors_[0].Name());
this->layer_ = concate_layer;
return RET_OK;
}

View File

@ -43,6 +43,7 @@ class ConcateTensorRT : public TensorRTOp {
private:
int PreProcessInputs(TensorRTContext *ctx, nvinfer1::ITensor *trt_input_tensors[]);
int CheckParams(TensorRTContext *ctx);
Format out_format_{Format::NCHW};
bool same_format_{true};

View File

@ -0,0 +1,81 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/extendrt/delegate/tensorrt/op/constantofshape_tensorrt.h"
#include <numeric>
#include "src/extendrt/delegate/tensorrt/tensorrt_utils.h"
#include "ops/constant_of_shape.h"
namespace mindspore::lite {
int ConstantOfShapeTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors) {
if (in_tensors.size() != 1) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size() << " : " << op_name_;
return RET_ERROR;
}
if (out_tensors.size() != 1) {
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size() << " : " << op_name_;
return RET_ERROR;
}
auto constofshape_op = AsOps<ops::ConstantOfShape>();
if (constofshape_op == nullptr) {
MS_LOG(ERROR) << "Failed to as operator ConstantOfShape: " << op_name_;
return RET_ERROR;
}
DataType data_type = static_cast<DataType>(constofshape_op->get_data_type());
if (data_type != DataType::kNumberTypeInt32 && data_type != DataType::kNumberTypeFloat32) {
MS_LOG(ERROR) << "Unsupported data type for " << op_name_;
return RET_ERROR;
}
return RET_OK;
}
int ConstantOfShapeTensorRT::AddInnerOp(TensorRTContext *ctx) {
auto constofshape_op = AsOps<ops::ConstantOfShape>();
if (constofshape_op == nullptr) {
MS_LOG(ERROR) << "Failed to as operator ConstantOfShape: " << op_name_;
return RET_ERROR;
}
auto &&value_vector = constofshape_op->get_value();
auto value_tensor = ctx->ConvertTo1DTensor(value_vector);
CHECK_NULL_RETURN(value_tensor);
auto unsqueeze_layer = ctx->network()->addShuffle(*value_tensor);
CHECK_NULL_RETURN(unsqueeze_layer);
auto shape = input(ctx, 0).trt_tensor_;
int rank = shape->getDimensions().d[0];
nvinfer1::Dims unsqueeze{rank};
std::fill(unsqueeze.d, unsqueeze.d + rank, 1);
unsqueeze_layer->setReshapeDimensions(unsqueeze);
unsqueeze_layer->setZeroIsPlaceholder(false);
value_tensor = unsqueeze_layer->getOutput(0);
CHECK_NULL_RETURN(value_tensor);
auto out_tensor = Broadcast(ctx, value_tensor, shape);
if (static_cast<DataType>(constofshape_op->get_data_type()) == DataType::kNumberTypeInt32) {
out_tensor = TRTTensorCast(ctx, out_tensor, nvinfer1::DataType::kINT32, op_name_ + "_cast_out");
}
auto output_helper = ITensorHelper{out_tensor, Format::NCHW, true};
ctx->RegisterTensor(output_helper, out_tensors_[0].Name());
MS_LOG(DEBUG) << "output " << GetTensorFormat(output_helper);
this->layer_ = unsqueeze_layer;
return RET_OK;
}
REGISTER_TENSORRT_CREATOR(ops::kNameConstantOfShape, ConstantOfShapeTensorRT)
} // namespace mindspore::lite
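ConstantOfShape fills a tensor whose shape is only known at runtime: the value is reshaped to a rank-matching run of 1s and then expanded to the shape tensor by the Broadcast helper, with an optional cast back to int32. Conceptually (illustrative values, not TensorRT code):

```cpp
#include <vector>

// What the layer sequence above computes for value = 1.5f and a shape input holding {2, 3}.
std::vector<int> runtime_shape = {2, 3};   // contents of the shape input tensor at runtime
float value = 1.5f;                        // ops::ConstantOfShape value attribute
std::vector<float> output(2 * 3, value);   // [[1.5, 1.5, 1.5], [1.5, 1.5, 1.5]]
```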

View File

@ -0,0 +1,38 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_OP_CONSTANT_OF_SHAPE_TENSORRT_H_
#define MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_OP_CONSTANT_OF_SHAPE_TENSORRT_H_
#include <string>
#include <vector>
#include <algorithm>
#include "src/extendrt/delegate/tensorrt/op/tensorrt_op.h"
namespace mindspore::lite {
class ConstantOfShapeTensorRT : public TensorRTOp {
public:
ConstantOfShapeTensorRT(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors, std::string name)
: TensorRTOp(base_operator, in_tensors, out_tensors, name) {}
~ConstantOfShapeTensorRT() override = default;
int AddInnerOp(TensorRTContext *ctx) override;
int IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors) override;
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_OP_CONSTANT_OF_SHAPE_TENSORRT_H_

View File

@ -34,6 +34,7 @@
#include "ops/equal.h"
#include "ops/less.h"
#include "ops/greater.h"
#include "ops/floor_mod.h"
namespace mindspore::lite {
namespace {
@ -118,8 +119,12 @@ int ElementWiseTensorRT::AddInnerOp(TensorRTContext *ctx) {
MS_LOG(ERROR) << "PreprocessInputTensors failed.";
return RET_ERROR;
}
nvinfer1::IElementWiseLayer *cal_layer =
ctx->network()->addElementWise(*x_input.trt_tensor_, *y_input.trt_tensor_, element_wise_op_);
nvinfer1::IElementWiseLayer *cal_layer;
if (type_ == ops::kNameFloorMod) {
cal_layer = AddFoorMod(ctx, x_input.trt_tensor_, y_input.trt_tensor_);
} else {
cal_layer = ctx->network()->addElementWise(*x_input.trt_tensor_, *y_input.trt_tensor_, element_wise_op_);
}
if (cal_layer == nullptr) {
MS_LOG(ERROR) << "addElementWise failed for TensorRT.";
@ -164,7 +169,8 @@ int ElementWiseTensorRT::AddInnerOp(TensorRTContext *ctx) {
MS_LOG(INFO) << "bool result cast to int32" << op_name_;
}
#endif
auto output_helper = ITensorHelper{op_out_tensor, x_input.format_, x_input.same_format_};
auto is_tensor = x_input.is_tensor || y_input.is_tensor;
auto output_helper = ITensorHelper{op_out_tensor, x_input.format_, x_input.same_format_, is_tensor};
ctx->RegisterTensor(output_helper, out_tensors_[0].Name());
MS_LOG(DEBUG) << "output " << GetTensorFormat(output_helper);
return RET_OK;
@ -198,6 +204,25 @@ int ElementWiseTensorRT::PreprocessInputTensors(TensorRTContext *ctx, ITensorHel
return RET_OK;
}
nvinfer1::IElementWiseLayer *ElementWiseTensorRT::AddFoorMod(TensorRTContext *ctx, nvinfer1::ITensor *x0_trt,
nvinfer1::ITensor *x1_trt) {
nvinfer1::IElementWiseLayer *layer_0 =
ctx->network()->addElementWise(*x0_trt, *x1_trt, nvinfer1::ElementWiseOperation::kFLOOR_DIV);
layer_0->setName((op_name_ + "_floor_div").c_str());
auto result_0 = layer_0->getOutput(0);
nvinfer1::IElementWiseLayer *layer_1 =
ctx->network()->addElementWise(*result_0, *x1_trt, nvinfer1::ElementWiseOperation::kPROD);
layer_1->setName((op_name_ + "_prod").c_str());
auto result_1 = layer_1->getOutput(0);
nvinfer1::IElementWiseLayer *layer_2 =
ctx->network()->addElementWise(*x0_trt, *result_1, nvinfer1::ElementWiseOperation::kSUB);
layer_2->setName((op_name_ + "_sub").c_str());
return layer_2;
}
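TensorRT has no native floor-mod, so AddFoorMod composes it from three elementwise layers. A worked example of the identity they implement, showing why kFLOOR_DIV rather than truncating division is needed:

```cpp
#include <cmath>
// Identity implemented by the kFLOOR_DIV -> kPROD -> kSUB chain: floormod(x, y) = x - floor(x / y) * y.
double x = -7.0, y = 3.0;
double floor_mod = x - std::floor(x / y) * y;  //  2.0: sign follows the divisor, like kFLOOR_DIV
double trunc_rem = x - std::trunc(x / y) * y;  // -1.0: what truncating division would give instead
```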
nvinfer1::ITensor *ElementWiseTensorRT::AddActivation(TensorRTContext *ctx, nvinfer1::ITensor *in_tensor) {
ActivationType activation = ActivationType::NO_ACTIVATION;
if (type_ == ops::kNameAddFusion) {
@ -255,11 +280,13 @@ nvinfer1::ITensor *ElementWiseTensorRT::AddActivation(TensorRTContext *ctx, nvin
int ElementWiseTensorRT::AddConstTensor(TensorRTContext *ctx) {
int const_tensor_index = in_tensors_[0].IsConst() ? 0 : 1;
auto expect_shape = ConvertMSShape(input(ctx, 1 - const_tensor_index).trt_tensor_->getDimensions());
nvinfer1::ITensor *constant_input =
ConvertConstantTensorWithDims(ctx, in_tensors_[const_tensor_index], expect_shape, op_name_);
auto &const_tensor = in_tensors_[const_tensor_index];
nvinfer1::ITensor *constant_input = ConvertConstantTensorWithDims(ctx, const_tensor, expect_shape, op_name_);
CHECK_NULL_RETURN(constant_input);
auto const_helper = ITensorHelper{constant_input, input(ctx, 1 - const_tensor_index).format_, true};
ctx->RegisterTensor(const_helper, in_tensors_[const_tensor_index].Name());
auto const_shape = const_tensor.Shape();
auto is_scalar = const_shape.empty() || (const_shape.size() == 1 && const_shape[0] == 1);
auto const_helper = ITensorHelper{constant_input, input(ctx, 1 - const_tensor_index).format_, true, !is_scalar};
ctx->RegisterTensor(const_helper, const_tensor.Name());
return RET_OK;
}
@ -273,6 +300,7 @@ REGISTER_TENSORRT_CREATOR(ops::kNameEltwise, ElementWiseTensorRT)
REGISTER_TENSORRT_CREATOR(ops::kNameMinimum, ElementWiseTensorRT)
REGISTER_TENSORRT_CREATOR(ops::kNameMaximum, ElementWiseTensorRT)
REGISTER_TENSORRT_CREATOR(ops::kNameBiasAdd, ElementWiseTensorRT)
REGISTER_TENSORRT_CREATOR(ops::kNameFloorMod, ElementWiseTensorRT)
#if TRT_VERSION_GE(7, 2)
REGISTER_TENSORRT_CREATOR(ops::kNameEqual, ElementWiseTensorRT)
REGISTER_TENSORRT_CREATOR(ops::kNameLess, ElementWiseTensorRT)

View File

@ -37,6 +37,8 @@ class ElementWiseTensorRT : public TensorRTOp {
private:
nvinfer1::ITensor *AddActivation(TensorRTContext *ctx, nvinfer1::ITensor *in_tensor);
nvinfer1::IElementWiseLayer *AddFoorMod(TensorRTContext *ctx, nvinfer1::ITensor *x0_trt, nvinfer1::ITensor *x1_trt);
int AddConstTensor(TensorRTContext *ctx);
bool SameTensor(nvinfer1::ITensor *trt_tensor, TensorInfo *ms_tensor);

View File

@ -0,0 +1,79 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/extendrt/delegate/tensorrt/op/fill_tensorrt.h"
#include <numeric>
#include "src/extendrt/delegate/tensorrt/tensorrt_utils.h"
#include "ops/fill.h"
namespace mindspore::lite {
int FillTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors) {
#if TRT_VERSION_GE(8, 2)
if (in_tensors.size() != INPUT_SIZE2) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size() << " : " << op_name_;
return RET_ERROR;
}
if (out_tensors.size() != 1) {
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size() << " : " << op_name_;
return RET_ERROR;
}
return RET_OK;
#else
MS_LOG(WARNING) << "low TensorRT version don't support fill op, please upgrade TensorRT version to 8.2 or higher";
return RET_ERROR;
#endif
}
int FillTensorRT::AddInnerOp(TensorRTContext *ctx) {
#if TRT_VERSION_GE(8, 2)
ITensorHelper fill_input;
nvinfer1::FillOperation op = nvinfer1::FillOperation::kLINSPACE;
auto *fill_layer = ctx->network()->addFill({}, op);
if (fill_layer == nullptr) {
MS_LOG(ERROR) << "addFill failed for TensorRT : " << op_name_;
return RET_ERROR;
}
fill_layer->setInput(0, *input(ctx, 1).trt_tensor_);
nvinfer1::ITensor *alpha_tensor = nullptr;
if (!in_tensors_[0].IsConst()) {
alpha_tensor = input(ctx, 0).trt_tensor_;
} else {
alpha_tensor =
ConvertScalarToITensor(ctx, 0, in_tensors_[0].Data(), in_tensors_[0].DataType(), op_name_ + "_alpha");
}
fill_layer->setInput(1, *alpha_tensor);
int nbdims = input(ctx, 1).trt_tensor_->getDimensions().d[0];
nvinfer1::ITensor *beta_tensor = nullptr;
if (in_tensors_[0].DataType() == DataType::kNumberTypeInt32) {
beta_tensor = ctx->ConvertTo1DTensor(std::vector<int>(nbdims, 0));
} else {
beta_tensor = ctx->ConvertTo1DTensor(std::vector<float>(nbdims, 0.f));
}
fill_layer->setInput(INPUT_SIZE2, *beta_tensor);
nvinfer1::ITensor *out_tensor = fill_layer->getOutput(0);
ctx->RegisterTensor(ITensorHelper{out_tensor, Format::NCHW, true}, out_tensors_[0].Name());
this->layer_ = fill_layer;
return RET_OK;
#else
MS_LOG(WARNING) << "low TensorRT version don't support fill op, please upgrade TensorRT version to 8.2 or higher";
return RET_ERROR;
#endif
}
REGISTER_TENSORRT_CREATOR(ops::kNameFill, FillTensorRT)
} // namespace mindspore::lite
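Fill is mapped to an IFillLayer in kLINSPACE mode whose per-axis deltas (input 2) are all zero, so the linspace degenerates into a constant fill over a runtime shape. A scalar sketch of what the configured layer produces (values illustrative):

```cpp
#include <vector>

// Inputs as wired above: shape tensor (fill input 0), alpha start value (fill input 1), zero deltas (fill input 2).
std::vector<int> runtime_shape = {2, 4};                // contents of the op's shape input at runtime
float alpha = 7.0f;                                     // scalar taken from in_tensors_[0]
std::vector<float> deltas(runtime_shape.size(), 0.0f);  // zero increment per axis
std::vector<float> output(2 * 4, alpha);                // every element equals alpha
```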

View File

@ -0,0 +1,41 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_OP_FILL_TENSORRT_H_
#define MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_OP_FILL_TENSORRT_H_
#include <string>
#include <vector>
#include <algorithm>
#include "src/extendrt/delegate/tensorrt/op/tensorrt_op.h"
namespace mindspore::lite {
class FillTensorRT : public TensorRTOp {
public:
FillTensorRT(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors, std::string name)
: TensorRTOp(base_operator, in_tensors, out_tensors, name) {}
~FillTensorRT() override = default;
int AddInnerOp(TensorRTContext *ctx) override;
int IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors) override;
private:
std::vector<float> zeros_;
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_OP_FILL_TENSORRT_H_

View File

@ -88,7 +88,7 @@ int FullyConnectedTensorRT::AddInnerOp(TensorRTContext *ctx) {
}
int FullyConnectedTensorRT::PreprocessInputs(TensorRTContext *ctx, ITensorHelper *fc_input) {
auto ret = PreprocessInputs2SameDim(ctx, input(ctx, !in_tensors_[1].IsConst()), fc_input);
auto ret = PreprocessInputs2SameDim(ctx, input(ctx, in_tensors_[1].IsConst() ? 0 : 1), fc_input);
if (ret != RET_OK) {
MS_LOG(ERROR) << "PreprocessInputs2SameDim failed for " << op_name_;
return ret;

View File

@ -85,7 +85,7 @@ int GatherTensorRT::AddInnerOp(TensorRTContext *ctx) {
nvinfer1::ITensor *op_output = gather_layer->getOutput(0);
auto old_shape = ConvertMSShape(op_output->getDimensions());
// keep shape
if (in_tensors_[1].Shape().empty() && old_shape.size() > 1) {
if (indices_tensor.trt_tensor_->getDimensions().nbDims == 0 && old_shape.size() > 1) {
auto squeeze = ctx->network()->addShuffle(*op_output);
if (squeeze == nullptr) {
MS_LOG(ERROR) << "add output squeeze failed for " << op_name_;

View File

@ -0,0 +1,111 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/extendrt/delegate/tensorrt/op/greaterorequal_tensorrt.h"
#include <cuda_runtime.h>
#include <numeric>
#include <memory>
#include <vector>
#include <functional>
#include <unordered_map>
#include "NvInferRuntimeCommon.h"
#include "src/extendrt/delegate/tensorrt/tensorrt_utils.h"
#include "src/extendrt/delegate/tensorrt/cuda_impl/logical.cuh"
#include "ops/greater_equal.h"
namespace mindspore::lite {
int GreaterorequalTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors) {
if (in_tensors.size() != INPUT_SIZE2) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
return RET_ERROR;
}
if (out_tensors.size() != 1) {
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
return RET_ERROR;
}
return RET_OK;
}
int GreaterorequalTensorRT::AddInnerOp(TensorRTContext *ctx) {
if (ctx == nullptr || ctx->network() == nullptr) {
MS_LOG(ERROR) << "network or input tensor is invalid";
return RET_ERROR;
}
nvinfer1::ITensor *inputTensors[] = {input(ctx, 0).trt_tensor_, input(ctx, 1).trt_tensor_};
auto plugin = std::make_shared<GreaterorequalPlugin>(op_name_, schema::PrimitiveType_GreaterEqual);
if (plugin == nullptr) {
MS_LOG(ERROR) << "create GreaterorequalPlugin failed for " << op_name_;
return RET_ERROR;
}
nvinfer1::IPluginV2Layer *greaterorequal_layer = ctx->network()->addPluginV2(inputTensors, 2, *plugin);
this->layer_ = greaterorequal_layer;
nvinfer1::ITensor *op_out_tensor = greaterorequal_layer->getOutput(0);
if (op_out_tensor == nullptr) {
MS_LOG(ERROR) << "greaterorequal out tensor is nullptr.";
return RET_ERROR;
}
ctx->RegisterTensor(ITensorHelper{op_out_tensor, input(ctx, 0).format_, input(ctx, 0).same_format_},
out_tensors_[0].Name());
return RET_OK;
}
REGISTER_TENSORRT_PLUGIN(GreaterorequalPluginCreater);
template class TensorRTPluginCreater<GreaterorequalPlugin>;
template <class T>
nvinfer1::PluginFieldCollection TensorRTPluginCreater<T>::field_collection_{};
template <class T>
std::vector<nvinfer1::PluginField> TensorRTPluginCreater<T>::fields_;
int GreaterorequalPlugin::enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs,
void *const *outputs, void *workspace, cudaStream_t stream) noexcept {
return RunCudaGreaterorequal(inputDesc, inputs, outputs, stream);
}
int GreaterorequalPlugin::RunCudaGreaterorequal(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs,
void *const *outputs, cudaStream_t stream) {
if (inputDesc->type == nvinfer1::DataType::kINT32) {
GreaterOrEqual(static_cast<const int *>(inputs[0]), static_cast<const int *>(inputs[1]),
static_cast<int *>(outputs[0]), GetDimsVolume(inputDesc[0].dims), stream);
} else if (inputDesc->type == nvinfer1::DataType::kFLOAT) {
GreaterOrEqual(static_cast<const float *>(inputs[0]), static_cast<const float *>(inputs[1]),
static_cast<float *>(outputs[0]), GetDimsVolume(inputDesc[0].dims), stream);
} else {
MS_LOG(ERROR) << "unsupported equal data type";
return RET_ERROR;
}
return RET_OK;
}
nvinfer1::IPluginV2DynamicExt *GreaterorequalPlugin::clone() const noexcept {
auto *plugin = new (std::nothrow) GreaterorequalPlugin(*this);
if (plugin == nullptr) {
MS_LOG(ERROR) << "malloc greaterorequal plugin failed";
return nullptr;
}
plugin->setPluginNamespace(name_space_.c_str());
return plugin;
}
size_t GreaterorequalPlugin::getSerializationSize() const noexcept { return sizeof(schema::PrimitiveType); }
void GreaterorequalPlugin::serialize(void *buffer) const noexcept {
SerializeValue(&buffer, &primitive_type_, sizeof(schema::PrimitiveType));
}
REGISTER_TENSORRT_CREATOR(ops::kNameGreaterEqual, GreaterorequalTensorRT)
} // namespace mindspore::lite
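The plugin forwards the comparison to a CUDA kernel selected by element type; the per-element semantics are simple. A host-side reference of what the kernel computes (illustrative only, the real work lives in logical.cuh on the GPU):
#include <cstddef>
// CPU reference for the element-wise comparison the GreaterorequalPlugin
// launches on the GPU: out[i] = (a[i] >= b[i]) ? 1 : 0, written back in the
// input element type, matching the kINT32/kFLOAT branches above. Both inputs
// are assumed to have the same element count.
template <typename T>
void GreaterOrEqualRef(const T *a, const T *b, T *out, size_t count) {
  for (size_t i = 0; i < count; ++i) {
    out[i] = static_cast<T>(a[i] >= b[i]);
  }
}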

View File

@ -0,0 +1,77 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_GREATEROREQUAL_PLUGIN_H_
#define MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_GREATEROREQUAL_PLUGIN_H_
#include <string>
#include <vector>
#include "src/extendrt/delegate/tensorrt/op/tensorrt_op.h"
#include "src/extendrt/delegate/tensorrt/op/tensorrt_plugin.h"
namespace mindspore::lite {
class GreaterorequalTensorRT : public TensorRTOp {
public:
GreaterorequalTensorRT(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors, std::string name)
: TensorRTOp(base_operator, in_tensors, out_tensors, name) {}
~GreaterorequalTensorRT() override = default;
int AddInnerOp(TensorRTContext *ctx) override;
int IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors) override;
};
constexpr auto GREATEROREQUAL_PLUGIN_NAME{"GreaterorequalPlugin"};
class GreaterorequalPlugin : public TensorRTPlugin {
public:
GreaterorequalPlugin(const std::string name, schema::PrimitiveType primitive_type)
: TensorRTPlugin(name, std::string(GREATEROREQUAL_PLUGIN_NAME)), primitive_type_(primitive_type) {}
GreaterorequalPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
: TensorRTPlugin(std::string(name), std::string(GREATEROREQUAL_PLUGIN_NAME)) {
const nvinfer1::PluginField *fields = fc->fields;
primitive_type_ = static_cast<const schema::PrimitiveType *>(fields[0].data)[0];
}
GreaterorequalPlugin(const char *name, const void *serialData, size_t serialLength)
: TensorRTPlugin(std::string(name), std::string(GREATEROREQUAL_PLUGIN_NAME)) {
DeserializeValue(&serialData, &serialLength, &primitive_type_, sizeof(schema::PrimitiveType));
}
GreaterorequalPlugin() = delete;
nvinfer1::IPluginV2DynamicExt *clone() const noexcept override;
int enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept override;
size_t getSerializationSize() const noexcept override;
void serialize(void *buffer) const noexcept override;
private:
int RunCudaGreaterorequal(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs,
void *const *outputs, cudaStream_t stream);
const std::string layer_name_;
std::string name_space_;
schema::PrimitiveType primitive_type_;
};
class GreaterorequalPluginCreater : public TensorRTPluginCreater<GreaterorequalPlugin> {
public:
GreaterorequalPluginCreater() : TensorRTPluginCreater(std::string(GREATEROREQUAL_PLUGIN_NAME)) {}
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_GREATEROREQUAL_PLUGIN_H_
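The plugin's serialize/getSerializationSize/deserialize trio above round-trips a single primitive_type_ value through a raw byte buffer. A minimal sketch of that SerializeValue/DeserializeValue pattern, assuming POD payloads (helper names here are illustrative, not the delegate's API):
#include <cstddef>
#include <cstring>
// Write a POD into the buffer and advance the cursor.
template <typename T>
void WriteValue(void **buffer, const T &value) {
  std::memcpy(*buffer, &value, sizeof(T));
  *buffer = static_cast<char *>(*buffer) + sizeof(T);
}
// Read a POD back, advancing the cursor and shrinking the remaining length.
template <typename T>
bool ReadValue(const void **data, size_t *length, T *value) {
  if (*length < sizeof(T)) {
    return false;  // truncated or corrupted serialized blob
  }
  std::memcpy(value, *data, sizeof(T));
  *data = static_cast<const char *>(*data) + sizeof(T);
  *length -= sizeof(T);
  return true;
}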

View File

@ -0,0 +1,110 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/extendrt/delegate/tensorrt/op/lessorequal_tensorrt.h"
#include <cuda_runtime.h>
#include <numeric>
#include <memory>
#include <vector>
#include <functional>
#include <unordered_map>
#include "src/extendrt/delegate/tensorrt/tensorrt_utils.h"
#include "NvInferRuntimeCommon.h"
#include "src/extendrt/delegate/tensorrt/cuda_impl/logical.cuh"
#include "ops/less_equal.h"
namespace mindspore::lite {
int LessorequalTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors) {
if (in_tensors.size() != INPUT_SIZE2) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
return RET_ERROR;
}
if (out_tensors.size() != 1) {
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
return RET_ERROR;
}
return RET_OK;
}
int LessorequalTensorRT::AddInnerOp(TensorRTContext *ctx) {
if (ctx == nullptr || ctx->network() == nullptr) {
MS_LOG(ERROR) << "network or input tensor is invalid";
return RET_ERROR;
}
nvinfer1::ITensor *inputTensors[] = {input(ctx, 0).trt_tensor_, input(ctx, 1).trt_tensor_};
auto plugin = std::make_shared<LessorequalPlugin>(op_name_, schema::PrimitiveType_LessEqual);
if (plugin == nullptr) {
MS_LOG(ERROR) << "create LessorequalPlugin failed for " << op_name_;
return RET_ERROR;
}
nvinfer1::IPluginV2Layer *lessorequal_layer = ctx->network()->addPluginV2(inputTensors, 2, *plugin);
this->layer_ = lessorequal_layer;
nvinfer1::ITensor *op_out_tensor = lessorequal_layer->getOutput(0);
if (op_out_tensor == nullptr) {
MS_LOG(ERROR) << "lessorequal out tensor is nullptr.";
return RET_ERROR;
}
ctx->RegisterTensor(ITensorHelper{op_out_tensor, input(ctx, 0).format_, input(ctx, 0).same_format_},
out_tensors_[0].Name());
return RET_OK;
}
REGISTER_TENSORRT_PLUGIN(LessorequalPluginCreater);
template class TensorRTPluginCreater<LessorequalPlugin>;
template <class T>
nvinfer1::PluginFieldCollection TensorRTPluginCreater<T>::field_collection_{};
template <class T>
std::vector<nvinfer1::PluginField> TensorRTPluginCreater<T>::fields_;
int LessorequalPlugin::enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs,
void *const *outputs, void *workspace, cudaStream_t stream) noexcept {
return RunCudaLessorequal(inputDesc, inputs, outputs, stream);
}
int LessorequalPlugin::RunCudaLessorequal(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs,
void *const *outputs, cudaStream_t stream) {
if (inputDesc->type == nvinfer1::DataType::kINT32) {
LessOrEqual(static_cast<const int *>(inputs[0]), static_cast<const int *>(inputs[1]),
static_cast<int *>(outputs[0]), GetDimsVolume(inputDesc[0].dims), stream);
} else if (inputDesc->type == nvinfer1::DataType::kFLOAT) {
LessOrEqual(static_cast<const float *>(inputs[0]), static_cast<const float *>(inputs[1]),
static_cast<float *>(outputs[0]), GetDimsVolume(inputDesc[0].dims), stream);
} else {
MS_LOG(ERROR) << "unsupported equal data type";
return RET_ERROR;
}
return RET_OK;
}
nvinfer1::IPluginV2DynamicExt *LessorequalPlugin::clone() const noexcept {
auto *plugin = new (std::nothrow) LessorequalPlugin(*this);
if (plugin == nullptr) {
MS_LOG(ERROR) << "malloc lessorequal plugin failed";
return nullptr;
}
plugin->setPluginNamespace(name_space_.c_str());
return plugin;
}
size_t LessorequalPlugin::getSerializationSize() const noexcept { return sizeof(schema::PrimitiveType); }
void LessorequalPlugin::serialize(void *buffer) const noexcept {
SerializeValue(&buffer, &primitive_type_, sizeof(schema::PrimitiveType));
}
REGISTER_TENSORRT_CREATOR(ops::kNameLessEqual, LessorequalTensorRT)
} // namespace mindspore::lite

View File

@ -0,0 +1,77 @@
/**
* Copyright 2022 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_LESSOREQUAL_PLUGIN_H_
#define MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_LESSOREQUAL_PLUGIN_H_
#include <string>
#include <vector>
#include "src/extendrt/delegate/tensorrt/op/tensorrt_op.h"
#include "src/extendrt/delegate/tensorrt/op/tensorrt_plugin.h"
namespace mindspore::lite {
class LessorequalTensorRT : public TensorRTOp {
public:
LessorequalTensorRT(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors, std::string name)
: TensorRTOp(base_operator, in_tensors, out_tensors, name) {}
~LessorequalTensorRT() override = default;
int AddInnerOp(TensorRTContext *ctx) override;
int IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors) override;
};
constexpr auto LESSOREQUAL_PLUGIN_NAME{"LessorequalPlugin"};
class LessorequalPlugin : public TensorRTPlugin {
public:
LessorequalPlugin(const std::string name, schema::PrimitiveType primitive_type)
: TensorRTPlugin(name, std::string(LESSOREQUAL_PLUGIN_NAME)), primitive_type_(primitive_type) {}
LessorequalPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
: TensorRTPlugin(std::string(name), std::string(LESSOREQUAL_PLUGIN_NAME)) {
const nvinfer1::PluginField *fields = fc->fields;
primitive_type_ = static_cast<const schema::PrimitiveType *>(fields[0].data)[0];
}
LessorequalPlugin(const char *name, const void *serialData, size_t serialLength)
: TensorRTPlugin(std::string(name), std::string(LESSOREQUAL_PLUGIN_NAME)) {
DeserializeValue(&serialData, &serialLength, &primitive_type_, sizeof(schema::PrimitiveType));
}
LessorequalPlugin() = delete;
nvinfer1::IPluginV2DynamicExt *clone() const noexcept override;
int enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept override;
size_t getSerializationSize() const noexcept override;
void serialize(void *buffer) const noexcept override;
private:
int RunCudaLessorequal(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs, void *const *outputs,
cudaStream_t stream);
const std::string layer_name_;
std::string name_space_;
schema::PrimitiveType primitive_type_;
};
class LessorequalPluginCreater : public TensorRTPluginCreater<LessorequalPlugin> {
public:
LessorequalPluginCreater() : TensorRTPluginCreater(std::string(LESSOREQUAL_PLUGIN_NAME)) {}
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_LESSOREQUAL_PLUGIN_H_

View File

@ -281,6 +281,7 @@ nvinfer1::ITensor *LSTMTensorRT::ConcateAll(TensorRTContext *ctx, std::vector<nv
return nullptr;
}
concat->setAxis(axis);
this->layer_ = concat;
return concat->getOutput(0);
}

View File

@ -103,7 +103,6 @@ class LSTMTensorRT : public TensorRTOp {
int *state_weight_offset, int *bias_offset, LstmWeights *lstm_weights,
const LstmState &next_state);
// nvinfer1::INetworkDefinition *network_{nullptr};
nvinfer1::ITensor *input_data_{nullptr};
nvinfer1::ITensor *sequence_size_input_{nullptr};
nvinfer1::ITensor *op_data_out_{nullptr};

View File

@ -235,8 +235,8 @@ nvinfer1::ITensor *MatMulTensorRT::AddBias(TensorRTContext *ctx, nvinfer1::ITens
if (in_tensors_.size() == kBiasIndex + 1) {
nvinfer1::ITensor *bias = nullptr;
if (in_tensors_[kBiasIndex].Shape().size() < static_cast<size_t>(out_tensor->getDimensions().nbDims)) {
std::vector<int64_t> expect_dims(out_tensors_[0].Shape());
expect_dims[0] = out_tensor->getDimensions().d[0];
std::vector<int64_t> expect_dims(input_tensor->getDimensions().nbDims, 1);
expect_dims[expect_dims.size() - 1] = in_tensors_[kBiasIndex].Shape().back();
bias = ConvertTensorWithExpandDims(ctx, in_tensors_[kBiasIndex], expect_dims, op_name_);
} else if (in_tensors_[kBiasIndex].Shape().size() == static_cast<size_t>(out_tensor->getDimensions().nbDims)) {
bias = ConvertConstantTensor(ctx, in_tensors_[kBiasIndex], op_name_);
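The new bias path reshapes the bias to the rank of the matmul output, with every dimension set to 1 except the last, so TensorRT can broadcast it over the result. A small sketch of the expected-dims computation (plain C++, illustrative):
#include <cstdint>
#include <vector>
// Build the broadcastable bias shape used above: rank `nb_dims`, all ones,
// last dimension equal to the bias length.
std::vector<int64_t> ExpandBiasDims(int nb_dims, int64_t bias_len) {
  std::vector<int64_t> expect_dims(static_cast<size_t>(nb_dims), 1);
  expect_dims.back() = bias_len;
  return expect_dims;
}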

View File

@ -80,7 +80,6 @@ int PadTensorRT::AddInnerOp(TensorRTContext *ctx) {
int h_post;
int w_pre;
int w_post;
// MS_LOG(INFO) << in_tensors_[0].format();
if (SameDims(pad_input->getDimensions(), in_tensors_[0].Shape())) {
// NCHW: 0: N_pre, 1: N_post, 2: C_pre, 3: C_post, 4: H_pre, 5: H_post, 6: W_pre, 7: W_post
if (*padding_data != 0 || *(padding_data + 1) != 0 || *(padding_data + 2) != 0 || *(padding_data + 3) != 0) {

View File

@ -61,7 +61,7 @@ int ReduceTensorRT::AddInnerOp(TensorRTContext *ctx) {
CHECK_NULL_RETURN(reduce_input);
}
uint32_t reduceAxis = GetAxis();
uint32_t reduceAxis = GetAxis(ctx);
auto reduce_operation_opt = TryConvertTRTReduceMode(reduce_op->get_mode());
if (!reduce_operation_opt) {
MS_LOG(WARNING) << "invalid reduce for TensorRT, need check: " << static_cast<int>(reduce_op->get_mode());
@ -88,7 +88,7 @@ int ReduceTensorRT::AddInnerOp(TensorRTContext *ctx) {
return RET_OK;
}
uint32_t ReduceTensorRT::GetAxis() {
uint32_t ReduceTensorRT::GetAxis(TensorRTContext *ctx) {
// axis
uint32_t reduceAxis = 0;
auto axis_tensor = this->in_tensors_[1];
@ -102,7 +102,8 @@ uint32_t ReduceTensorRT::GetAxis() {
auto axis_data = reinterpret_cast<const int *>(axis_tensor.Data());
CHECK_NULL_RETURN(axis_data);
for (int i = 0; i < axis_tensor.ElementNum(); i++) {
int format_axis_data = (*axis_data == -1) ? in_tensors_[0].Shape().size() - 1 : *axis_data;
auto input_0 = input(ctx, 0).trt_tensor_;
int format_axis_data = (*axis_data == -1) ? input_0->getDimensions().nbDims - 1 : *axis_data;
MS_LOG(DEBUG) << op_name_ << " reduceAxis at index : " << *axis_data;
reduceAxis |= 1u << format_axis_data;
axis_data++;
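GetAxis now resolves axis -1 against the runtime tensor rank (nbDims) rather than the framework-side shape, then folds every axis into TensorRT's reduce-axes bitmask. A standalone sketch of that folding (plain C++, illustrative helper):
#include <cstdint>
#include <vector>
// Fold the axis list into the bitmask TensorRT's reduce layer expects:
// axis -1 maps to the last dimension of the runtime rank, and each axis
// contributes one set bit.
uint32_t BuildReduceAxisMask(const std::vector<int> &axes, int nb_dims) {
  uint32_t mask = 0;
  for (int axis : axes) {
    int resolved = (axis == -1) ? nb_dims - 1 : axis;
    mask |= 1u << resolved;
  }
  return mask;
}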

View File

@ -38,7 +38,7 @@ class ReduceTensorRT : public TensorRTOp {
const std::vector<TensorInfo> &out_tensors) override;
private:
uint32_t GetAxis();
uint32_t GetAxis(TensorRTContext *ctx);
Format out_format_;
};
} // namespace mindspore::lite

View File

@ -40,7 +40,16 @@ int ResizeTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::v
MS_LOG(ERROR) << "convert failed " << op_name_;
return RET_ERROR;
}
dynamic_shape_params_.support_hw_dynamic_ = false;
if (resize_op_->get_coordinate_transform_mode() == CoordinateTransformMode::ALIGN_CORNERS &&
resize_op_->get_method() == ResizeMethod::LINEAR) {
MS_LOG(ERROR) << "Resize op do not support coordinate_transform_mode == ALIGN_CORNERS when method == LINEAR "
<< op_name_;
return RET_ERROR;
}
dynamic_shape_params_.support_hw_dynamic_ =
(resize_op_->get_new_height() > 0 && resize_op_->get_new_width() > 0) ? false : true;
dynamic_shape_params_.support_hw_dynamic_ &= resize_op_->get_method() != ResizeMethod::LINEAR;
return RET_OK;
}
@ -53,16 +62,7 @@ int ResizeTensorRT::AddInnerOp(TensorRTContext *ctx) {
nvinfer1::ITensor *resize_in_tensor = input(ctx, 0).trt_tensor_;
MS_LOG(DEBUG) << "origin input " << GetTensorFormat(input(ctx, 0));
MS_LOG(DEBUG) << "after transpose input " << GetTensorFormat(resize_in_tensor, Format::NCHW, true);
auto method = resize_op_->get_method();
nvinfer1::ITensor *output_tensor = nullptr;
if (method == ResizeMethod::LINEAR) {
MS_LOG(INFO) << "using plugin for resize";
output_tensor = RunPlugin(ctx, resize_in_tensor);
} else {
output_tensor = RunTensorRT(ctx, resize_in_tensor);
}
nvinfer1::ITensor *output_tensor = RunTensorRT(ctx, resize_in_tensor);
if (output_tensor == nullptr) {
return RET_ERROR;
}
@ -73,26 +73,35 @@ int ResizeTensorRT::AddInnerOp(TensorRTContext *ctx) {
}
nvinfer1::ITensor *ResizeTensorRT::RunPlugin(TensorRTContext *ctx, nvinfer1::ITensor *resize_in_tensor) {
constexpr int non_const_resize_input_count = 2;
if (ReadyInputsNumber(ctx) == non_const_resize_input_count) {
std::vector<int> resize_shape;
if (ReadyInputsNumber(ctx) == INPUT_SIZE2) {
MS_LOG(ERROR) << "dynamic input is not support!";
return nullptr;
} else {
const int *resize_ptr = reinterpret_cast<const int *>(in_tensors_[1].Data());
std::vector<int> resize_shape;
for (int i = 0; i != in_tensors_[1].ElementNum(); ++i) {
resize_shape.push_back(*(resize_ptr + i));
if (in_tensors_.size() == 1) {
resize_shape.push_back(static_cast<float>(resize_op_->get_new_height()));
resize_shape.push_back(static_cast<float>(resize_op_->get_new_width()));
} else {
const int *resize_ptr = reinterpret_cast<const int *>(in_tensors_[1].Data());
for (int i = 0; i != in_tensors_[1].ElementNum(); ++i) {
resize_shape.push_back(*(resize_ptr + i));
}
if (resize_shape.size() != INPUT_SIZE2) {
MS_LOG(ERROR) << "Do not support resize number more than 2";
return nullptr;
}
}
constexpr int resize_hw_dims_count = 2;
if (resize_shape.size() != resize_hw_dims_count) {
MS_LOG(ERROR) << "Do not support resize number more than 2";
bool using_half_pixel = (resize_op_->get_coordinate_transform_mode() == CoordinateTransformMode::HALF_PIXEL);
auto plugin = std::make_shared<ResizeLinear2DPlugin>(resize_in_tensor->getName(), resize_shape[0], resize_shape[1],
using_half_pixel, device_id_);
if (plugin == nullptr) {
MS_LOG(ERROR) << "add ResizeLinear2D plugin failed for " << op_name_;
return nullptr;
}
auto plugin =
std::make_shared<ResizeLinear2DPlugin>(resize_in_tensor->getName(), resize_shape[0], resize_shape[1], device_id_);
nvinfer1::ITensor *inputTensors[] = {resize_in_tensor};
nvinfer1::IPluginV2Layer *resize_layer = ctx->network()->addPluginV2(inputTensors, 1, *plugin);
if (resize_layer == nullptr) {
MS_LOG(ERROR) << "add spacetobatch op failed for TensorRT.";
MS_LOG(ERROR) << "add resize op failed for TensorRT.";
return nullptr;
}
resize_layer->setName(op_name_.c_str());
@ -124,18 +133,38 @@ nvinfer1::ITensor *ResizeTensorRT::RunTensorRT(TensorRTContext *ctx, nvinfer1::I
int ResizeTensorRT::SetOutputDims(TensorRTContext *ctx, nvinfer1::ITensor *resize_in_tensor,
nvinfer1::IResizeLayer *resize_layer) {
auto in_dims = resize_in_tensor->getDimensions();
nvinfer1::Dims in_dims = resize_in_tensor->getDimensions();
if (in_tensors_.size() == 1 && in_dims.nbDims == DIMENSION_4D) {
// hw is static, but has dynamic batch size
float scales[DIMENSION_4D]{1, 1, 1, 1};
scales[kNCHW_H] = static_cast<float>(resize_op_->get_new_height()) / static_cast<float>(in_dims.d[kNCHW_H]);
scales[kNCHW_W] = static_cast<float>(resize_op_->get_new_width()) / static_cast<float>(in_dims.d[kNCHW_W]);
resize_layer->setScales(scales, DIMENSION_4D);
nvinfer1::Dims4 new_dims(in_dims.d[0], in_dims.d[1], resize_op_->get_new_height(),
resize_op_->get_new_width()); // nchw
resize_layer->setOutputDimensions(new_dims); // static shape
} else {
auto shape_value_tensor = in_tensors_[1];
if (!shape_value_tensor.IsConst() && in_tensors_.size() >= INPUT_SIZE2) {
// dynamic output shape
resize_layer->setInput(1, *input(ctx, 1).trt_tensor_);
auto shape_tensor = input(ctx, 1).trt_tensor_;
if (shape_tensor->getDimensions().d[0] == INPUT_SIZE4) {
resize_layer->setInput(1, *shape_tensor);
} else {
auto in_tensor_shape = ctx->network()->addShape(*resize_in_tensor)->getOutput(0);
CHECK_NULL_RETURN(in_tensor_shape);
nvinfer1::Dims start_dims{1, {0}};
nvinfer1::Dims size_dims{1, {2}};
nvinfer1::Dims stride_dims{1, {1}};
auto nc = ctx->network()->addSlice(*in_tensor_shape, start_dims, size_dims, stride_dims)->getOutput(0);
CHECK_NULL_RETURN(nc);
nvinfer1::ITensor *trt_input_tensors[INPUT_SIZE2];
trt_input_tensors[0] = nc;
trt_input_tensors[1] = shape_tensor;
auto concat_layer = ctx->network()->addConcatenation(trt_input_tensors, INPUT_SIZE2);
concat_layer->setAxis(0);
auto nchw = concat_layer->getOutput(0);
CHECK_NULL_RETURN(nchw);
nchw = TRTTensorCast(ctx, nchw, nvinfer1::DataType::kINT32, op_name_ + "_input_nchw_to_int32");
resize_layer->setInput(1, *nchw);
}
} else {
std::vector<float> out_shape;
ParseValueFromShapeTensor(ctx, shape_value_tensor, &out_shape);
@ -170,7 +199,6 @@ int ResizeTensorRT::SetOutputDims(TensorRTContext *ctx, nvinfer1::ITensor *resiz
float scales[DIMENSION_4D]{1, 1, 1, 1};
scales[kNCHW_H] = out_shape[kNCHW_H];
scales[kNCHW_W] = out_shape[kNCHW_W];
MS_LOG(WARNING) << scales[0] << " " << scales[1] << " " << scales[2] << " " << scales[3];
resize_layer->setScales(scales, DIMENSION_4D);
}
}
@ -237,12 +265,25 @@ int ResizeTensorRT::SetParams(nvinfer1::IResizeLayer *resize_layer) {
}
resize_layer->setResizeMode(method_map.at(method));
// unsupported for trt6, but support setCoordinateTransformation() in version8
auto coordinate_transform_mode = resize_op_->get_coordinate_transform_mode();
// unsupported for trt6, but support setCoordinateTransformation() in version8
#if TRT_VERSION_GE(8, 0)
std::map<CoordinateTransformMode, nvinfer1::ResizeCoordinateTransformation> transform_map = {
{CoordinateTransformMode::ASYMMETRIC, nvinfer1::ResizeCoordinateTransformation::kASYMMETRIC},
{CoordinateTransformMode::ALIGN_CORNERS, nvinfer1::ResizeCoordinateTransformation::kALIGN_CORNERS},
{CoordinateTransformMode::HALF_PIXEL, nvinfer1::ResizeCoordinateTransformation::kHALF_PIXEL}};
auto transform_it = transform_map.find(coordinate_transform_mode);
if (transform_it == transform_map.end()) {
MS_LOG(ERROR) << op_name_ << " not support resize coordinate transform mode " << coordinate_transform_mode;
return RET_ERROR;
}
resize_layer->setCoordinateTransformation(transform_it->second);
#else
if (coordinate_transform_mode != CoordinateTransformMode::ASYMMETRIC) {
MS_LOG(WARNING) << op_name_ << " has coordinate_transform_mode may not supported: "
<< static_cast<int>(coordinate_transform_mode);
}
#endif
return RET_OK;
}
@ -265,8 +306,8 @@ int ResizeLinear2DPlugin::RunCudaResizeLinear2D(const nvinfer1::PluginTensorDesc
float h_scale = dims.d[kNCHW_H] / static_cast<float>(resize_h_);
float w_scale = dims.d[kNCHW_W] / static_cast<float>(resize_w_);
CalResizeBilinear(static_cast<const float *>(inputs[0]), dims.d[0], dims.d[1], dims.d[kNCHW_H], dims.d[kNCHW_W],
resize_h_, resize_w_, h_scale, w_scale, false, static_cast<float *>(outputs[0]), device_id_,
stream);
resize_h_, resize_w_, h_scale, w_scale, using_half_pixel_, static_cast<float *>(outputs[0]),
device_id_, stream);
return RET_OK;
}
@ -280,7 +321,7 @@ nvinfer1::IPluginV2DynamicExt *ResizeLinear2DPlugin::clone() const noexcept {
return plugin;
}
size_t ResizeLinear2DPlugin::getSerializationSize() const noexcept { return sizeof(int) * 2; }
size_t ResizeLinear2DPlugin::getSerializationSize() const noexcept { return sizeof(int) * INPUT_SIZE2 + sizeof(bool); }
nvinfer1::DimsExprs ResizeLinear2DPlugin::getOutputDimensions(int32_t index, const nvinfer1::DimsExprs *inputs,
int nbInputDims,
@ -299,6 +340,7 @@ nvinfer1::DimsExprs ResizeLinear2DPlugin::getOutputDimensions(int32_t index, con
void ResizeLinear2DPlugin::serialize(void *buffer) const noexcept {
SerializeValue(&buffer, &resize_h_, sizeof(int));
SerializeValue(&buffer, &resize_w_, sizeof(int));
SerializeValue(&buffer, &using_half_pixel_, sizeof(bool));
}
REGISTER_TENSORRT_CREATOR(ops::kNameResize, ResizeTensorRT)
} // namespace mindspore::lite
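The resize changes hinge on the coordinate transformation mode: setCoordinateTransformation() is used on TensorRT 8+, and the ResizeLinear2D plugin now carries a using_half_pixel_ flag into its bilinear kernel. A sketch of the two source-coordinate formulas involved (standard resize conventions, restated as plain C++ for reference):
// For an output index `dst` and scale = in_size / out_size:
//   asymmetric : src = dst * scale
//   half_pixel : src = (dst + 0.5) * scale - 0.5
// The serialized using_half_pixel_ flag selects the second formula.
float SourceCoordinate(int dst, float scale, bool using_half_pixel) {
  return using_half_pixel ? (static_cast<float>(dst) + 0.5f) * scale - 0.5f
                          : static_cast<float>(dst) * scale;
}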

View File

@ -41,10 +41,11 @@ class ResizeTensorRT : public TensorRTOp {
const std::vector<TensorInfo> &out_tensors) override;
private:
int SetOutputDims(TensorRTContext *ctx, nvinfer1::ITensor *resize_in_tensor, nvinfer1::IResizeLayer *resize_layer);
nvinfer1::ITensor *RunPlugin(TensorRTContext *ctx, nvinfer1::ITensor *resize_in_tensor);
nvinfer1::ITensor *RunTensorRT(TensorRTContext *ctx, nvinfer1::ITensor *resize_in_tensor);
int SetOutputDims(TensorRTContext *ctx, nvinfer1::ITensor *resize_in_tensor, nvinfer1::IResizeLayer *resize_layer);
void ParseValueFromShapeTensor(TensorRTContext *ctx, const TensorInfo &shape_value_tensor,
std::vector<float> *out_shape);
@ -61,22 +62,25 @@ class ResizeTensorRT : public TensorRTOp {
constexpr auto RESIZELINEAR2D_PLUGIN_NAME{"ResizeLinear2DPlugin"};
class ResizeLinear2DPlugin : public TensorRTPlugin {
public:
ResizeLinear2DPlugin(const std::string name, int resize_h, int resize_w, uint32_t device_id)
ResizeLinear2DPlugin(const std::string name, int resize_h, int resize_w, bool using_half_pixel, uint32_t device_id)
: TensorRTPlugin(name, std::string(RESIZELINEAR2D_PLUGIN_NAME), device_id),
resize_h_(resize_h),
resize_w_(resize_w) {}
resize_w_(resize_w),
using_half_pixel_(using_half_pixel) {}
ResizeLinear2DPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
: TensorRTPlugin(std::string(name), std::string(RESIZELINEAR2D_PLUGIN_NAME)) {
const nvinfer1::PluginField *fields = fc->fields;
resize_h_ = static_cast<const int *>(fields[0].data)[0];
resize_w_ = static_cast<const int *>(fields[1].data)[0];
using_half_pixel_ = static_cast<const bool *>(fields[2].data)[0];
}
ResizeLinear2DPlugin(const char *name, const void *serialData, size_t serialLength)
: TensorRTPlugin(std::string(name), std::string(RESIZELINEAR2D_PLUGIN_NAME)) {
DeserializeValue(&serialData, &serialLength, &resize_h_, sizeof(int));
DeserializeValue(&serialData, &serialLength, &resize_w_, sizeof(int));
DeserializeValue(&serialData, &serialLength, &using_half_pixel_, sizeof(bool));
}
ResizeLinear2DPlugin() = delete;
@ -94,6 +98,7 @@ class ResizeLinear2DPlugin : public TensorRTPlugin {
void *const *outputs, cudaStream_t stream);
int resize_h_;
int resize_w_;
bool using_half_pixel_;
const std::string layer_name_;
std::string name_space_;
};

View File

@ -48,7 +48,7 @@ class ScaleTensorRT : public TensorRTOp {
Format out_format_;
bool out_same_format_{false};
bool out_same_format_{true};
nvinfer1::ScaleMode mode_;

View File

@ -16,6 +16,7 @@
#include "src/extendrt/delegate/tensorrt/op/shape_tensorrt.h"
#include "ops/shape.h"
#include "ops/dynamic_shape.h"
namespace mindspore::lite {
int ShapeTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
@ -32,8 +33,6 @@ int ShapeTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::ve
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
return RET_ERROR;
}
dynamic_shape_params_.support_dynamic_ = false;
dynamic_shape_params_.support_hw_dynamic_ = false;
return RET_OK;
}
int ShapeTensorRT::AddInnerOp(TensorRTContext *ctx) {
@ -55,4 +54,5 @@ int ShapeTensorRT::AddInnerOp(TensorRTContext *ctx) {
return RET_OK;
}
REGISTER_TENSORRT_CREATOR(ops::kNameShape, ShapeTensorRT)
REGISTER_TENSORRT_CREATOR(ops::kNameDynamicShape, ShapeTensorRT)
} // namespace mindspore::lite

View File

@ -72,6 +72,11 @@ int ShuffleTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::
MS_LOG(ERROR) << "Unsupported shape tensor of " << type_;
return RET_ERROR;
}
} else if (type_ == ops::kNameBroadcastTo) {
if (in_tensors.size() != INPUT_SIZE2) {
MS_LOG(ERROR) << "PrimitiveType_Transpose Unsupported in_tensors size: " << in_tensors.size();
return RET_ERROR;
}
} else {
MS_LOG(ERROR) << "Unsupported op type:" << type_;
return RET_ERROR;
@ -123,12 +128,19 @@ int ShuffleTensorRT::AddInnerOp(TensorRTContext *ctx) {
MS_LOG(ERROR) << "Unsupported op type for " << op_name_;
return RET_ERROR;
}
if (ret == RET_OK) {
auto output_helper = ITensorHelper{shuffler_output_, out_format_, true};
ctx->RegisterTensor(output_helper, out_tensors_[0].Name());
MS_LOG(DEBUG) << "output " << GetTensorFormat(output_helper);
if (ret != RET_OK) {
MS_LOG(ERROR) << "AddOp failed for " << op_name_;
return ret;
}
return ret;
if (shuffler_output_ == nullptr) {
MS_LOG(ERROR) << "output tensor create failed for " << op_name_;
return RET_ERROR;
}
auto output_helper = ITensorHelper{shuffler_output_, out_format_, true};
ctx->RegisterTensor(output_helper, out_tensors_[0].Name());
MS_LOG(DEBUG) << "output " << GetTensorFormat(output_helper);
return RET_OK;
}
int ShuffleTensorRT::InputTensorPreprocess(TensorRTContext *ctx) {
@ -189,7 +201,7 @@ int ShuffleTensorRT::AddUnsqueezeOp(nvinfer1::IShuffleLayer *shuffle_layer) {
nvinfer1::ITensor *expand_input = shuffler_input_;
if (input(ctx_, 0).is_tensor == true) {
for (size_t i = 0; i < param_axis_.size(); i++) {
expand_input = ExpandDim(shuffle_layer, expand_input, param_axis_[i]);
expand_input = ExpandDim(ctx_, expand_input, param_axis_[i]);
}
}
shuffler_output_ = expand_input;
@ -242,8 +254,12 @@ int ShuffleTensorRT::AddReshapeOp(nvinfer1::IShuffleLayer *shuffle_layer) {
auto &shape_tensor = in_tensors_[1];
if (shape_tensor.IsConst()) {
// static shuffle layer
shuffle_layer->setReshapeDimensions(
InferReshapeDims(shuffler_input_->getDimensions(), in_tensors_[0].Shape(), out_tensors_[0].Shape()));
nvinfer1::Dims reshape_dims{static_cast<int>(shape_tensor.ElementNum())};
const int *shape_ptr = reinterpret_cast<const int *>(shape_tensor.Data());
for (int i = 0; i != shape_tensor.ElementNum(); ++i) {
reshape_dims.d[i] = *(shape_ptr + i);
}
shuffle_layer->setReshapeDimensions(reshape_dims);
} else {
if (in_tensors_.size() != INPUT_SIZE2) {
MS_LOG(ERROR) << "invalid shape tensor for reshape " << op_name_;
@ -270,6 +286,10 @@ int ShuffleTensorRT::AddFlattenOp(nvinfer1::IShuffleLayer *shuffle_layer) {
}
int ShuffleTensorRT::AddExpandDimsOp(nvinfer1::IShuffleLayer *shuffle_layer) {
if (!input(ctx_, 0).is_tensor) {
shuffler_output_ = shuffler_input_;
return RET_OK;
}
int axis;
if (in_tensors_[1].DataType() == DataType::kNumberTypeInt64) {
auto axis_data = static_cast<const int64_t *>(in_tensors_[1].Data());
@ -281,114 +301,44 @@ int ShuffleTensorRT::AddExpandDimsOp(nvinfer1::IShuffleLayer *shuffle_layer) {
MS_LOG(WARNING) << op_name_ << " axis tensor data type is " << static_cast<int>(in_tensors_[1].DataType());
return RET_ERROR;
}
shuffler_output_ = ExpandDim(shuffle_layer, shuffler_input_, axis);
shuffler_output_ = ExpandDim(ctx_, shuffler_input_, axis);
return shuffler_output_ == nullptr ? RET_ERROR : RET_OK;
}
int ShuffleTensorRT::AddBroadcastToOp(nvinfer1::IShuffleLayer *shuffle_layer) {
if (out_tensors_[0].ElementNum() != in_tensors_[0].ElementNum() &&
out_tensors_[0].Shape().size() == in_tensors_[0].Shape().size()) {
MS_LOG(WARNING) << "broadcast element cnt changes, ignore broadcast for " << op_name_;
shuffle_layer->setReshapeDimensions(shuffler_input_->getDimensions());
MS_LOG(WARNING) << "here " << op_name_;
} else if (out_tensors_[0].ElementNum() == in_tensors_[0].ElementNum()) {
nvinfer1::Dims new_dims = ConvertCudaDims(out_tensors_[0].Shape());
if (new_dims.nbDims == -1) {
MS_LOG(ERROR) << "ConvertCudaDims failed for " << op_name_;
return RET_ERROR;
}
new_dims.d[0] = shuffler_input_->getDimensions().d[0];
shuffle_layer->setReshapeDimensions(new_dims);
MS_LOG(WARNING) << "here " << op_name_;
if (!in_tensors_[1].IsConst()) {
auto input_shape_tensor = input(ctx_, 1).trt_tensor_;
shuffler_output_ = Broadcast(ctx_, shuffler_input_, input_shape_tensor);
} else {
MS_LOG(ERROR) << "broadcast needs check for " << op_name_;
std::vector<int> input_shape;
const int *shape_ptr = reinterpret_cast<const int *>(in_tensors_[1].Data());
for (int i = 0; i != in_tensors_[1].ElementNum(); ++i) {
input_shape.push_back(*(shape_ptr + i));
}
nvinfer1::Dims in_tensor_dims = shuffler_input_->getDimensions();
auto input_shape_tensor = ctx_->ConvertTo1DTensor(input_shape);
while (in_tensor_dims.nbDims < static_cast<int64_t>(input_shape.size())) {
shuffler_input_ = ExpandDim(ctx_, shuffler_input_, 0);
if (shuffler_input_->getDimensions().nbDims == -1) {
MS_LOG(ERROR) << "ConvertCudaDims failed for " << op_name_;
return RET_ERROR;
}
shuffle_layer->setReshapeDimensions(shuffler_input_->getDimensions());
shuffler_input_ = shuffle_layer->getOutput(0);
in_tensor_dims = shuffler_input_->getDimensions();
}
auto size_tensor = ctx_->network()->addShape(*shuffler_input_)->getOutput(0);
size_tensor = ctx_->network()
->addElementWise(*input_shape_tensor, *size_tensor, nvinfer1::ElementWiseOperation::kMAX)
->getOutput(0);
shuffler_output_ = Broadcast(ctx_, shuffler_input_, size_tensor);
}
shuffler_output_ = shuffle_layer->getOutput(0);
return shuffler_output_ == nullptr ? RET_ERROR : RET_OK;
}
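The rewritten AddBroadcastToOp expands the input to the requested rank and then derives the broadcast extents with an element-wise kMAX against the target shape tensor. A host-side sketch of the resulting shape rule (illustrative, assuming ordinary broadcasting of size-1 dimensions):
#include <algorithm>
#include <cstdint>
#include <vector>
// Left-pad the input shape with 1s to the requested rank, then take the
// element-wise maximum with the requested shape, mirroring the kMAX trick
// used above to build the broadcast size tensor.
std::vector<int64_t> BroadcastShape(std::vector<int64_t> in_shape,
                                    const std::vector<int64_t> &want_shape) {
  while (in_shape.size() < want_shape.size()) {
    in_shape.insert(in_shape.begin(), 1);
  }
  std::vector<int64_t> out(want_shape.size());
  for (size_t i = 0; i < want_shape.size(); ++i) {
    out[i] = std::max(in_shape[i], want_shape[i]);
  }
  return out;
}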
nvinfer1::ITensor *ShuffleTensorRT::ExpandDim(nvinfer1::IShuffleLayer *shuffle_layer, nvinfer1::ITensor *input_tensor,
int axis) {
auto input_dims = input_tensor->getDimensions();
// if expand dim not at last dim and shape is dynamic, change to expanddim at last dim and transpose
bool special_expand = false;
for (int i = 0; i < input_dims.nbDims; i++) {
special_expand = special_expand || input_dims.d[i] == -1;
}
special_expand = special_expand && (axis != -1 && axis != input_dims.nbDims - 1);
if (special_expand) {
std::vector<int64_t> new_shape;
for (int i = 0; i < input_dims.nbDims; i++) {
new_shape.push_back(input_dims.d[i] == -1 ? 0 : input_dims.d[i]);
}
new_shape.push_back(1);
nvinfer1::Dims new_dims = ConvertCudaDims(new_shape);
if (new_dims.nbDims == -1) {
MS_LOG(ERROR) << "ConvertCudaDims failed for " << op_name_;
return nullptr;
}
shuffle_layer->setReshapeDimensions(new_dims);
// transpose
nvinfer1::Permutation perm{};
for (int i = 0; i < new_dims.nbDims; i++) {
if (i < axis) {
perm.order[i] = i;
} else if (i == axis) {
perm.order[i] = new_dims.nbDims - 1;
} else {
perm.order[i] = i - 1;
}
}
nvinfer1::IShuffleLayer *trans_layer = ctx_->network()->addShuffle(*shuffle_layer->getOutput(0));
if (trans_layer == nullptr) {
MS_LOG(ERROR) << "add transpose layer failed for special expand dims op " << op_name_;
return nullptr;
}
trans_layer->setFirstTranspose(perm);
return trans_layer->getOutput(0);
} else {
std::vector<int64_t> new_shape;
for (int i = 0; i < input_dims.nbDims; i++) {
if (axis == i) {
new_shape.push_back(1);
}
new_shape.push_back(input_dims.d[i] == -1 ? 0 : input_dims.d[i]);
}
if (axis == -1 || axis == input_dims.nbDims) {
new_shape.push_back(1);
}
nvinfer1::Dims new_dims = ConvertCudaDims(new_shape);
if (new_dims.nbDims == -1) {
MS_LOG(ERROR) << "ConvertCudaDims failed for " << op_name_;
return nullptr;
}
shuffle_layer->setReshapeDimensions(new_dims);
return shuffle_layer->getOutput(0);
}
}
nvinfer1::Dims ShuffleTensorRT::InferReshapeDims(const nvinfer1::Dims &input_dims,
const std::vector<int64_t> &ms_input_shape,
const std::vector<int64_t> &ms_output_shape) {
// tensorrt support infer shape of 0 and -1
nvinfer1::Dims reshape_dims = ConvertCudaDims(ms_output_shape);
if (reshape_dims.nbDims == -1) {
MS_LOG(ERROR) << "ConvertCudaDims failed for " << op_name_;
return reshape_dims;
}
for (int i = 0; i < reshape_dims.nbDims; i++) {
if (input_dims.d[i] == -1) {
if (ms_input_shape[i] == ms_output_shape[i]) {
reshape_dims.d[i] = 0;
} else {
reshape_dims.d[i] = -1;
}
}
MS_LOG(DEBUG) << "reshape infer_index " << i << " value: " << reshape_dims.d[i];
}
return reshape_dims;
}
REGISTER_TENSORRT_CREATOR(ops::kNameUnsqueeze, ShuffleTensorRT)
REGISTER_TENSORRT_CREATOR(ops::kNameSqueeze, ShuffleTensorRT)
REGISTER_TENSORRT_CREATOR(ops::kNameReshape, ShuffleTensorRT)

View File

@ -42,9 +42,6 @@ class ShuffleTensorRT : public TensorRTOp {
int AddFlattenOp(nvinfer1::IShuffleLayer *shuffle_layer);
int AddExpandDimsOp(nvinfer1::IShuffleLayer *shuffle_layer);
int AddBroadcastToOp(nvinfer1::IShuffleLayer *shuffle_layer);
nvinfer1::ITensor *ExpandDim(nvinfer1::IShuffleLayer *shuffle_layer, nvinfer1::ITensor *input_tensor, int axis);
nvinfer1::Dims InferReshapeDims(const nvinfer1::Dims &input_dims, const std::vector<int64_t> &ms_input_shape,
const std::vector<int64_t> &ms_output_shape);
Format out_format_ = Format::NCHW;
nvinfer1::ITensor *shuffler_input_{nullptr};

View File

@ -71,24 +71,32 @@ class StrideSliceTensorRTUtil final : public SliceTensorRTUtil {
}
int axis_value = *(static_cast<const int *>(in_tensors.at(axis_index).Data()));
int start_value = *(static_cast<const int *>(begin.Data()));
int size_value = *(static_cast<const int *>(end.Data()));
int end_value = *(static_cast<const int *>(end.Data()));
int stride_value = *(static_cast<const int *>(stride.Data()));
auto input_dims = helper.trt_tensor_->getDimensions();
start_dims.nbDims = input_dims.nbDims;
size_dims.nbDims = input_dims.nbDims;
stride_dims = nvinfer1::Dims{size_dims.nbDims, {}};
std::fill(start_dims.d, start_dims.d + start_dims.nbDims, 0);
std::fill(stride_dims.d, stride_dims.d + stride_dims.nbDims, 1);
if (start_value < 0) {
start_value = input_dims.d[axis_value] + start_value;
}
for (int i = 0; i < start_dims.nbDims; i++) {
if (i == axis_value) {
start_dims.d[i] = start_value;
size_dims.d[i] = size_value >= 0 ? std::min(size_value, input_dims.d[i]) - start_dims.d[i]
: size_value + input_dims.d[i] - start_dims.d[i];
stride_dims.d[i] = stride_value;
if (end_value >= 0) {
size_dims.d[i] = std::min(end_value, input_dims.d[i]) - start_dims.d[i];
} else if (end_value >= -input_dims.d[i]) {
size_dims.d[i] = end_value + input_dims.d[i] - start_dims.d[i];
} else {
size_dims.d[i] = input_dims.d[i];
}
} else {
start_dims.d[i] = 0;
size_dims.d[i] = helper.trt_tensor_->getDimensions().d[i];
}
}
int stride_value = *(static_cast<const int *>(stride.Data()));
stride_dims = nvinfer1::Dims{size_dims.nbDims, {}};
std::fill(stride_dims.d, stride_dims.d + stride_dims.nbDims, stride_value);
}
return std::make_tuple(start_dims, size_dims, stride_dims);
}
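The updated strided-slice handling resolves negative begin/end values against the sliced dimension and converts them into TensorRT's start/size/stride form. A standalone sketch of the single-axis resolution performed above (plain C++, illustrative):
#include <algorithm>
struct AxisSlice {
  int start;
  int size;
};
// Resolve begin/end for one axis of extent `dim`: negative indices wrap
// around, end is clamped to the extent, and an out-of-range negative end
// falls back to the full dimension, matching the branches above.
AxisSlice ResolveAxisSlice(int dim, int begin, int end) {
  int start = begin < 0 ? begin + dim : begin;
  int size;
  if (end >= 0) {
    size = std::min(end, dim) - start;
  } else if (end >= -dim) {
    size = end + dim - start;
  } else {
    size = dim;
  }
  return AxisSlice{start, size};
}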
@ -103,7 +111,7 @@ class StrideSliceTensorRTUtil final : public SliceTensorRTUtil {
shape.erase(shape.begin() + i);
}
}
return Reshape(ctx, input, shape);
return shape.empty() ? nullptr : Reshape(ctx, input, shape);
}
return input;
}
@ -136,8 +144,8 @@ class SliceFusionTensorRTUtil final : public SliceTensorRTUtil {
const auto &begin = in_tensors.at(1);
const auto &size = in_tensors.at(SIZE_INDEX);
auto start_dims = lite::ConvertCudaDims(begin.Data(), begin.ElementNum());
auto size_dims = lite::ConvertCudaDims(size.Data(), size.ElementNum());
auto start_dims = lite::ConvertCudaDims(begin);
auto size_dims = lite::ConvertCudaDims(size);
for (int i = 0; i != size_dims.nbDims; ++i) {
if (size_dims.d[i] == -1) {
size_dims.d[i] = helper.trt_tensor_->getDimensions().d[i];
@ -278,17 +286,16 @@ int SliceTensorRT::AddInnerOp(TensorRTContext *ctx) {
this->layer_ = slice_layer;
slice_layer->setName(op_name_.c_str());
nvinfer1::ITensor *out_tensor = slice_layer->getOutput(0);
out_tensor = util_->PostProcess(ctx, out_tensor, in_tensors_, out_tensors_);
if (out_tensor == nullptr) {
MS_LOG(ERROR) << "output tensor create failed";
return RET_ERROR;
auto post_tensor = util_->PostProcess(ctx, out_tensor, in_tensors_, out_tensors_);
bool rank_0 = false;
if (post_tensor == nullptr) {
rank_0 = true;
post_tensor = out_tensor;
}
auto helper = ITensorHelper{out_tensor, slice_input.format_, slice_input.same_format_};
auto helper = ITensorHelper{post_tensor, slice_input.format_, slice_input.same_format_, !rank_0};
ctx->RegisterTensor(helper, out_tensors_[0].Name());
MS_LOG(DEBUG) << "slice output : " << GetTensorFormat(helper);
return RET_OK;
}
REGISTER_TENSORRT_CREATOR(ops::kNameStridedSlice, SliceTensorRT)
REGISTER_TENSORRT_CREATOR(ops::kNameSliceFusion, SliceTensorRT)
REGISTER_TENSORRT_CREATOR(ops::kNameCrop, SliceTensorRT)
} // namespace mindspore::lite

View File

@ -0,0 +1,131 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/extendrt/delegate/tensorrt/op/slicefusion_tensorrt.h"
#include <algorithm>
#include <utility>
#include "src/extendrt/delegate/tensorrt/tensorrt_utils.h"
#include "ops/fusion/slice_fusion.h"
namespace mindspore::lite {
nvinfer1::ITensor *SliceFusionTensorRT::GetDynamicSliceSize(TensorRTContext *ctx, nvinfer1::ITensor *input,
nvinfer1::Dims start_dims, nvinfer1::Dims size_dims) {
auto in_tensor_shape = ctx->network()->addShape(*input)->getOutput(0);
if (in_tensor_shape == nullptr) {
MS_LOG(ERROR) << "add shape layer of input failed!";
return nullptr;
}
std::vector<nvinfer1::ITensor *> shape_tensors;
auto input_dims = input->getDimensions();
std::vector<int> input_shape_vec;
for (int i = 0; i != input_dims.nbDims; ++i) {
if (input_dims.d[i] == -1) {
if (!input_shape_vec.empty()) {
shape_tensors.push_back(ctx->ConvertTo1DTensor(input_shape_vec));
input_shape_vec.clear();
}
auto starts = nvinfer1::Dims{1, {i}};
auto size = nvinfer1::Dims{1, {1}};
auto strides = nvinfer1::Dims{1, {1}};
auto slice_layer = ctx->network()->addSlice(*in_tensor_shape, starts, size, strides);
if (slice_layer == nullptr) {
MS_LOG(ERROR) << "add slice layer failed";
return nullptr;
}
auto start_tensor = ctx->ConvertTo1DTensor(start_dims.d[i]);
shape_tensors.push_back(
ctx->network()
->addElementWise(*slice_layer->getOutput(0), *start_tensor, nvinfer1::ElementWiseOperation::kSUB)
->getOutput(0));
} else {
input_shape_vec.push_back(size_dims.d[i]);
}
}
if (!input_shape_vec.empty()) {
shape_tensors.push_back(ctx->ConvertTo1DTensor(input_shape_vec));
}
auto concat_layer = ctx->network()->addConcatenation(shape_tensors.data(), static_cast<int>(shape_tensors.size()));
if (concat_layer == nullptr) {
MS_LOG(ERROR) << "add concat layer failed!";
return nullptr;
}
concat_layer->setAxis(0);
return concat_layer->getOutput(0);
}
int SliceFusionTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors) {
if (in_tensors.size() != SLICE_INPUT_SIZE) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
return RET_ERROR;
}
if (out_tensors.size() != 1) {
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
return RET_ERROR;
}
dynamic_shape_params_.support_hw_dynamic_ = false;
return RET_OK;
}
int SliceFusionTensorRT::AddInnerOp(TensorRTContext *ctx) {
ITensorHelper slice_input;
int ret = PreprocessInputs2SameDim(ctx, input(ctx, 0), &slice_input);
if (ret != RET_OK || slice_input.trt_tensor_ == nullptr) {
MS_LOG(ERROR) << "PreprocessInputs2SameDim input tensor failed for " << op_name_;
return RET_ERROR;
}
const auto &begin = in_tensors_.at(1);
const auto &size = in_tensors_.at(SIZE_INDEX);
auto start_dims = lite::ConvertCudaDims(begin);
auto size_dims = lite::ConvertCudaDims(size);
nvinfer1::ITensor *size_tensor = nullptr;
for (int i = 0; i != size_dims.nbDims; ++i) {
if (size_dims.d[i] == -1 && !IsDynamicInput(ctx, 0)) {
size_dims.d[i] = slice_input.trt_tensor_->getDimensions().d[i];
}
}
if (IsDynamicInput(ctx, 0)) {
size_tensor = GetDynamicSliceSize(ctx, slice_input.trt_tensor_, start_dims, size_dims);
size_dims = nvinfer1::Dims{-1};
}
auto stride_dims = lite::ConvertCudaDims(1, begin.ElementNum());
nvinfer1::ISliceLayer *slice_layer =
ctx->network()->addSlice(*slice_input.trt_tensor_, start_dims, size_dims, stride_dims);
if (slice_layer == nullptr) {
MS_LOG(ERROR) << "add Slice op failed for TensorRT: " << op_name_;
return RET_ERROR;
}
if (size_tensor != nullptr) {
slice_layer->setInput(INPUT_SIZE2, *size_tensor);
}
this->layer_ = slice_layer;
slice_layer->setName(op_name_.c_str());
nvinfer1::ITensor *out_tensor = slice_layer->getOutput(0);
auto helper = ITensorHelper{out_tensor, slice_input.format_, slice_input.same_format_};
ctx->RegisterTensor(helper, out_tensors_[0].Name());
MS_LOG(DEBUG) << "slice output : " << GetTensorFormat(helper);
return RET_OK;
}
REGISTER_TENSORRT_CREATOR(ops::kNameSliceFusion, SliceFusionTensorRT)
} // namespace mindspore::lite
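GetDynamicSliceSize stitches the effective slice sizes together on the network: dimensions that are dynamic at build time take runtime_dim - start from a shape tensor, while static dimensions keep their constant size. A host-side sketch of the same resolution (plain C++, illustrative):
#include <vector>
// build_time_dims uses -1 for dimensions unknown when the engine is built;
// runtime_dims holds the actual extents observed at execution time.
std::vector<int> ResolveSliceSizes(const std::vector<int> &build_time_dims,
                                   const std::vector<int> &runtime_dims,
                                   const std::vector<int> &start,
                                   const std::vector<int> &size) {
  std::vector<int> resolved(size.size());
  for (size_t i = 0; i < size.size(); ++i) {
    resolved[i] = (build_time_dims[i] == -1) ? runtime_dims[i] - start[i] : size[i];
  }
  return resolved;
}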

View File

@ -0,0 +1,44 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_SLICE_FUSION_TENSORRT_H_
#define MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_SLICE_FUSION_TENSORRT_H_
#include <string>
#include <vector>
#include <memory>
#include "src/extendrt/delegate/tensorrt/op/tensorrt_op.h"
namespace mindspore::lite {
constexpr int SIZE_INDEX = 2;
constexpr int SLICE_INPUT_SIZE = 3;
class SliceFusionTensorRT : public TensorRTOp {
public:
SliceFusionTensorRT(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors, std::string name)
: TensorRTOp(base_operator, in_tensors, out_tensors, name) {}
~SliceFusionTensorRT() override = default;
int AddInnerOp(TensorRTContext *ctx) override;
int IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors) override;
private:
nvinfer1::ITensor *GetDynamicSliceSize(TensorRTContext *ctx, nvinfer1::ITensor *input, nvinfer1::Dims,
nvinfer1::Dims);
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_SLICE_FUSION_TENSORRT_H_

View File

@ -29,7 +29,7 @@
namespace mindspore::lite {
int SpaceToBatchTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors) {
if (in_tensors.size() != 3) {
if (in_tensors.size() != INPUT_SIZE3) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
return RET_ERROR;
}
@ -38,8 +38,6 @@ int SpaceToBatchTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
return RET_ERROR;
}
dynamic_shape_params_.support_dynamic_ = false;
dynamic_shape_params_.support_hw_dynamic_ = false;
return RET_OK;
}
@ -52,19 +50,17 @@ int SpaceToBatchTensorRT::AddInnerOp(TensorRTContext *ctx) {
MS_LOG(ERROR) << "block_h not equal block_w " << op_name_;
return RET_ERROR;
}
const int *pad_ptr = reinterpret_cast<const int *>(in_tensors_[2].Data());
const int *pad_ptr = reinterpret_cast<const int *>(in_tensors_[INPUT_SIZE2].Data());
int ph0 = *(pad_ptr + 0);
int ph1 = *(pad_ptr + 1);
int pw0 = *(pad_ptr + 2);
int pw1 = *(pad_ptr + 3);
auto in_dims = input_tensor->getDimensions();
int in = in_dims.d[0];
int ic = in_dims.d[1];
int ih = in_dims.d[2];
int iw = in_dims.d[3];
int pw0 = *(pad_ptr + INPUT_SIZE2);
int pw1 = *(pad_ptr + INPUT_SIZE3);
auto plugin =
std::make_shared<SpaceToBatchPlugin>(input_tensor->getName(), in, ic, ih, iw, bh, ph0, ph1, pw0, pw1, device_id_);
auto plugin = std::make_shared<SpaceToBatchPlugin>(input_tensor->getName(), bh, ph0, ph1, pw0, pw1, device_id_);
if (plugin == nullptr) {
MS_LOG(ERROR) << "add spacetobatch plugin failed for" << op_name_;
return RET_ERROR;
}
nvinfer1::ITensor *inputTensors[] = {input_tensor};
nvinfer1::IPluginV2Layer *space2batch_opt_layer = ctx->network()->addPluginV2(inputTensors, 1, *plugin);
if (space2batch_opt_layer == nullptr) {
@ -93,15 +89,20 @@ int SpaceToBatchPlugin::enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
int SpaceToBatchPlugin::RunCudaSpaceToBatch(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs,
void *const *outputs, cudaStream_t stream) {
int on = in_ * bh_ * bh_;
int oc = ic_;
int oh = (ih_ + ph0_ + ph1_) / bh_;
int ow = (iw_ + pw0_ + pw1_) / bh_;
nvinfer1::Dims input_dims = inputDesc[0].dims;
int in = input_dims.d[0];
int ic = input_dims.d[1];
int ih = input_dims.d[2];
int iw = input_dims.d[3];
int on = in * bh_ * bh_;
int oc = ic;
int oh = (ih + ph0_ + ph1_) / bh_;
int ow = (iw + pw0_ + pw1_) / bh_;
int size = in_ * ic_ * ih_ * iw_;
int size = in * ic * ih * iw;
CalSpaceToBatch<float>(size, static_cast<const float *>(inputs[0]), in_, ih_, iw_, ic_, on, oh, ow, oc, ph0_, ph1_,
pw0_, pw1_, bh_, static_cast<float *>(outputs[0]), device_id_, stream);
CalSpaceToBatch<float>(size, static_cast<const float *>(inputs[0]), in, ih, iw, ic, on, oh, ow, oc, ph0_, ph1_, pw0_,
pw1_, bh_, static_cast<float *>(outputs[0]), device_id_, stream);
return RET_OK;
}
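With the input geometry now read from inputDesc at enqueue time, the plugin only needs the block size and paddings to derive the output extents. The formulas used above, restated as a small sketch (plain C++, illustrative):
// For block size bh and paddings ph0/ph1 (height), pw0/pw1 (width):
//   on = n * bh * bh,  oc = c,  oh = (h + ph0 + ph1) / bh,  ow = (w + pw0 + pw1) / bh
struct S2BDims {
  int n, c, h, w;
};
S2BDims SpaceToBatchOutDims(const S2BDims &in, int bh, int ph0, int ph1, int pw0, int pw1) {
  return S2BDims{in.n * bh * bh, in.c, (in.h + ph0 + ph1) / bh, (in.w + pw0 + pw1) / bh};
}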
@ -115,7 +116,7 @@ nvinfer1::IPluginV2DynamicExt *SpaceToBatchPlugin::clone() const noexcept {
return plugin;
}
size_t SpaceToBatchPlugin::getSerializationSize() const noexcept { return sizeof(int) * 9; }
size_t SpaceToBatchPlugin::getSerializationSize() const noexcept { return sizeof(int) * 5; }
nvinfer1::DimsExprs SpaceToBatchPlugin::getOutputDimensions(int32_t index, const nvinfer1::DimsExprs *inputs,
int nbInputDims,
@ -127,19 +128,15 @@ nvinfer1::DimsExprs SpaceToBatchPlugin::getOutputDimensions(int32_t index, const
dims.d[1] = inputs[0].d[1];
auto bh = exprBuilder.constant(bh_);
auto ph_sum = exprBuilder.constant(ph0_ + ph1_);
auto sum0 = exprBuilder.operation(nvinfer1::DimensionOperation::kSUM, *inputs[0].d[2], *ph_sum);
dims.d[2] = exprBuilder.operation(nvinfer1::DimensionOperation::kFLOOR_DIV, *sum0, *bh);
auto sum0 = exprBuilder.operation(nvinfer1::DimensionOperation::kSUM, *inputs[0].d[INPUT_SIZE2], *ph_sum);
dims.d[INPUT_SIZE2] = exprBuilder.operation(nvinfer1::DimensionOperation::kFLOOR_DIV, *sum0, *bh);
auto pw_sum = exprBuilder.constant(pw0_ + pw1_);
auto sum1 = exprBuilder.operation(nvinfer1::DimensionOperation::kSUM, *inputs[0].d[3], *pw_sum);
dims.d[3] = exprBuilder.operation(nvinfer1::DimensionOperation::kFLOOR_DIV, *sum1, *bh);
auto sum1 = exprBuilder.operation(nvinfer1::DimensionOperation::kSUM, *inputs[0].d[INPUT_SIZE3], *pw_sum);
dims.d[INPUT_SIZE3] = exprBuilder.operation(nvinfer1::DimensionOperation::kFLOOR_DIV, *sum1, *bh);
return dims;
}
void SpaceToBatchPlugin::serialize(void *buffer) const noexcept {
SerializeValue(&buffer, &in_, sizeof(int));
SerializeValue(&buffer, &ic_, sizeof(int));
SerializeValue(&buffer, &ih_, sizeof(int));
SerializeValue(&buffer, &iw_, sizeof(int));
SerializeValue(&buffer, &bh_, sizeof(int));
SerializeValue(&buffer, &ph0_, sizeof(int));
SerializeValue(&buffer, &ph1_, sizeof(int));

View File

@ -40,13 +40,8 @@ class SpaceToBatchTensorRT : public TensorRTOp {
constexpr auto SPACETOTENSORRT_PLUGIN_NAME{"SpaceToBatchPlugin"};
class SpaceToBatchPlugin : public TensorRTPlugin {
public:
SpaceToBatchPlugin(const std::string name, int in, int ic, int ih, int iw, int bh, int ph0, int ph1, int pw0, int pw1,
uint32_t device_id)
SpaceToBatchPlugin(const std::string name, int bh, int ph0, int ph1, int pw0, int pw1, uint32_t device_id)
: TensorRTPlugin(name, std::string(SPACETOTENSORRT_PLUGIN_NAME), device_id),
in_(in),
ic_(ic),
ih_(ih),
iw_(iw),
bh_(bh),
ph0_(ph0),
ph1_(ph1),
@ -56,23 +51,15 @@ class SpaceToBatchPlugin : public TensorRTPlugin {
SpaceToBatchPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
: TensorRTPlugin(std::string(name), std::string(SPACETOTENSORRT_PLUGIN_NAME)) {
const nvinfer1::PluginField *fields = fc->fields;
in_ = static_cast<const int *>(fields[0].data)[0];
ic_ = static_cast<const int *>(fields[1].data)[0];
ih_ = static_cast<const int *>(fields[2].data)[0];
iw_ = static_cast<const int *>(fields[3].data)[0];
bh_ = static_cast<const int *>(fields[4].data)[0];
ph0_ = static_cast<const int *>(fields[5].data)[0];
ph1_ = static_cast<const int *>(fields[6].data)[0];
pw0_ = static_cast<const int *>(fields[7].data)[0];
pw1_ = static_cast<const int *>(fields[8].data)[0];
bh_ = static_cast<const int *>(fields[0].data)[0];
ph0_ = static_cast<const int *>(fields[1].data)[0];
ph1_ = static_cast<const int *>(fields[2].data)[0];
pw0_ = static_cast<const int *>(fields[3].data)[0];
pw1_ = static_cast<const int *>(fields[4].data)[0];
}
SpaceToBatchPlugin(const char *name, const void *serialData, size_t serialLength)
: TensorRTPlugin(std::string(name), std::string(SPACETOTENSORRT_PLUGIN_NAME)) {
DeserializeValue(&serialData, &serialLength, &in_, sizeof(int));
DeserializeValue(&serialData, &serialLength, &ic_, sizeof(int));
DeserializeValue(&serialData, &serialLength, &ih_, sizeof(int));
DeserializeValue(&serialData, &serialLength, &iw_, sizeof(int));
DeserializeValue(&serialData, &serialLength, &bh_, sizeof(int));
DeserializeValue(&serialData, &serialLength, &ph0_, sizeof(int));
DeserializeValue(&serialData, &serialLength, &ph1_, sizeof(int));
@ -93,15 +80,11 @@ class SpaceToBatchPlugin : public TensorRTPlugin {
private:
int RunCudaSpaceToBatch(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs, void *const *outputs,
cudaStream_t stream);
size_t in_;
size_t ic_;
size_t ih_;
size_t iw_;
size_t bh_;
size_t ph0_;
size_t ph1_;
size_t pw0_;
size_t pw1_;
int bh_;
int ph0_;
int ph1_;
int pw0_;
int pw1_;
const std::string layer_name_;
std::string name_space_;
};

View File

@ -24,19 +24,53 @@
namespace mindspore::lite {
int SplitTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors) {
if (!IsShapeKnown()) {
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
return RET_ERROR;
}
if (in_tensors.size() != 1 && in_tensors.size() != INPUT_SIZE2) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
return RET_ERROR;
}
dynamic_shape_params_.support_dynamic_ = false;
dynamic_shape_params_.support_hw_dynamic_ = false;
return RET_OK;
}
nvinfer1::ITensor *SplitTensorRT::GetDynamicSliceSize(TensorRTContext *ctx, nvinfer1::ITensor *input, size_t i) {
auto in_tensor_shape = ctx->network()->addShape(*input)->getOutput(0);
if (in_tensor_shape == nullptr) {
MS_LOG(ERROR) << "add shape layer of input failed!";
return nullptr;
}
auto len_tensor = ctx->ConvertTo1DTensor(static_cast<int>(size_splits_[i]));
if (len_tensor == nullptr) {
MS_LOG(ERROR) << "convert 1d tensor failed!";
return nullptr;
}
nvinfer1::ITensor *concat_input_tensors[INPUT_SIZE2];
concat_input_tensors[0] = in_tensor_shape;
concat_input_tensors[1] = len_tensor;
auto concat_layer = ctx->network()->addConcatenation(concat_input_tensors, INPUT_SIZE2);
if (concat_layer == nullptr) {
MS_LOG(ERROR) << "add concat layer failed!";
return nullptr;
}
concat_layer->setAxis(0);
auto shape_and_len = concat_layer->getOutput(0);
if (shape_and_len == nullptr) {
MS_LOG(ERROR) << "get concat layer result failed!";
return nullptr;
}
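  // The output slice keeps the input extent on every axis except axis_, whose extent is
  // size_splits_[i]. gather_slices holds indices [0..nbDims-1] with the entry for axis_
  // redirected to position nbDims, where the concatenated tensor stores size_splits_[i].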
std::vector<int> gather_slices(input->getDimensions().nbDims);
std::iota(gather_slices.begin(), gather_slices.end(), 0);
gather_slices[axis_] = gather_slices.size();
auto gather_slices_tensor = ctx->ConvertTo1DTensor(gather_slices);
nvinfer1::IGatherLayer *gather_layer = ctx->network()->addGather(*shape_and_len, *gather_slices_tensor, 0);
if (gather_layer == nullptr) {
MS_LOG(ERROR) << "add gather layer failed!";
return nullptr;
}
return gather_layer->getOutput(0);
}
int SplitTensorRT::AddInnerOp(TensorRTContext *ctx) {
ITensorHelper split_input;
int ret = PreprocessInputs2SameDim(ctx, input(ctx, 0), &split_input);
@ -78,16 +112,24 @@ int SplitTensorRT::AddInnerOp(TensorRTContext *ctx) {
for (int i = 0; i != output_num_; ++i) {
nvinfer1::Dims start_dims = lite::ConvertCudaDims(0, input_nbdims);
start_dims.d[axis_] = axis_dim_index;
nvinfer1::Dims size_dims{-1};
nvinfer1::ITensor *size_tensor = nullptr;
if (!IsDynamicInput(ctx, 0)) {
size_dims = split_input.trt_tensor_->getDimensions();
size_dims.d[axis_] = size_splits_[i];
} else {
size_tensor = GetDynamicSliceSize(ctx, split_input.trt_tensor_, i);
}
axis_dim_index += size_splits_[i];
nvinfer1::Dims size_dims = split_input.trt_tensor_->getDimensions();
size_dims.d[axis_] = size_splits_[i];
slice_layer = ctx->network()->addSlice(*split_input.trt_tensor_, start_dims, size_dims, one_dims);
if (slice_layer == nullptr) {
MS_LOG(ERROR) << "add Slice op failed for TensorRT: " << op_name_;
return RET_ERROR;
}
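    // In the dynamic-shape path the statically computed size_dims is only a placeholder;
    // the gathered size tensor is attached below as slice input index 2 and takes precedence.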
if (size_tensor != nullptr) {
slice_layer->setInput(INPUT_SIZE2, *size_tensor);
}
nvinfer1::ITensor *out_tensor = slice_layer->getOutput(0);
if (type_ == ops::kNameUnstack) {

View File

@ -35,10 +35,11 @@ class SplitTensorRT : public TensorRTOp {
const std::vector<TensorInfo> &out_tensors) override;
private:
nvinfer1::ITensor *GetDynamicSliceSize(TensorRTContext *ctx, nvinfer1::ITensor *input, size_t i);
int ParseParams(const ITensorHelper &helper);
int64_t axis_;
int64_t output_num_;
std::vector<int64_t> size_splits_;
int output_num_;
std::vector<int> size_splits_;
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_SPLIT_TENSORRT_H_

View File

@ -0,0 +1,187 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#include "src/extendrt/delegate/tensorrt/op/strideslice_tensorrt.h"
#include <algorithm>
#include <numeric>
#include "src/extendrt/delegate/tensorrt/tensorrt_utils.h"
#include "ops/strided_slice.h"
namespace mindspore::lite {
nvinfer1::ITensor *StrideSliceTensorRT::GetDynamicSliceSize(TensorRTContext *ctx, nvinfer1::ITensor *input,
int size_dim, int axis) {
auto in_tensor_shape = ctx->network()->addShape(*input)->getOutput(0);
if (in_tensor_shape == nullptr) {
MS_LOG(ERROR) << "add shape layer of input failed!";
return nullptr;
}
auto len_tensor = ctx->ConvertTo1DTensor(static_cast<int>(size_dim));
if (len_tensor == nullptr) {
MS_LOG(ERROR) << "convert 1d tensor failed!";
return nullptr;
}
nvinfer1::ITensor *concat_input_tensors[INPUT_SIZE2];
concat_input_tensors[0] = in_tensor_shape;
concat_input_tensors[1] = len_tensor;
auto concat_layer = ctx->network()->addConcatenation(concat_input_tensors, INPUT_SIZE2);
if (concat_layer == nullptr) {
MS_LOG(ERROR) << "add concat layer failed!";
return nullptr;
}
concat_layer->setAxis(0);
auto shape_and_len = concat_layer->getOutput(0);
if (shape_and_len == nullptr) {
MS_LOG(ERROR) << "get concat layer result failed!";
return nullptr;
}
std::vector<int> gather_slices(input->getDimensions().nbDims);
std::iota(gather_slices.begin(), gather_slices.end(), 0);
gather_slices[axis] = gather_slices.size();
auto gather_slices_tensor = ctx->ConvertTo1DTensor(gather_slices);
nvinfer1::IGatherLayer *gather_layer = ctx->network()->addGather(*shape_and_len, *gather_slices_tensor, 0);
if (gather_layer == nullptr) {
MS_LOG(ERROR) << "add gather layer failed!";
return nullptr;
}
return gather_layer->getOutput(0);
}
int StrideSliceTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors) {
if (in_tensors.size() < HAS_AXIS - 1) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
return RET_ERROR;
}
if (out_tensors.size() != 1) {
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
return RET_ERROR;
}
if (!in_tensors.at(BEGINS_INDEX).IsConst() || !in_tensors.at(ENDS_INDEX).IsConst()) {
MS_LOG(ERROR) << "invalid input tensor for: " << op_name_;
return RET_ERROR;
}
return RET_OK;
}
nvinfer1::ILayer *StrideSliceTensorRT::MakeLayer(TensorRTContext *ctx, const ITensorHelper &slice_input) {
const auto &begin = in_tensors_.at(BEGINS_INDEX);
const auto &stride = in_tensors_.back();
const auto &end = in_tensors_.at(ENDS_INDEX);
auto input_dims = slice_input.trt_tensor_->getDimensions();
nvinfer1::Dims start_dims{input_dims};
nvinfer1::Dims size_dims{input_dims};
nvinfer1::Dims stride_dims{input_dims};
int64_t axis_index = in_tensors_.size() == HAS_AXIS ? AXIS_INDEX : -1;
nvinfer1::ITensor *size_tensor = nullptr;
if (begin.ElementNum() == input_dims.nbDims) {
start_dims = lite::ConvertCudaDims(begin);
auto end_dims = lite::ConvertCudaDims(end);
for (int i = 0; i < size_dims.nbDims; i++) {
size_dims.d[i] = end_dims.d[i] - start_dims.d[i];
}
stride_dims = lite::ConvertCudaDims(stride);
} else {
if (begin.ElementNum() != 1 || end.ElementNum() != 1 || stride.ElementNum() != 1) {
MS_LOG(ERROR)
<< "Only support element number of begin, end and stride to be 1 when this number < input dims number, op: "
<< op_name_;
return nullptr;
}
int axis_value = axis_index == -1 ? 0 : *(static_cast<const int *>(in_tensors_.at(axis_index).Data()));
int start_value = *(static_cast<const int *>(begin.Data()));
int end_value = *(static_cast<const int *>(end.Data()));
int stride_value = *(static_cast<const int *>(stride.Data()));
std::fill(start_dims.d, start_dims.d + start_dims.nbDims, 0);
std::fill(stride_dims.d, stride_dims.d + stride_dims.nbDims, 1);
size_dims = slice_input.trt_tensor_->getDimensions();
if (start_value < 0) {
start_value = input_dims.d[axis_value] + start_value;
}
for (int i = 0; i < start_dims.nbDims; i++) {
if (i == axis_value) {
start_dims.d[i] = start_value;
stride_dims.d[i] = stride_value;
if (end_value >= 0) {
size_dims.d[i] = std::min(end_value, input_dims.d[i]) - start_dims.d[i];
} else if (end_value >= -input_dims.d[i]) {
size_dims.d[i] = end_value + input_dims.d[i] - start_dims.d[i];
} else {
size_dims.d[i] = input_dims.d[i];
}
}
}
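    // With dynamic (-1) input dims the size derived from static dimensions is only a
    // fallback; the real size is gathered from the runtime shape below and size_dims is
    // reset to an invalid placeholder.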
if (IsDynamicInput(ctx, 0)) {
size_tensor = GetDynamicSliceSize(ctx, slice_input.trt_tensor_, size_dims.d[axis_value], axis_value);
size_dims = nvinfer1::Dims{-1};
}
}
nvinfer1::ISliceLayer *slice_layer =
ctx->network()->addSlice(*slice_input.trt_tensor_, start_dims, size_dims, stride_dims);
if (slice_layer == nullptr) {
MS_LOG(ERROR) << "add Slice op failed for TensorRT: " << op_name_;
return nullptr;
}
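  // ISliceLayer input index 2 is the size tensor; setting it overrides the static
  // size_dims when the slice size must be computed at runtime.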
if (size_tensor != nullptr) {
slice_layer->setInput(INPUT_SIZE2, *size_tensor);
}
return slice_layer;
}
int StrideSliceTensorRT::AddInnerOp(TensorRTContext *ctx) {
ITensorHelper slice_input;
int ret = PreprocessInputs2SameDim(ctx, input(ctx, 0), &slice_input);
if (ret != RET_OK || slice_input.trt_tensor_ == nullptr) {
MS_LOG(ERROR) << "PreprocessInputs2SameDim input tensor failed for " << op_name_;
return RET_ERROR;
}
auto slice_layer = MakeLayer(ctx, slice_input);
if (slice_layer == nullptr) {
return RET_ERROR;
}
this->layer_ = slice_layer;
slice_layer->setName(op_name_.c_str());
nvinfer1::ITensor *out_tensor = slice_layer->getOutput(0);
auto shape = ConvertMSShape(out_tensor->getDimensions());
bool rank_0 = false;
shrink_axis_ = AsOps<ops::StridedSlice>()->get_shrink_axis_mask();
if (shrink_axis_ != 0) {
for (int i = shape.size() - 1; i >= 0; --i) {
int mask = 1 << i;
if ((shrink_axis_ & mask) != 0) {
shape.erase(shape.begin() + i);
}
}
if (!shape.empty()) {
out_tensor = Reshape(ctx, out_tensor, shape);
} else {
rank_0 = true;
}
}
auto helper = ITensorHelper{out_tensor, slice_input.format_, slice_input.same_format_, !rank_0};
ctx->RegisterTensor(helper, out_tensors_[0].Name());
MS_LOG(DEBUG) << "slice output : " << GetTensorFormat(helper);
return RET_OK;
}
REGISTER_TENSORRT_CREATOR(ops::kNameStridedSlice, StrideSliceTensorRT)
} // namespace mindspore::lite

View File

@ -0,0 +1,48 @@
/**
* Copyright 2021 Huawei Technologies Co., Ltd
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_STRIDE_SLICE_TENSORRT_H_
#define MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_STRIDE_SLICE_TENSORRT_H_
#include <string>
#include <vector>
#include <tuple>
#include <memory>
#include "src/extendrt/delegate/tensorrt/op/tensorrt_op.h"
namespace mindspore::lite {
constexpr int BEGINS_INDEX = 1;
constexpr int ENDS_INDEX = 2;
constexpr int HAS_AXIS = 5;
constexpr int AXIS_INDEX = 3;
class StrideSliceTensorRT : public TensorRTOp {
public:
StrideSliceTensorRT(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors, std::string name)
: TensorRTOp(base_operator, in_tensors, out_tensors, name) {}
~StrideSliceTensorRT() override = default;
int AddInnerOp(TensorRTContext *ctx) override;
int IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
const std::vector<TensorInfo> &out_tensors) override;
private:
nvinfer1::ITensor *GetDynamicSliceSize(TensorRTContext *ctx, nvinfer1::ITensor *input, int, int);
nvinfer1::ILayer *MakeLayer(TensorRTContext *ctx, const ITensorHelper &slice_input);
size_t shrink_axis_;
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_STRIDE_SLICE_TENSORRT_H_

View File

@ -87,6 +87,11 @@ int TensorRTOp::ReadyInputsNumber(TensorRTContext *ctx) const {
bool TensorRTOp::IsShapeKnown() { return true; }
bool TensorRTOp::IsDynamicInput(TensorRTContext *ctx, size_t k) {
nvinfer1::Dims dims = input(ctx, k).trt_tensor_->getDimensions();
return std::any_of(dims.d, dims.d + dims.nbDims, [](int d) { return d == -1; });
}
int TensorRTOp::Prepare(void **network_tensor_bindings, nvinfer1::ICudaEngine *engine) {
if (op_binding_tensor_.size() != 0) {
MS_LOG(ERROR) << "need special op Prepare for " << op_name_;

View File

@ -40,6 +40,7 @@ namespace mindspore::lite {
constexpr int INPUT_SIZE2 = 2;
constexpr int INPUT_SIZE3 = 3;
constexpr int INPUT_SIZE4 = 4;
constexpr int INPUT_SIZE5 = 5;
struct BindingHelper {
std::string name_;
@ -115,6 +116,7 @@ class TensorRTOp {
nvinfer1::ILayer *layer() { return layer_; }
bool GetSupportInputBool();
bool IsDynamicInput(TensorRTContext *ctx, size_t k);
void SetSupportInputBool(bool support_input_bool);
template <class OpsT>

View File

@ -87,12 +87,12 @@ class TensorRTPluginCreater : public nvinfer1::IPluginCreator {
const char *getPluginNamespace() const noexcept override { return name_space_.c_str(); }
nvinfer1::IPluginV2 *createPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc) noexcept {
nvinfer1::IPluginV2 *createPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc) noexcept override {
return new (std::nothrow) T(name, fc);
}
nvinfer1::IPluginV2 *deserializePlugin(const char *name, const void *serialData, size_t serialLength) noexcept {
return new (std::nothrow) T(name, serialData, serialLength);
nvinfer1::IPluginV2 *deserializePlugin(const char *name, const void *data, size_t len) noexcept override {
return new (std::nothrow) T(name, data, len);
}
protected:

View File

@ -21,6 +21,7 @@
#include <functional>
#include "include/api/kernel.h"
#include "src/common/utils.h"
#include "src/extendrt/utils/tensor_default_impl.h"
namespace mindspore::lite {
class TensorInfoImpl {
@ -29,46 +30,24 @@ class TensorInfoImpl {
TensorInfoImpl(const std::string &name, mindspore::DataType type, const std::vector<int64_t> &shape,
mindspore::Format format, const void *data, size_t data_len,
const mindspore::tensor::TensorPtr &tensor_val)
: name_(name),
dType_(type),
shape_(shape),
format_(format),
data_(data),
data_len_(data_len),
tensor_val_(tensor_val) {
is_const_ = (data_ != nullptr);
if (data_ == nullptr || data_len_ == 0) {
auto ele_num = ElementNum();
auto type_size = item_size();
temp_data_.resize(ele_num * type_size);
data_ = temp_data_.data();
data_len_ = temp_data_.size();
}
}
void SetShape(const std::vector<int64_t> &shape) {
shape_ = shape;
auto new_elem_num = ElementNum();
auto type_size = item_size();
auto data_size = new_elem_num * type_size;
if (data_size != temp_data_.size() && data_ == temp_data_.data()) {
temp_data_.resize(data_size);
data_ = temp_data_.data();
data_len_ = data_size;
}
: tensor_impl_(name, type, shape), tensor_val_(tensor_val) {
tensor_impl_.SetFormat(format);
auto is_const = (data != nullptr);
tensor_impl_.SetIsConst(is_const);
SetData(data, data_len);
}
int64_t ElementNum() const { return std::accumulate(shape_.begin(), shape_.end(), 1, std::multiplies<int64_t>()); }
size_t item_size() const { return DataTypeSize(static_cast<enum TypeId>(dType_)); }
std::string name_;
mindspore::DataType dType_ = mindspore::DataType::kTypeUnknown;
std::vector<int64_t> shape_;
mindspore::Format format_ = DEFAULT_FORMAT;
const void *data_ = nullptr;
size_t data_len_ = 0;
bool is_const_ = false;
std::vector<uint8_t> temp_data_;
size_t item_size() const { return DataTypeSize(static_cast<enum TypeId>(tensor_impl_.DataType())); }
void SetData(const void *data, size_t data_len) {
if (data != nullptr && data_len != 0) {
if (tensor_impl_.DataSize() != data_len) {
MS_LOG_WARNING << "Tensor expect data size " << tensor_impl_.DataSize() << " != data len " << data_len
<< ", shape: " << tensor_impl_.Shape() << ", dtype: " << tensor_impl_.DataType();
}
tensor_impl_.SetData(const_cast<void *>(data), false);
}
}
TensorDefaultImpl tensor_impl_;
mindspore::tensor::TensorPtr tensor_val_ = nullptr;
};
@ -82,21 +61,21 @@ std::string TensorInfo::Name() const {
if (impl_ == nullptr) {
return "";
}
return impl_->name_;
return impl_->tensor_impl_.Name();
}
mindspore::DataType TensorInfo::DataType() const {
if (impl_ == nullptr) {
return mindspore::DataType::kTypeUnknown;
}
return impl_->dType_;
return impl_->tensor_impl_.DataType();
}
mindspore::Format TensorInfo::format() const {
if (impl_ == nullptr) {
return DEFAULT_FORMAT;
}
return impl_->format_;
return impl_->tensor_impl_.Format();
}
const std::vector<int64_t> &TensorInfo::Shape() const {
@ -104,21 +83,21 @@ const std::vector<int64_t> &TensorInfo::Shape() const {
if (impl_ == nullptr) {
return empty_shape;
}
return impl_->shape_;
return impl_->tensor_impl_.Shape();
}
const void *TensorInfo::Data() const {
if (impl_ == nullptr) {
return nullptr;
}
return impl_->data_;
return impl_->tensor_impl_.Data().get();
}
void *TensorInfo::MutableData() {
if (impl_ == nullptr) {
return nullptr;
}
return const_cast<void *>(impl_->data_);
return const_cast<void *>(impl_->tensor_impl_.MutableData());
}
size_t TensorInfo::DataSize() const {
@ -132,7 +111,7 @@ bool TensorInfo::IsConst() const {
if (impl_ == nullptr) {
return 0;
}
return impl_->is_const_ && impl_->data_ != nullptr;
return impl_->tensor_impl_.IsConst();
}
size_t TensorInfo::item_size() const {
@ -146,22 +125,21 @@ void TensorInfo::SetShape(const std::vector<int64_t> &shape) {
if (impl_ == nullptr) {
return;
}
impl_->SetShape(shape);
impl_->tensor_impl_.SetShape(shape);
}
void TensorInfo::SetData(const void *data, size_t data_len) {
if (impl_ == nullptr) {
return;
}
impl_->data_ = data;
impl_->data_len_ = data_len;
impl_->SetData(data, data_len);
}
int64_t TensorInfo::ElementNum() const {
if (impl_ == nullptr) {
return 0;
}
return impl_->ElementNum();
return impl_->tensor_impl_.ElementNum();
}
TensorInfo &TensorInfo::operator=(const TensorInfo &other) {

View File

@ -15,6 +15,7 @@
*/
#include "src/extendrt/delegate/tensorrt/tensorrt_context.h"
#include "src/extendrt/delegate/tensorrt/tensorrt_utils.h"
namespace mindspore::lite {
TensorRTContext::~TensorRTContext() {
@ -22,6 +23,9 @@ TensorRTContext::~TensorRTContext() {
network_->destroy();
network_ = nullptr;
}
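  // Buffers created by ConvertTo1DTensor back nvinfer1::Weights and must stay valid until
  // the engine is built, so they are owned by the context and released only here.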
for (auto ptr : owner_memorys_) {
free(ptr);
}
}
bool TensorRTContext::Init() {
@ -72,4 +76,31 @@ ITensorHelper TensorRTContext::MsName2Tensor(const std::string &ms_name) {
MS_LOG(WARNING) << "Get Tensorrt tensor by ms_tensor: " << ms_name << " fail!";
return {};
}
template <typename T>
nvinfer1::ITensor *TensorRTContext::ConvertTo1DTensor(T value) {
return ConvertTo1DTensor(std::vector<T>{value});
}
template <typename T>
nvinfer1::ITensor *TensorRTContext::ConvertTo1DTensor(const std::vector<T> &values) {
  void *ptr = malloc(values.size() * sizeof(T));
  if (ptr == nullptr) {
    MS_LOG(ERROR) << "malloc weights buffer failed.";
    return nullptr;
  }
  memcpy(ptr, reinterpret_cast<const void *>(values.data()), values.size() * sizeof(T));
owner_memorys_.push_back(ptr);
nvinfer1::Weights weights{GetNvinferDataType<T>(), ptr, static_cast<int64_t>(values.size())};
nvinfer1::Dims dims{1, {static_cast<int>(values.size())}};
nvinfer1::IConstantLayer *constant_tensor = network()->addConstant(dims, weights);
if (constant_tensor == nullptr) {
MS_LOG(ERROR) << "create constant_tensor failed.";
return nullptr;
}
return constant_tensor->getOutput(0);
}
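// Explicit instantiations keep the template definitions in this translation unit; int and
// float scalars/vectors cover the needs of the current TensorRT ops.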
template nvinfer1::ITensor *TensorRTContext::ConvertTo1DTensor(int value);
template nvinfer1::ITensor *TensorRTContext::ConvertTo1DTensor(float value);
template nvinfer1::ITensor *TensorRTContext::ConvertTo1DTensor(const std::vector<int> &values);
template nvinfer1::ITensor *TensorRTContext::ConvertTo1DTensor(const std::vector<float> &values);
} // namespace mindspore::lite

View File

@ -20,6 +20,7 @@
#include <NvInfer.h>
#include <string>
#include <unordered_map>
#include <vector>
#include "src/extendrt/delegate/tensorrt/tensorrt_runtime.h"
namespace mindspore::lite {
@ -42,11 +43,17 @@ class TensorRTContext {
bool HasTensor(const std::string &name) const;
ITensorHelper MsName2Tensor(const std::string &ms_name);
template <typename T>
nvinfer1::ITensor *ConvertTo1DTensor(T value);
template <typename T>
nvinfer1::ITensor *ConvertTo1DTensor(const std::vector<T> &values);
private:
int counter_{0};
nvinfer1::INetworkDefinition *network_{nullptr};
std::unordered_map<std::string, ITensorHelper> ms_name2trt_tensor_;
TensorRTRuntime *runtime_{nullptr};
std::vector<void *> owner_memorys_;
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_TENSORRT_CONTEXT_H_

View File

@ -22,43 +22,18 @@
#include <map>
#include <string>
#include <utility>
#include "ccsrc/kernel/kernel.h"
#include <algorithm>
#include "src/extendrt/delegate/delegate_utils.h"
#include "ccsrc/kernel/common_utils.h"
#include "ccsrc/backend/common/optimizer/helper.h"
#include "ccsrc/include/common/utils/convert_utils.h"
#include "src/extendrt/utils/kernel_graph_utils.h"
#include "common/config_infos.h"
namespace mindspore::lite {
namespace {
const char tensorrt_provider[] = "tensorrt";
std::vector<std::string> Split(const std::string &str, const std::string &delim) {
auto start = 0U;
auto end = str.find(delim);
std::vector<std::string> substrs;
while (end != std::string::npos) {
substrs.push_back(str.substr(start, end - start));
start = end + delim.length();
end = str.find(delim, start);
}
substrs.push_back(str.substr(start, end));
return substrs;
}
std::vector<nvinfer1::Dims> StringToDims(const std::string &str) {
std::vector<nvinfer1::Dims> all_dims;
std::vector<std::string> all_str_dims = Split(str, ";");
for (const std::string &str_dims : all_str_dims) {
std::vector<std::string> str_dims_vec = Split(str_dims, ",");
nvinfer1::Dims dims;
dims.nbDims = str_dims_vec.size();
for (size_t i = 0; i != str_dims_vec.size(); ++i) {
dims.d[i] = std::stoi(str_dims_vec[i]);
}
all_dims.push_back(dims);
}
return all_dims;
}
struct NodeWithOutputIndex {
NodeWithOutputIndex() = default;
NodeWithOutputIndex(session::KernelWithIndex kernel_index, TensorInfo tensor_info)
@ -308,7 +283,7 @@ Status GetModelOutputsInfo(KernelGraphPtr kernel_graph, std::vector<NodeWithOutp
MS_EXCEPTION_IF_NULL(kernel_graph);
MS_EXCEPTION_IF_NULL(tensor_info_list_ptr);
auto &tensor_info_list = *tensor_info_list_ptr;
auto outputs = kernel_graph->outputs();
auto outputs = KernelGraphUtils::GetKernelGraphOutputs(kernel_graph);
// find parameters of graph inputs
for (size_t i = 0; i < outputs.size(); ++i) {
auto output = outputs[i];
@ -335,31 +310,8 @@ Status GetModelOutputsInfo(KernelGraphPtr kernel_graph, std::vector<NodeWithOutp
return kSuccess;
}
} // namespace
TensorRTExecutor::TensorRTExecutor(const std::shared_ptr<mindspore::Context> &context,
const std::string &cache_model_path, size_t vocab_size, size_t device_cache_size,
const std::map<std::string, std::string> &ms_cache)
: context_(context),
cache_model_path_(cache_model_path),
vocab_size_(vocab_size),
device_cache_size_(device_cache_size) {
if (ms_cache.find("serialize_path") != ms_cache.end()) {
serialize_path_ = ms_cache.at("serialize_path");
}
if (ms_cache.find("min_dims") != ms_cache.end()) {
min_dims_ = StringToDims(ms_cache.at("min_dims"));
}
if (ms_cache.find("opt_dims") != ms_cache.end()) {
opt_dims_ = StringToDims(ms_cache.at("opt_dims"));
}
if (ms_cache.find("max_dims") != ms_cache.end()) {
max_dims_ = StringToDims(ms_cache.at("max_dims"));
}
if (min_dims_.size() != opt_dims_.size() || min_dims_.size() != max_dims_.size()) {
MS_LOG(WARNING) << "number of min_dims, opt_dims, max_dims are not equal";
}
}
TensorRTExecutor::TensorRTExecutor(const std::shared_ptr<mindspore::Context> &context) : context_(context) {}
TensorRTExecutor::TensorRTExecutor(const std::shared_ptr<mindspore::Context> &context, const ConfigInfos &config_infos)
: context_(context), config_infos_(config_infos) {}
TensorRTExecutor::~TensorRTExecutor() {
if (runtime_ != nullptr) {
@ -387,12 +339,13 @@ bool IsHardwareSupport() {
return true;
}
Status TensorRTExecutor::Init() {
bool TensorRTExecutor::Init() {
if (!IsHardwareSupport()) {
return mindspore::kLiteNotSupport;
return false;
}
if (context_ == nullptr) {
MS_LOG_ERROR << "Context cannot be nullptr";
return false;
}
std::vector<std::shared_ptr<DeviceInfoContext>> device_list = context_->MutableDeviceInfo();
@ -401,50 +354,71 @@ Status TensorRTExecutor::Init() {
});
if (iter == device_list.end()) {
MS_LOG(ERROR) << "no gpu device info found for TensorRT.";
return mindspore::kLiteError;
return false;
}
auto gpu_info = (*iter)->Cast<GPUDeviceInfo>();
if (gpu_info == nullptr) {
MS_LOG(ERROR) << "no gpu device info found for TensorRT.";
return mindspore::kLiteError;
return false;
}
device_info_ = gpu_info;
int ret = lite::SetCudaDevice(device_info_);
if (ret != RET_OK) {
return mindspore::kLiteError;
return false;
}
if (runtime_ == nullptr) {
runtime_ = new (std::nothrow) TensorRTRuntime();
if (runtime_ == nullptr) {
MS_LOG(ERROR) << "create TensorRTRuntime failed.";
return mindspore::kLiteError;
return false;
}
}
if (runtime_->Init() != RET_OK) {
MS_LOG(ERROR) << "TensorRTRuntime init failed.";
return mindspore::kLiteError;
return false;
}
runtime_->SetDeviceID(device_info_->GetDeviceID());
auto cuda_ret = cudaStreamCreate(&stream_);
if (cuda_ret != cudaSuccess) {
MS_LOG(ERROR) << "Cuda create stream failed";
return mindspore::kLiteError;
return false;
}
auto cublas_ret = cublasCreate(&cublas_handle_);
if (cublas_ret != CUBLAS_STATUS_SUCCESS) {
MS_LOG(ERROR) << "Cuda create cublas handle failed";
return mindspore::kLiteError;
return false;
}
auto cublaslt_ret = cublasLtCreate(&cublaslt_handle_);
if (cublaslt_ret != CUBLAS_STATUS_SUCCESS) {
MS_LOG(ERROR) << "Cuda create cublaslt handle failed";
return mindspore::kLiteError;
return false;
}
return mindspore::kSuccess;
ret = ParseOptimizationProfile();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Parse input ranges failed.";
return false;
}
return true;
}
int TensorRTExecutor::ParseOptimizationProfile() {
auto gpu_context_it = config_infos_.find(kGPUContext);
if (gpu_context_it == config_infos_.end()) {
MS_LOG(INFO) << "do not have input ranges config.";
return RET_OK;
}
auto &gpu_context = gpu_context_it->second;
ProfileConfigs profile_configs;
if (!ProfileParser::Parse(gpu_context, true, &profile_configs)) {
MS_LOG_WARNING << "Failed to parse profile info from '" << kGPUContext << "'";
return RET_FAILED;
}
trt_profile_configs_ = profile_configs;
return RET_OK;
}
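Below is a hedged sketch, not part of this commit, of the structure ParseOptimizationProfile is expected to fill. It uses only the ProfileConfigs/ProfileItem fields referenced elsewhere in this diff (input_infos with name/input_shape, profiles with per-input min/opt/max dims); the input name and shape values are placeholders.
  ProfileConfigs configs;
  configs.input_infos.resize(1);
  configs.input_infos[0].name = "x";                       // placeholder input name
  configs.input_infos[0].input_shape = {-1, 3, 224, 224};  // -1 marks the dynamic axis
  ProfileItem profile_item;
  profile_item.inputs.resize(1);
  profile_item.inputs[0].min_dims = {1, 3, 224, 224};
  profile_item.inputs[0].opt_dims = {8, 3, 224, 224};
  profile_item.inputs[0].max_dims = {16, 3, 224, 224};
  configs.profiles.push_back(profile_item);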
Status TensorRTExecutor::BuildSubGraph(const KernelGraphPtr &kernel_graph) {
@ -490,57 +464,10 @@ Status TensorRTExecutor::BuildSubGraph(const KernelGraphPtr &kernel_graph) {
if (status != kSuccess) {
return status;
}
tensorrt_graph_list_.push_back(TrtGraphContext{tensorrt_ops, inputs_, outputs_, nullptr});
status = UpdateTrtSubGraphInputsDepend();
if (status != kSuccess) {
return status;
}
for (size_t i = 0; i < tensorrt_graph_list_.size(); i++) {
auto &sub_graph = tensorrt_graph_list_[i];
sub_graph.sub_graph = CreateTensorRTGraph(sub_graph.tensorrt_ops, kernel_graph, tensorrt_subgraph_index,
sub_graph.inputs, sub_graph.outputs);
if (!sub_graph.sub_graph) {
MS_LOG(ERROR) << "Create tensorrt graph failed";
return mindspore::kLiteError;
}
}
return mindspore::kSuccess;
}
Status TensorRTExecutor::UpdateTrtSubGraphInputsDepend() {
std::set<TensorInfo> val_set;
for (auto &item : inputs_) {
val_set.emplace(item);
}
std::vector<std::set<TensorInfo>> sub_graphs_outputs;
for (size_t i = 0; i < tensorrt_graph_list_.size(); i++) {
auto &sub_graph = tensorrt_graph_list_[i];
std::set<TensorInfo> sub_graph_tensor_infos;
for (auto &op : sub_graph.tensorrt_ops) {
for (auto &item : op->outputs()) {
sub_graph_tensor_infos.emplace(item);
}
}
sub_graphs_outputs.emplace_back(std::move(sub_graph_tensor_infos));
for (auto &input : sub_graph.inputs) {
if (!val_set.count(input)) {
size_t k = 0;
for (; k < i; k++) {
if (sub_graphs_outputs[k].count(input)) {
tensorrt_graph_list_[k].outputs.push_back(input);
break;
}
}
if (k >= i) {
MS_LOG(ERROR) << "Cannot found input " << input.Name();
return mindspore::kLiteError;
}
}
}
for (auto &item : sub_graph.outputs) {
val_set.emplace(item);
}
tensorrt_graph_ = CreateTensorRTGraph(tensorrt_ops, kernel_graph, tensorrt_subgraph_index, inputs_, outputs_);
if (!tensorrt_graph_) {
MS_LOG(ERROR) << "Create tensorrt graph failed";
return mindspore::kLiteError;
}
return mindspore::kSuccess;
}
@ -583,9 +510,18 @@ std::shared_ptr<TensorRTSubGraph> TensorRTExecutor::CreateTensorRTGraph(const st
const KernelGraphPtr &graph, int index,
const std::vector<TensorInfo> &inputs,
const std::vector<TensorInfo> &outputs) {
auto tensorrt_graph =
std::make_shared<TensorRTSubGraph>(ops, inputs, outputs, context_.get(), device_info_, runtime_, support_resize_,
support_hw_resize_, min_dims_, opt_dims_, max_dims_);
if (!trt_profile_configs_.input_infos.empty()) {
std::vector<std::string> input_names;
std::transform(inputs.begin(), inputs.end(), std::back_inserter(input_names),
[](auto &item) { return item.Name(); });
if (!ProfileParser::ReorderByInputNames(input_names, &trt_profile_configs_)) {
MS_LOG_ERROR << "Reorder profile by input names failed, input names: " << input_names;
return nullptr;
}
}
auto tensorrt_graph = std::make_shared<TensorRTSubGraph>(ops, inputs, outputs, context_.get(), device_info_, runtime_,
support_resize_, support_hw_resize_, trt_profile_configs_);
if (tensorrt_graph == nullptr) {
MS_LOG(ERROR) << "new tensorrt_graph failed.";
return nullptr;
@ -635,10 +571,6 @@ bool TensorRTExecutor::CompileGraph(const FuncGraphPtr &graph, const std::map<st
bool TensorRTExecutor::RunGraph(const FuncGraphPtr &graph, const std::vector<tensor::Tensor> &inputs,
std::vector<tensor::Tensor> *outputs, const std::map<string, string> &compile_options) {
if (tensorrt_graph_list_.empty()) {
MS_LOG(ERROR) << "TensorRTGraph is nullptr.";
return false;
}
if (inputs.size() != inputs_.size()) {
MS_LOG(ERROR) << "Graph inputs size " << inputs_.size() << " != execute outputs size " << inputs.size();
return false;
@ -647,70 +579,20 @@ bool TensorRTExecutor::RunGraph(const FuncGraphPtr &graph, const std::vector<ten
MS_LOG(ERROR) << "Graph outputs size " << inputs_.size() << " != expected outputs size " << outputs->size();
return false;
}
if (tensorrt_graph_list_.size() == 1) {
auto &sub_graph = tensorrt_graph_list_[0];
if (sub_graph.sub_graph == nullptr) {
MS_LOG(ERROR) << "TensorRT subgraph is nullptr.";
return false;
}
return sub_graph.sub_graph->Execute(inputs, outputs) == RET_OK;
if (tensorrt_graph_ == nullptr) {
MS_LOG(ERROR) << "TensorRT subgraph is nullptr.";
return false;
}
std::map<TensorInfo, std::shared_ptr<tensor::Tensor>> tensor_val_map;
for (size_t i = 0; i < inputs.size(); i++) {
tensor_val_map[inputs_[i]] = std::make_shared<tensor::Tensor>(inputs[i]);
}
for (auto &sub_graph : tensorrt_graph_list_) {
std::vector<tensor::Tensor> sub_inputs;
std::vector<tensor::Tensor> sub_outputs;
for (auto &item : sub_graph.inputs) {
auto it = tensor_val_map.find(item);
if (it == tensor_val_map.end()) {
MS_LOG(ERROR) << "Cannot find input tensor " << item.Name() << " in tensor val map";
return false;
}
sub_inputs.push_back(*it->second);
}
if (sub_graph.sub_graph == nullptr) {
MS_LOG(ERROR) << "TensorRT subgraph is nullptr.";
return false;
}
auto ret = sub_graph.sub_graph->Execute(sub_inputs, &sub_outputs);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Failed to execute graph.";
return false;
}
if (sub_graph.outputs.size() != sub_outputs.size()) {
MS_LOG(ERROR) << "Subgraph outputs size " << sub_graph.outputs.size() << " != result outputs size "
<< sub_outputs.size();
return false;
}
for (size_t i = 0; i < sub_graph.outputs.size(); i++) {
tensor_val_map[sub_graph.outputs[i]] = std::make_shared<tensor::Tensor>(sub_outputs[i]);
}
}
outputs->clear();
for (auto &item : outputs_) {
auto it = tensor_val_map.find(item);
if (it == tensor_val_map.end()) {
MS_LOG(ERROR) << "Cannot find input tensor " << item.Name() << " in tensor val map";
return false;
}
outputs->push_back(*it->second);
}
return true;
return tensorrt_graph_->Execute(inputs, outputs) == RET_OK;
}
bool TensorRTExecutor::Resize(const FuncGraphPtr &, const std::vector<tensor::Tensor> &inputs,
const std::vector<std::vector<int64_t>> &new_shapes) {
if (tensorrt_graph_list_.size() == 1) {
auto &sub_graph = tensorrt_graph_list_[0];
if (sub_graph.sub_graph == nullptr) {
MS_LOG(ERROR) << "TensorRT subgraph is nullptr.";
return false;
}
return sub_graph.sub_graph->Resize(inputs, new_shapes) == RET_OK;
if (tensorrt_graph_ == nullptr) {
MS_LOG(ERROR) << "TensorRT subgraph is nullptr.";
return false;
}
return false;
return tensorrt_graph_->Resize(inputs, new_shapes) == RET_OK;
}
std::vector<tensor::Tensor> TensorRTExecutor::GetInputInfos(const FuncGraphPtr &) {
@ -733,9 +615,12 @@ std::vector<tensor::Tensor> TensorRTExecutor::GetOutputInfos(const FuncGraphPtr
return tensors;
}
static std::shared_ptr<device::GraphExecutor> TensorRTGraphExecutorCreator(const std::shared_ptr<Context> &ctx) {
auto executor = std::make_shared<TensorRTExecutor>(ctx);
executor->Init();
static std::shared_ptr<device::GraphExecutor> TensorRTGraphExecutorCreator(const std::shared_ptr<Context> &ctx,
const ConfigInfos &config_infos) {
auto executor = std::make_shared<TensorRTExecutor>(ctx, config_infos);
if (!executor->Init()) {
return nullptr;
}
return executor;
}

View File

@ -43,13 +43,11 @@ struct TrtGraphContext {
class TensorRTExecutor : public LiteGraphExecutor {
public:
TensorRTExecutor(const std::shared_ptr<mindspore::Context> &context, const std::string &cache_model_path,
size_t vocab_size, size_t device_cache_size, const std::map<std::string, std::string> &ms_cache);
explicit TensorRTExecutor(const std::shared_ptr<mindspore::Context> &context);
TensorRTExecutor(const std::shared_ptr<mindspore::Context> &context, const ConfigInfos &config_infos);
~TensorRTExecutor() override;
Status Init();
bool Init();
bool CompileGraph(const FuncGraphPtr &graph, const std::map<string, string> &compile_options) override;
bool RunGraph(const FuncGraphPtr &graph, const std::vector<tensor::Tensor> &inputs,
@ -61,8 +59,9 @@ class TensorRTExecutor : public LiteGraphExecutor {
std::vector<tensor::Tensor> GetOutputInfos(const FuncGraphPtr &) override;
private:
int ParseOptimizationProfile();
Status BuildSubGraph(const KernelGraphPtr &graph);
Status UpdateTrtSubGraphInputsDepend();
TensorRTOp *FindTensorRTOp(const CNodePtr &cnode, const BaseOperatorPtr &base_operator,
const std::vector<TensorInfo> &input_tensors,
@ -74,6 +73,7 @@ class TensorRTExecutor : public LiteGraphExecutor {
const std::vector<TensorInfo> &outputs);
std::shared_ptr<mindspore::Context> context_{nullptr};
ConfigInfos config_infos_;
std::shared_ptr<GPUDeviceInfo> device_info_{nullptr};
TensorRTRuntime *runtime_{nullptr};
bool support_hw_resize_{true};
@ -88,12 +88,9 @@ class TensorRTExecutor : public LiteGraphExecutor {
std::vector<kernel::Kernel> kernel_list_;
std::vector<TrtGraphContext> tensorrt_graph_list_;
std::vector<nvinfer1::Dims> min_dims_;
std::vector<nvinfer1::Dims> opt_dims_;
std::vector<nvinfer1::Dims> max_dims_;
ProfileConfigs trt_profile_configs_;
std::shared_ptr<TensorRTSubGraph> tensorrt_graph_ = nullptr;
std::vector<TensorInfo> inputs_;
std::vector<TensorInfo> outputs_;
};

View File

@ -14,13 +14,27 @@
* limitations under the License.
*/
#include "extendrt/delegate/tensorrt/tensorrt_plugin_impl.h"
#ifdef LITE_CUDA_DISTRIBUTION
#include "extendrt/delegate/tensorrt/distribution/distribution_base.h"
#include "plugin/device/gpu/hal/device/distribution/collective_wrapper.h"
#endif
namespace mindspore::lite {
int TensorRTPluginImpl::GetGPUGroupSize() const { return GetGroupSize(NCCL_WORLD_GROUP); }
int TensorRTPluginImpl::GetGPUGroupSize() const {
#ifdef LITE_CUDA_DISTRIBUTION
return GetGroupSize(NCCL_WORLD_GROUP);
#else
return 1;
#endif
}
int TensorRTPluginImpl::GetRankID() const { return GetRankIDByGroup(NCCL_WORLD_GROUP); }
int TensorRTPluginImpl::GetRankID() const {
#ifdef LITE_CUDA_DISTRIBUTION
return GetRankIDByGroup(NCCL_WORLD_GROUP);
#else
return 0;
#endif
}
} // namespace mindspore::lite
mindspore::lite::TensorRTExecutorPluginImplBase *CreateTensorRTPluginImpl() {

View File

@ -24,6 +24,8 @@
#include <numeric>
#include <functional>
#include <fstream>
#include <limits>
#include <unordered_map>
#include "src/extendrt/delegate/delegate_utils.h"
#include "src/common/utils.h"
@ -32,25 +34,23 @@
#include "ops/strided_slice.h"
#include "ops/expand_dims.h"
#include "ops/fusion/topk_fusion.h"
#include "ops/broadcast_to.h"
namespace mindspore::lite {
TensorRTSubGraph::TensorRTSubGraph(std::vector<TensorRTOp *> ops, const std::vector<TensorInfo> &inputs,
const std::vector<TensorInfo> &outputs, const mindspore::Context *ctx,
std::shared_ptr<GPUDeviceInfo> device_info, TensorRTRuntime *runtime,
bool support_resize, bool support_hw_resize,
const std::vector<nvinfer1::Dims> &min_dims,
const std::vector<nvinfer1::Dims> &opt_dims,
const std::vector<nvinfer1::Dims> &max_dims)
const ProfileConfigs &trt_profile_config)
: inputs_(inputs),
outputs_(outputs),
all_ops_(std::move(ops)),
device_info_(device_info),
runtime_(runtime),
min_dims_(min_dims),
opt_dims_(opt_dims),
max_dims_(max_dims) {
trt_profile_config_(trt_profile_config) {
trt_specific_weight_handled_inner_ = {
ops::kNameTranspose, ops::kNameReshape, ops::kNameStridedSlice, ops::kNameExpandDims, ops::kNameTopKFusion,
ops::kNameTranspose, ops::kNameReshape, ops::kNameStridedSlice,
ops::kNameExpandDims, ops::kNameTopKFusion, ops::kNameBroadcastTo,
};
if (!support_resize) {
input_batchsize_index_ = -1;
@ -86,26 +86,55 @@ TensorRTSubGraph::~TensorRTSubGraph() {
}
}
bool TensorRTSubGraph::IsValidProfileDims() const {
if (trt_profile_config_.profiles.empty()) {
MS_LOG(INFO) << "Number of profiles is 0.";
return false;
}
for (auto &profile : trt_profile_config_.profiles) {
if (profile.inputs.size() != trt_profile_config_.input_infos.size()) {
MS_LOG(WARNING) << "Profile input size " << profile.inputs.size() << " != input shape size "
<< trt_profile_config_.input_infos.size();
return false;
}
for (size_t i = 0; i < profile.inputs.size(); i++) {
const auto &profile_input = profile.inputs[i];
const auto &input_info = trt_profile_config_.input_infos[i];
if (profile_input.min_dims.size() != input_info.input_shape.size()) {
MS_LOG(WARNING) << "Profile input " << input_info.name << " min dims number " << profile_input.min_dims.size()
<< " != input shape dim number " << input_info.input_shape.size();
return false;
}
if (profile_input.max_dims.size() != input_info.input_shape.size()) {
MS_LOG(WARNING) << "Profile input " << input_info.name << " max dims number " << profile_input.max_dims.size()
<< " != input shape dim number " << input_info.input_shape.size();
return false;
}
if (profile_input.opt_dims.size() != input_info.input_shape.size()) {
MS_LOG(WARNING) << "Profile input " << input_info.name << " opt dims number " << profile_input.opt_dims.size()
<< " != input shape dim number " << input_info.input_shape.size();
return false;
}
}
}
return true;
}
int TensorRTSubGraph::Init(cudaStream_t stream, cublasHandle_t cublas_handle, cublasLtHandle_t cublaslt_handle) {
auto ret = GetGraphInOutOps(inputs_, outputs_, &in_ops_, &out_ops_, all_ops_);
if (ret != RET_OK) {
MS_LOG(ERROR) << "Get TensorRT subgraph input and output ops failed.";
return RET_ERROR;
}
profile_ = runtime_->GetBuilder()->createOptimizationProfile();
if (profile_ == nullptr) {
MS_LOG(ERROR) << "createOptimizationProfile failed.";
return RET_ERROR;
}
ctx_ = new (std::nothrow) TensorRTContext();
if (ctx_ == nullptr) {
MS_LOG(ERROR) << "New TensorRTContext failed.";
return RET_OK;
return RET_ERROR;
}
ctx_->SetRuntime(runtime_);
if (!ctx_->Init()) {
MS_LOG(ERROR) << "New TensorRTContext failed.";
return RET_OK;
return RET_ERROR;
}
if (SetDeviceConfig(stream, cublas_handle, cublaslt_handle) != RET_OK) {
MS_LOG(WARNING) << "set tensorrt config failed.";
@ -115,6 +144,20 @@ int TensorRTSubGraph::Init(cudaStream_t stream, cublasHandle_t cublas_handle, cu
MS_LOG(ERROR) << "create Serializer failed.";
return RET_ERROR;
}
using_input_ranges_ = IsValidProfileDims();
if (using_input_ranges_) {
for (size_t i = 0; i != trt_profile_config_.profiles.size(); ++i) {
profiles_.push_back(runtime_->GetBuilder()->createOptimizationProfile());
}
} else {
profiles_.push_back(runtime_->GetBuilder()->createOptimizationProfile());
}
for (size_t i = 0; i != profiles_.size(); ++i) {
if (profiles_[i] == nullptr) {
MS_LOG(ERROR) << "create optimization profile failed.";
return RET_ERROR;
}
}
engine_ = serializer_->GetSerializedEngine();
if (engine_ != nullptr) {
MS_LOG(INFO) << "using serialized engine " << serialize_file_path_;
@ -130,9 +173,11 @@ int TensorRTSubGraph::Init(cudaStream_t stream, cublasHandle_t cublas_handle, cu
int TensorRTSubGraph::BuildEngine() {
// print all network ops
if (this->config_->addOptimizationProfile(profile_) == -1) {
MS_LOG(ERROR) << "addOptimizationProfile failed.";
return RET_ERROR;
for (auto &profile : profiles_) {
if (this->config_->addOptimizationProfile(profile) == -1) {
MS_LOG(ERROR) << "addOptimizationProfile failed.";
return RET_ERROR;
}
}
MS_LOG(INFO) << "build engine for tensorrt network: " << ctx_->network()->getName();
for (int i = 0; i < ctx_->network()->getNbLayers(); i++) {
@ -184,7 +229,8 @@ int TensorRTSubGraph::SetDeviceConfig(cudaStream_t stream, cublasHandle_t cublas
MS_LOG(INFO) << GetRankID() << " tensorrt subgraph stream: " << stream_;
// config setMaxWorkspaceSize to 2047 MB for max limit
config_->setMaxWorkspaceSize(2047 * (1 << 20));
constexpr size_t kWorkspaceSize = 2047 * (1 << 20);
config_->setMaxWorkspaceSize(kWorkspaceSize);
return RET_OK;
}
@ -197,7 +243,10 @@ bool TensorRTSubGraph::IsInt8Mode() {
return false;
}
nvinfer1::ITensor *TensorRTSubGraph::SetTensorRTNetworkInput(const TensorInfo &in_tensor, size_t index) {
nvinfer1::ITensor *TensorRTSubGraph::SetTensorRTNetworkInput(const TensorInfo &in_tensor, int index) {
if (index < 0) {
return nullptr;
}
for (int i = 0; i < ctx_->network()->getNbInputs(); i++) {
if (in_tensor.Name().compare(ctx_->network()->getInput(i)->getName()) == 0) {
MS_LOG(INFO) << "input tensor is already added in network: " << in_tensor.Name();
@ -211,68 +260,48 @@ nvinfer1::ITensor *TensorRTSubGraph::SetTensorRTNetworkInput(const TensorInfo &i
return nullptr;
}
nvinfer1::Dims input_dims;
if (min_dims_.size() != 0 && min_dims_.size() == opt_dims_.size() && min_dims_.size() == max_dims_.size()) {
if (using_input_ranges_) {
input_dims = SetInputDimsProfile(in_tensor, index);
} else {
input_dims = ParseInputDimsProfile(in_tensor);
input_dims = ParseInputDimsProfile(in_tensor, index);
}
MS_LOG(INFO) << "add network input: " << in_tensor.Name();
return ctx_->network()->addInput(in_tensor.Name().c_str(), cuda_dtype, input_dims);
}
nvinfer1::Dims TensorRTSubGraph::SetInputDimsProfile(const TensorInfo &in_tensor, size_t index) {
nvinfer1::Dims input_dims;
input_dims.nbDims = min_dims_[index].nbDims;
for (int i = 0; i != input_dims.nbDims; ++i) {
input_dims.d[i] = (max_dims_[index].d[i] != min_dims_[index].d[i]) ? -1 : min_dims_[index].d[i];
nvinfer1::Dims TensorRTSubGraph::SetInputDimsProfile(const TensorInfo &in_tensor, int index) {
auto input_info = trt_profile_config_.input_infos[index];
auto input_dims = ConvertCudaDims(input_info.input_shape);
DebugDims("input dims", input_dims);
for (size_t i = 0; i < trt_profile_config_.profiles.size(); i++) {
auto &profile = trt_profile_config_.profiles[i];
auto min_dims = ConvertCudaDims(profile.inputs[index].min_dims);
if (!profiles_[i]->setDimensions(input_info.name.c_str(), nvinfer1::OptProfileSelector::kMIN, min_dims)) {
MS_LOG(ERROR) << "setDimensions of kMIN failed for " << input_info.name;
return input_dims;
}
auto opt_dims = ConvertCudaDims(profile.inputs[index].opt_dims);
if (!profiles_[i]->setDimensions(input_info.name.c_str(), nvinfer1::OptProfileSelector::kOPT, opt_dims)) {
MS_LOG(ERROR) << "setDimensions of kOPT failed for " << input_info.name;
return input_dims;
}
auto max_dims = ConvertCudaDims(profile.inputs[index].max_dims);
if (!profiles_[i]->setDimensions(input_info.name.c_str(), nvinfer1::OptProfileSelector::kMAX, max_dims)) {
MS_LOG(ERROR) << "setDimensions of kMAX failed for " << input_info.name;
return input_dims;
}
DebugDims("min dims", min_dims);
DebugDims("opt dims", opt_dims);
DebugDims("max dims", max_dims);
}
if (profile_ == nullptr) {
MS_LOG(ERROR) << "profile is null.";
return input_dims;
}
if (!profile_->setDimensions(in_tensor.Name().c_str(), nvinfer1::OptProfileSelector::kMIN, min_dims_[index])) {
MS_LOG(ERROR) << "setDimensions of kMIN failed for " << in_tensor.Name();
return input_dims;
}
if (!profile_->setDimensions(in_tensor.Name().c_str(), nvinfer1::OptProfileSelector::kOPT, opt_dims_[index])) {
MS_LOG(ERROR) << "setDimensions of kOPT failed for " << in_tensor.Name();
return input_dims;
}
if (!profile_->setDimensions(in_tensor.Name().c_str(), nvinfer1::OptProfileSelector::kMAX, max_dims_[index])) {
MS_LOG(ERROR) << "setDimensions of kMAX failed for " << in_tensor.Name();
return input_dims;
}
DebugDims(min_dims_[index]);
DebugDims(opt_dims_[index]);
DebugDims(max_dims_[index]);
return input_dims;
}
nvinfer1::Dims TensorRTSubGraph::ParseInputDimsProfile(const TensorInfo &in_tensor) {
nvinfer1::Dims TensorRTSubGraph::ParseInputDimsProfile(const TensorInfo &in_tensor, int index) {
nvinfer1::Dims input_dims = ConvertCudaDims(in_tensor.Shape());
if (profile_ == nullptr) {
MS_LOG(ERROR) << "profile is null.";
return input_dims;
}
if (runtime_->GetBatchSize() == 0) {
runtime_->SetBatchSize(input_dims.d[0]);
MS_LOG(INFO) << "batch size init as " << runtime_->GetBatchSize();
if (input_batchsize_index_ != -1) {
input_dims.d[0] = -1; // dynamic batch size with wildcard N, default batchsize is first dims
input_batchsize_index_ = 0;
}
} else {
if (input_batchsize_index_ != -1) {
for (int n = 0; n < input_dims.nbDims; n++) {
if (input_dims.d[n] == runtime_->GetBatchSize()) {
runtime_->SetBatchSize(std::max(input_dims.d[0], runtime_->GetBatchSize()));
// first dims equals to batchsize
input_dims.d[n] = -1;
input_batchsize_index_ = n;
break;
}
}
}
if (input_batchsize_index_ != -1) {
input_dims.d[0] = -1;
}
// only support NHWC HW dim resize
if (input_hw_index_ != -1) {
@ -281,42 +310,50 @@ nvinfer1::Dims TensorRTSubGraph::ParseInputDimsProfile(const TensorInfo &in_tens
input_dims.d[input_hw_index_] = -1;
input_dims.d[input_hw_index_ + 1] = -1;
}
auto shape = in_tensor.Shape();
// We do not need to check the return of setDimension and addOptimizationProfile here as all dims are explicitly set
nvinfer1::Dims input_dims_min = ConvertCudaDims(shape);
nvinfer1::Dims input_dims_min = ConvertCudaDims(in_tensor.Shape());
if (input_batchsize_index_ != -1) {
input_dims_min.d[input_batchsize_index_] = 1;
input_dims_min.d[0] = 1;
if (input_hw_index_ != -1) {
input_dims_min.d[input_hw_index_] = 1;
input_dims_min.d[input_hw_index_ + 1] = 1;
}
}
if (!profile_->setDimensions(in_tensor.Name().c_str(), nvinfer1::OptProfileSelector::kMIN, input_dims_min)) {
if (!profiles_.front()->setDimensions(in_tensor.Name().c_str(), nvinfer1::OptProfileSelector::kMIN, input_dims_min)) {
MS_LOG(ERROR) << "setDimensions of kMIN failed for " << in_tensor.Name();
return input_dims;
}
nvinfer1::Dims input_dims_opt = ConvertCudaDims(shape);
if (!profile_->setDimensions(in_tensor.Name().c_str(), nvinfer1::OptProfileSelector::kOPT, input_dims_opt)) {
nvinfer1::Dims input_dims_opt = ConvertCudaDims(in_tensor.Shape());
if (!profiles_.front()->setDimensions(in_tensor.Name().c_str(), nvinfer1::OptProfileSelector::kOPT, input_dims_opt)) {
MS_LOG(ERROR) << "setDimensions of kOPT failed for " << in_tensor.Name();
return input_dims;
}
nvinfer1::Dims input_dims_max = ConvertCudaDims(shape);
nvinfer1::Dims input_dims_max = ConvertCudaDims(in_tensor.Shape());
// input_dims_max should be the same with input network dims
if (!profile_->setDimensions(in_tensor.Name().c_str(), nvinfer1::OptProfileSelector::kMAX, input_dims_max)) {
if (!profiles_.front()->setDimensions(in_tensor.Name().c_str(), nvinfer1::OptProfileSelector::kMAX, input_dims_max)) {
MS_LOG(ERROR) << "setDimensions of kMAX failed for " << in_tensor.Name();
return input_dims;
}
DebugDims(input_dims);
DebugDims(input_dims_min);
DebugDims(input_dims_opt);
DebugDims(input_dims_max);
if (trt_profile_config_.profiles.empty()) {
ProfileItem profile_item;
profile_item.inputs.resize(inputs_.size());
trt_profile_config_.profiles.push_back(profile_item);
}
auto &profile_item = trt_profile_config_.profiles.back();
profile_item.inputs[index].min_dims = ConvertMSShape(input_dims_min);
profile_item.inputs[index].opt_dims = ConvertMSShape(input_dims_opt);
profile_item.inputs[index].max_dims = ConvertMSShape(input_dims_max);
DebugDims("input min dims", input_dims_min);
DebugDims("input opt dims", input_dims_opt);
DebugDims("input max dims", input_dims_max);
return input_dims;
}
int TensorRTSubGraph::ParseInputsProfile() {
MS_LOG(INFO) << "using serialied engine.";
for (auto in_tensor : inputs()) {
auto dim = ParseInputDimsProfile(in_tensor);
for (size_t i = 0; i < inputs_.size(); i++) {
auto dim = ParseInputDimsProfile(inputs_[i], i);
if (dim.nbDims <= 0) {
MS_LOG(ERROR) << "input dims is invalid.";
return RET_ERROR;
@ -325,6 +362,15 @@ int TensorRTSubGraph::ParseInputsProfile() {
return RET_OK;
}
int TensorRTSubGraph::GetInputIndexByName(const std::string &name) {
for (size_t i = 0; i != inputs().size(); ++i) {
if (inputs()[i].Name() == name) {
return i;
}
}
return -1;
}
int TensorRTSubGraph::BuildTensorRTGraph() {
MS_ASSERT(!all_ops_.empty());
int ret;
@ -338,7 +384,7 @@ int TensorRTSubGraph::BuildTensorRTGraph() {
// Data From CPU
auto in_tensor = cur_op->inputs()[i];
if (IsSubGraphInputTensor(this->inputs(), in_tensor)) {
nvinfer1::ITensor *trt_tensor = SetTensorRTNetworkInput(in_tensor, i);
nvinfer1::ITensor *trt_tensor = SetTensorRTNetworkInput(in_tensor, GetInputIndexByName(in_tensor.Name()));
if (trt_tensor == nullptr) {
MS_LOG(ERROR) << "SetTensorRTNetworkInput failed for " << in_tensor.Name();
return RET_ERROR;
@ -442,7 +488,7 @@ int TensorRTSubGraph::Prepare() {
return RET_ERROR;
}
int binding_num = this->engine_->getNbBindings();
if (binding_num < 0) {
if (binding_num <= 0) {
MS_LOG(ERROR) << "TensorRTSubGraph binding num < 0.";
return RET_ERROR;
}
@ -452,28 +498,36 @@ int TensorRTSubGraph::Prepare() {
return RET_ERROR;
}
for (auto tensor : inputs_) {
auto device_ptr = runtime_->GetAllocator()->MallocDeviceMem(tensor, tensor.DataSize());
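  // Device buffers for the inputs are sized for the profile with the largest max volume so
  // that later Resize calls within any profile's range can reuse the same allocation.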
profile_index_ = MaxVolumnProfileIndex();
if (this->trt_context_->setOptimizationProfile(profile_index_)) {
MS_LOG(INFO) << "setOptimizationProfile: " << profile_index_;
}
const auto &profile = trt_profile_config_.profiles[profile_index_];
for (size_t i = 0; i != inputs_.size(); ++i) {
auto &tensor = inputs_[i];
auto max_profile_dims = profile.inputs[i].max_dims;
tensor.SetShape(max_profile_dims);
int volumn = std::accumulate(max_profile_dims.begin(), max_profile_dims.end(), 1, std::multiplies<int>());
auto type_size = lite::DataTypeSize(static_cast<enum TypeId>(tensor.DataType()));
auto device_ptr = runtime_->GetAllocator()->MallocDeviceMem(tensor, volumn * type_size);
if (device_ptr == nullptr) {
MS_LOG(ERROR) << "malloc for inputs tensor device memory failed.";
return RET_ERROR;
}
runtime_->SetBatchSize(tensor.Shape()[0]);
int index = this->engine_->getBindingIndex(tensor.Name().c_str());
MS_LOG(INFO) << "device index " << index << " for tensor : " << tensor.Name() << " attr: " << device_ptr;
auto tensor_name = tensor.Name();
trt_in_tensor_name_.push_back(tensor_name);
int index = GetProfileBindingIndex(tensor_name, profile_index_);
MS_LOG(INFO) << "device index " << index << " for tensor : " << tensor_name << " attr: " << device_ptr;
tensor_bindings_[index] = device_ptr;
trt_in_tensor_name_.push_back(tensor.Name());
nvinfer1::Dims input_dims = ConvertCudaDims(tensor.Shape());
nvinfer1::Dims input_dims = ConvertCudaDims(profile.inputs[i].max_dims);
for (int od = 0; od < input_dims.nbDims; od++) {
MS_LOG(DEBUG) << "in tensor " << tensor.Name() << " dims at " << od << " is " << input_dims.d[od];
}
if (!this->trt_context_->setBindingDimensions(index, input_dims)) {
MS_LOG(ERROR) << "invalid input dims of " << tensor.Name();
return RET_ERROR;
}
}
if (!this->trt_context_->allInputDimensionsSpecified()) {
MS_LOG(ERROR) << "input dims need to be specified.";
return RET_ERROR;
@ -486,30 +540,91 @@ int TensorRTSubGraph::Prepare() {
}
}
for (auto &tensor : outputs_) {
int index = this->engine_->getBindingIndex(tensor.Name().c_str());
auto out_dims = trt_context_->getBindingDimensions(index);
int max_index = GetProfileBindingIndex(tensor.Name(), profile_index_);
auto out_dims = trt_context_->getBindingDimensions(max_index);
int elem_num = std::accumulate(out_dims.d, out_dims.d + out_dims.nbDims, 1, std::multiplies<int>());
DebugDims(out_dims);
auto new_shape = lite::ConvertMSShape(out_dims);
MS_LOG(INFO) << "Set output shape of " << tensor.Name() << " to " << new_shape << " by tensorrt binding output";
tensor.SetShape(new_shape);
auto type_size = DataTypeSize(static_cast<enum TypeId>(tensor.DataType()));
DebugDims("out dims", out_dims);
MS_LOG(INFO) << "Set output shape by tensorrt binding output";
tensor.SetShape(lite::ConvertMSShape(out_dims));
auto type_size = lite::DataTypeSize(static_cast<enum TypeId>(tensor.DataType()));
auto device_ptr = runtime_->GetAllocator()->MallocDeviceMem(tensor, elem_num * type_size);
if (device_ptr == nullptr) {
MS_LOG(ERROR) << "malloc for outputs tensor device memory failed.";
return RET_ERROR;
}
tensor_bindings_[index] = device_ptr;
for (size_t j = 0; j != profiles_.size(); ++j) {
int index = GetProfileBindingIndex(tensor.Name(), j);
tensor_bindings_[index] = device_ptr;
}
trt_out_tensor_name_.push_back(tensor.Name());
}
return RET_OK;
}
int TensorRTSubGraph::SelectProfile(const std::vector<ShapeVector> &new_shapes) const {
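  // Collect every profile whose [min_dims, max_dims] ranges cover all of the new input
  // shapes; the first match is used, and -1 signals that no profile fits.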
std::vector<int> profile_index;
for (size_t i = 0; i < profiles_.size(); ++i) {
const auto &profile = trt_profile_config_.profiles[i];
bool condition = true;
for (size_t j = 0; j < trt_in_tensor_name_.size(); ++j) {
auto new_shape = new_shapes[j];
auto profile_input = profile.inputs[j];
if (new_shape.size() != profile_input.max_dims.size()) {
condition = false;
} else {
for (size_t od = 0; od < new_shape.size(); od++) {
if (new_shape[od] < profile_input.min_dims[od] || new_shape[od] > profile_input.max_dims[od]) {
condition = false;
break;
}
}
}
}
if (condition) {
profile_index.push_back(i);
}
}
return profile_index.empty() ? -1 : profile_index.front();
}
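// Editorial sketch, not part of this commit: SelectProfile above returns the first
// optimization profile whose [min_dims, max_dims] range covers every new input shape,
// or -1 when none matches. The same per-dimension test in isolation, assuming ShapeVector
// is std::vector<int64_t> and <vector> is available from this file's includes:
bool ShapeInProfileRange(const std::vector<int64_t> &shape, const std::vector<int64_t> &min_dims,
                         const std::vector<int64_t> &max_dims) {
  if (shape.size() != min_dims.size() || shape.size() != max_dims.size()) {
    return false;  // rank mismatch can never fit the profile
  }
  for (size_t d = 0; d < shape.size(); ++d) {
    if (shape[d] < min_dims[d] || shape[d] > max_dims[d]) {
      return false;  // this dimension falls outside the profile's range
    }
  }
  return true;
}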
size_t TensorRTSubGraph::MaxVolumnProfileIndex() const {
int64_t max_volumn = std::numeric_limits<int64_t>::min();
size_t max_volumn_index = 0;
for (size_t i = 0; i < trt_profile_config_.profiles.size(); ++i) {
const auto &profile = trt_profile_config_.profiles[i];
// judged by the first input tensor only
int64_t volumn = std::accumulate(profile.inputs[0].max_dims.begin(), profile.inputs[0].max_dims.end(), 1,
std::multiplies<int64_t>());
if (volumn > max_volumn) {
max_volumn_index = i;
max_volumn = volumn;
}
}
return max_volumn_index;
}
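// Editorial note, not part of this commit: the max-volume profile (judged by the first
// input's max_dims only) is the one whose shapes size the device buffers in Prepare()
// above, so a single allocation can serve every profile. Worked example of the implied
// buffer size for one float input:
int64_t MaxProfileBytes(const std::vector<int64_t> &max_dims, size_t type_size) {
  int64_t volume = std::accumulate(max_dims.begin(), max_dims.end(), static_cast<int64_t>(1),
                                   std::multiplies<int64_t>());
  return volume * static_cast<int64_t>(type_size);
}
// MaxProfileBytes({8, 3, 640, 640}, sizeof(float)) == 8 * 3 * 640 * 640 * 4 == 39321600 bytes.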
int TensorRTSubGraph::GetProfileBindingIndex(const std::string &name, size_t profile_index) {
std::string binding_name = name;
if (profile_index != 0) {
binding_name += " [profile " + std::to_string(profile_index) + "]";
}
return this->engine_->getBindingIndex(binding_name.c_str());
}
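// Editorial sketch, not part of this commit: with several optimization profiles, the
// pre-8.5 TensorRT binding API duplicates every binding per profile; profile 0 keeps the
// plain tensor name and profile k > 0 is named "name [profile k]", which is the string
// GetProfileBindingIndex reconstructs above. A hypothetical helper listing all bindings
// of one tensor:
void DumpProfileBindings(nvinfer1::ICudaEngine *engine, const std::string &tensor_name, size_t num_profiles) {
  for (size_t k = 0; k < num_profiles; ++k) {
    std::string binding_name = tensor_name;
    if (k != 0) {
      binding_name += " [profile " + std::to_string(k) + "]";
    }
    MS_LOG(DEBUG) << binding_name << " -> binding index " << engine->getBindingIndex(binding_name.c_str());
  }
}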
int TensorRTSubGraph::OnNewInputShapes(const std::vector<ShapeVector> &new_shapes) {
if (inputs_.size() != new_shapes.size()) {
MS_LOG(ERROR) << "Graph inputs size " << inputs_.size() << " != resize input size " << new_shapes.size();
return RET_ERROR;
}
auto select_profile_index = SelectProfile(new_shapes);
if (select_profile_index < 0) {
MS_LOG(ERROR) << "Not support input shape " << new_shapes;
return RET_ERROR;
}
profile_index_ = static_cast<size_t>(select_profile_index);
if (!this->trt_context_->setOptimizationProfile(profile_index_)) {
MS_LOG(ERROR) << "setOptimizationProfile failed for profile " << profile_index_;
return RET_ERROR;
}
MS_LOG(INFO) << "setOptimizationProfile: " << profile_index_;
int batch_size = -1;
for (size_t i = 0; i < trt_in_tensor_name_.size(); i++) {
if (inputs_[i].Shape() == new_shapes[i]) {
@ -543,9 +658,8 @@ int TensorRTSubGraph::OnNewInputShapes(const std::vector<ShapeVector> &new_shape
return RET_ERROR;
}
batch_size = new_batch_size;
runtime_->SetBatchSize(batch_size);
int index = this->engine_->getBindingIndex(trt_in_tensor_name_[i].c_str());
int index = GetProfileBindingIndex(trt_in_tensor_name_[i], profile_index_);
// Set actual input size
nvinfer1::Dims input_dims = ConvertCudaDims(inputs_[i].Shape());
for (int od = 0; od < input_dims.nbDims; od++) {
@ -563,8 +677,9 @@ int TensorRTSubGraph::OnNewInputShapes(const std::vector<ShapeVector> &new_shape
}
if (batch_size != -1) {
for (size_t i = 0; i < trt_out_tensor_name_.size(); i++) {
int index = this->engine_->getBindingIndex(trt_out_tensor_name_[i].c_str());
auto index = GetProfileBindingIndex(trt_out_tensor_name_[i], profile_index_);
auto out_dims = trt_context_->getBindingDimensions(index);
DebugDims("out dims", out_dims);
auto new_shape = lite::ConvertMSShape(out_dims);
MS_LOG(INFO) << "Set output shape of " << trt_out_tensor_name_[i] << " to " << new_shape
<< " by tensorrt binding output";
@ -610,13 +725,13 @@ int TensorRTSubGraph::PreExecute(const std::vector<tensor::Tensor> &inputs,
}
runtime_->GetAllocator()->MarkMemValid(trt_tensor_name, true);
}
int index = this->engine_->getBindingIndex(trt_tensor_name.c_str());
int index = GetProfileBindingIndex(trt_tensor_name, profile_index_);
MS_LOG(INFO) << "device index " << index << " for tensor : " << trt_tensor_name << " attr: " << device_ptr;
tensor_bindings_[index] = device_ptr;
}
for (size_t i = 0; i < trt_out_tensor_name_.size(); i++) {
const auto &trt_out_tensor_name = trt_out_tensor_name_[i];
int index = this->engine_->getBindingIndex(trt_out_tensor_name.c_str());
int index = GetProfileBindingIndex(trt_out_tensor_name, profile_index_);
void *device_ptr = nullptr;
if (outputs.size() > i) {
auto &output = outputs[i];
@ -651,16 +766,10 @@ int TensorRTSubGraph::PostExecute(std::vector<tensor::Tensor> *outputs) {
auto has_outputs = !outputs->empty();
for (size_t i = 0; i < trt_out_tensor_name_.size(); i++) {
const auto &trt_out_tensor_name = trt_out_tensor_name_[i];
int index = this->engine_->getBindingIndex(trt_out_tensor_name.c_str());
auto index = GetProfileBindingIndex(trt_out_tensor_name, profile_index_);
// actual output tensor dims
auto out_dims = this->trt_context_->getBindingDimensions(index);
std::vector<int64_t> new_shape = lite::ConvertMSShape(out_dims);
// batchsize resize need set new batch size
if (input_batchsize_index_ != -1) {
if (runtime_->GetBatchSize() != new_shape[output_batchsize_index_]) {
new_shape[output_batchsize_index_] = runtime_->GetBatchSize();
}
}
outputs_[i].SetShape(new_shape);
for (int od = 0; od < out_dims.nbDims; od++) {
MS_LOG(DEBUG) << "out tensor " << trt_out_tensor_name << " dims at " << od << " is " << new_shape[od];
@ -712,20 +821,6 @@ bool TensorRTSubGraph::ValidInputResizeDims(const nvinfer1::Dims &construct_dims
MS_LOG(ERROR) << "invalid resize input.";
return false;
}
if (input_hw_index_ == -1) {
// only NHWC format support HW resize, otherwise only support batchsize resize
for (int d = 0; d < construct_dims.nbDims; d++) {
if (d != input_batchsize_index_ && construct_dims.d[d] != resize_input_shape[d]) {
MS_LOG(ERROR) << "only support dynamic batch size resize input.";
return false;
}
}
} else if ((input_hw_index_ == 1 && construct_dims.d[DIMENSION_3D] != resize_input_shape[DIMENSION_3D]) ||
(input_hw_index_ == DIMENSION_2D && construct_dims.d[1] != resize_input_shape[1])) {
// input may be nhwc || nchw
MS_LOG(ERROR) << "don't support dynamic channel resize input.";
return false;
}
return true;
}

View File

@ -28,6 +28,7 @@
#include "src/extendrt/delegate/tensorrt/op/tensorrt_op.h"
#include "src/extendrt/delegate/parameter_cache/embedding_cache_manager.h"
#include "include/api/context.h"
#include "common/config_infos.h"
namespace mindspore::lite {
using mindspore::lite::RET_ERROR;
@ -42,8 +43,7 @@ class TensorRTSubGraph {
TensorRTSubGraph(std::vector<TensorRTOp *> ops, const std::vector<TensorInfo> &inputs,
const std::vector<TensorInfo> &outputs, const mindspore::Context *ctx,
std::shared_ptr<GPUDeviceInfo> device_info, TensorRTRuntime *runtime, bool support_resize,
bool support_hw_resize, const std::vector<nvinfer1::Dims> &min_dims,
const std::vector<nvinfer1::Dims> &opt_dims, const std::vector<nvinfer1::Dims> &max_dims);
bool support_hw_resize, const ProfileConfigs &trt_profile_config);
~TensorRTSubGraph();
int Prepare();
@ -63,6 +63,7 @@ class TensorRTSubGraph {
std::vector<TensorInfo> &outputs() { return outputs_; }
private:
int GetInputIndexByName(const std::string &name);
int BuildEngine();
int SetDeviceConfig(cudaStream_t stream, cublasHandle_t cublas_handle, cublasLtHandle_t cublaslt_handle);
@ -71,7 +72,7 @@ class TensorRTSubGraph {
bool SupportFP16();
nvinfer1::ITensor *SetTensorRTNetworkInput(const TensorInfo &in_tensor, size_t index);
nvinfer1::ITensor *SetTensorRTNetworkInput(const TensorInfo &in_tensor, int index);
ITensorHelper FindTensorRTInputs(TensorRTOp *cur_op, const TensorInfo &in_tensor);
@ -85,17 +86,21 @@ class TensorRTSubGraph {
int HandleCacheTensor(TensorRTOp *cur_op, const TensorInfo &in_tensor);
nvinfer1::Dims ParseInputDimsProfile(const TensorInfo &in_tensor);
nvinfer1::Dims SetInputDimsProfile(const TensorInfo &in_tensor, size_t index);
nvinfer1::Dims ParseInputDimsProfile(const TensorInfo &in_tensor, int index);
nvinfer1::Dims SetInputDimsProfile(const TensorInfo &in_tensor, int index);
int ParseInputsProfile();
bool ValidInputResizeDims(const nvinfer1::Dims &construct_dims, const std::vector<int64_t> &resize_input_shape);
int PreExecute(const std::vector<tensor::Tensor> &inputs, const std::vector<tensor::Tensor> &outputs);
int PostExecute(std::vector<tensor::Tensor> *outputs);
int OnNewInputShapes(const std::vector<ShapeVector> &inputs);
size_t MaxVolumnProfileIndex() const;
int SelectProfile(const std::vector<ShapeVector> &new_shapes) const;
int GetProfileBindingIndex(const std::string &name, size_t profile_index);
bool ValidInputResizeDims(const nvinfer1::Dims &construct_dims, const std::vector<int64_t> &resize_input_shape);
bool IsValidProfileDims() const;
std::string name_;
std::vector<TensorInfo> inputs_;
std::vector<TensorInfo> outputs_;
@ -122,7 +127,6 @@ class TensorRTSubGraph {
nvinfer1::IBuilderConfig *config_{nullptr};
nvinfer1::ICudaEngine *engine_{nullptr};
nvinfer1::IExecutionContext *trt_context_{nullptr};
nvinfer1::IOptimizationProfile *profile_{nullptr};
TensorRTContext *ctx_;
@ -138,9 +142,10 @@ class TensorRTSubGraph {
std::string serialize_file_path_;
cudaStream_t stream_{nullptr};
std::vector<nvinfer1::Dims> min_dims_;
std::vector<nvinfer1::Dims> opt_dims_;
std::vector<nvinfer1::Dims> max_dims_;
std::vector<nvinfer1::IOptimizationProfile *> profiles_{};
bool using_input_ranges_{false};
ProfileConfigs trt_profile_config_;
size_t profile_index_{0};
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_TENSORRT_SUBGRAPH_H_

View File

@ -75,7 +75,6 @@ nvinfer1::Dims ConvertCudaDims(const TensorInfo &ms_tensor) {
}
return dims;
}
bool SameDims(nvinfer1::Dims dims, const std::vector<int64_t> &shape) {
if (dims.nbDims != static_cast<int>(shape.size())) {
return false;
@ -240,9 +239,9 @@ std::experimental::optional<ActivationParams> TryConvertActivationType(Activatio
{ActivationType::RELU6, ActivationParams{nvinfer1::ActivationType::kCLIP, true, 0, true, 6}},
{ActivationType::RELU1, ActivationParams{nvinfer1::ActivationType::kCLIP, true, 0, true, 1}},
{ActivationType::HARD_TANH, ActivationParams{nvinfer1::ActivationType::kCLIP, true, -1, true, 1}},
{ActivationType::SWISH, ActivationParams{nvinfer1::ActivationType::kSIGMOID, false, 0, false, 0}},
// using plugin
{ActivationType::GELU, ActivationParams{nvinfer1::ActivationType::kTHRESHOLDED_RELU, false, 0, false, 0}}};
{ActivationType::GELU, ActivationParams{nvinfer1::ActivationType::kTHRESHOLDED_RELU, false, 0, false, 0}},
{ActivationType::SWISH, ActivationParams{nvinfer1::ActivationType::kSIGMOID, false, 0, false, 0}}};
return action_map.find(activation_type) != action_map.end()
? std::experimental::optional<ActivationParams>(action_map[activation_type])
: std::experimental::nullopt;
@ -270,6 +269,10 @@ nvinfer1::ITensor *ConvertTensorWithExpandDims(TensorRTContext *ctx, const Tenso
MS_LOG(ERROR) << "network is null for ConvertTensorWithExpandDims";
return nullptr;
}
if (!ms_tensor.IsConst()) {
MS_LOG(ERROR) << "ConvertTensorWithExpandDims from a MSTensor with nullptr data";
return nullptr;
}
auto origin_shape = ms_tensor.Shape();
std::vector<int64_t> convert_shape(expect_shape);
AlignShapeRank(&origin_shape, convert_shape);
@ -295,10 +298,6 @@ nvinfer1::ITensor *ConvertTensorWithExpandDims(TensorRTContext *ctx, const Tenso
return nullptr;
}
nvinfer1::DataType data_type = ConvertDataType(ms_tensor.DataType());
if (!ms_tensor.IsConst()) {
MS_LOG(ERROR) << "ConvertTensorWithExpandDims from a MSTensor with nullptr data";
return nullptr;
}
nvinfer1::Weights weights{data_type, ms_tensor.Data(), ms_tensor.ElementNum()};
nvinfer1::IConstantLayer *constant_tensor = ctx->network()->addConstant(dims, weights);
if (constant_tensor == nullptr) {
@ -709,6 +708,87 @@ int ParseData2Vector(const TensorInfo &ms_tensor, std::vector<float> *dst) {
return RET_OK;
}
nvinfer1::ITensor *ExpandDim(TensorRTContext *ctx, nvinfer1::ITensor *input_tensor, int axis) {
// input has to be preprocessed to NCHW
auto input_dims = input_tensor->getDimensions();
nvinfer1::IShuffleLayer *shuffle_layer = ctx->network()->addShuffle(*input_tensor);
// if the expand axis is not the last dim and the shape is dynamic, expand at the last dim first and then transpose the new axis into place
bool special_expand = false;
for (int i = 0; i < input_dims.nbDims; i++) {
special_expand = special_expand || input_dims.d[i] == -1;
}
special_expand = special_expand && (axis != -1 && axis != input_dims.nbDims);
if (special_expand) {
std::vector<int64_t> new_shape;
for (int i = 0; i < input_dims.nbDims; i++) {
new_shape.push_back(input_dims.d[i] == -1 ? 0 : input_dims.d[i]);
}
new_shape.push_back(1);
nvinfer1::Dims new_dims = ConvertCudaDims(new_shape);
if (new_dims.nbDims == -1) {
return nullptr;
}
shuffle_layer->setReshapeDimensions(new_dims);
// transpose
nvinfer1::Permutation perm{};
for (int i = 0; i < new_dims.nbDims; i++) {
if (i < axis) {
perm.order[i] = i;
} else if (i == axis) {
perm.order[i] = new_dims.nbDims - 1;
} else {
perm.order[i] = i - 1;
}
}
nvinfer1::IShuffleLayer *trans_layer = ctx->network()->addShuffle(*shuffle_layer->getOutput(0));
if (trans_layer == nullptr) {
MS_LOG(ERROR) << "add transpose layer failed for special expand dims op ";
return nullptr;
}
trans_layer->setFirstTranspose(perm);
return trans_layer->getOutput(0);
} else {
std::vector<int64_t> new_shape;
for (int i = 0; i < input_dims.nbDims; i++) {
if (axis == i) {
new_shape.push_back(1);
}
new_shape.push_back(input_dims.d[i] == -1 ? 0 : input_dims.d[i]);
}
if (axis == -1 || axis == input_dims.nbDims) {
new_shape.push_back(1);
}
nvinfer1::Dims new_dims = ConvertCudaDims(new_shape);
if (new_dims.nbDims == -1) {
return nullptr;
}
shuffle_layer->setReshapeDimensions(new_dims);
return shuffle_layer->getOutput(0);
}
}
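// Editorial sketch, not part of this commit: in an IShuffleLayer reshape a 0 means "copy
// this dimension from the input", which is why dynamic (-1) dims are rewritten to 0 above.
// When the expand axis is not last and the shape is dynamic, the code appends the new unit
// dim at the end and transposes it into place; the permutation it builds is:
std::vector<int> ExpandDimPermutation(int new_rank, int axis) {
  std::vector<int> perm(new_rank);
  for (int i = 0; i < new_rank; ++i) {
    if (i < axis) {
      perm[i] = i;             // leading dims stay in place
    } else if (i == axis) {
      perm[i] = new_rank - 1;  // the appended unit dim moves to the expand axis
    } else {
      perm[i] = i - 1;         // the remaining dims shift right by one
    }
  }
  return perm;  // e.g. ExpandDimPermutation(4, 1) == {0, 3, 1, 2} for a rank-3 input, axis 1
}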
nvinfer1::ITensor *Broadcast(TensorRTContext *ctx, nvinfer1::ITensor *input, nvinfer1::ITensor *shape) {
int rank = shape->getDimensions().d[0];
nvinfer1::Dims starts{rank};
std::fill(starts.d, starts.d + rank, 0);
nvinfer1::Dims strides{rank};
std::fill(strides.d, strides.d + rank, 1);
auto slice_layer = ctx->network()->addSlice(*input, starts, {}, strides);
slice_layer->setMode(nvinfer1::SliceMode::kWRAP);
const int INPUT2 = 2;
slice_layer->setInput(INPUT2, *shape);
auto shuffler_output = slice_layer->getOutput(0);
if (shuffler_output == nullptr) {
MS_LOG(ERROR) << "add slice layer failed";
}
return shuffler_output;
}
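// Editorial note, not part of this commit: Broadcast relies on the wrap-mode slice idiom:
// start 0, stride 1, SliceMode::kWRAP, and the target shape fed at runtime as slice input 2
// tile the tensor up to that shape. A hypothetical call site, assuming the declarations in
// this file:
nvinfer1::ITensor *TileToShapeOf(TensorRTContext *ctx, nvinfer1::ITensor *small, nvinfer1::ITensor *reference) {
  auto shape_layer = ctx->network()->addShape(*reference);  // 1-D shape tensor of the reference
  if (shape_layer == nullptr) {
    MS_LOG(ERROR) << "add shape layer failed";
    return nullptr;
  }
  return Broadcast(ctx, small, shape_layer->getOutput(0));
}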
nvinfer1::ITensor *Reshape(TensorRTContext *ctx, nvinfer1::ITensor *input, const std::vector<int64_t> &shape) {
return Reshape(ctx, input, ConvertCudaDims(shape));
}
@ -723,10 +803,23 @@ nvinfer1::ITensor *Reshape(TensorRTContext *ctx, nvinfer1::ITensor *input, const
return reshape_layer->getOutput(0);
}
void DebugDims(const nvinfer1::Dims &dims) {
MS_LOG(DEBUG) << "nbdim : " << dims.nbDims;
void DebugDims(const std::string &key, const nvinfer1::Dims &dims) {
MS_LOG(DEBUG) << key << ":" << dims.nbDims;
for (int i = 0; i != dims.nbDims; ++i) {
MS_LOG(DEBUG) << dims.d[i];
}
}
template <>
nvinfer1::DataType GetNvinferDataType<float>() {
return nvinfer1::DataType::kFLOAT;
}
template <>
nvinfer1::DataType GetNvinferDataType<int>() {
return nvinfer1::DataType::kINT32;
}
template nvinfer1::DataType GetNvinferDataType<float>();
template nvinfer1::DataType GetNvinferDataType<int>();
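// Editorial sketch, not part of this commit: the specializations above map host element
// types to TensorRT dtypes. A hypothetical use when building a Weights descriptor from a
// typed host buffer:
template <typename T>
nvinfer1::Weights MakeWeights(const T *data, int64_t count) {
  return nvinfer1::Weights{GetNvinferDataType<T>(), data, count};
}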
} // namespace mindspore::lite

View File

@ -138,7 +138,14 @@ nvinfer1::ITensor *ConvertConstantTensor1D(TensorRTContext *ctx, int *weights_ve
int ParseData2Vector(const TensorInfo &ms_tensor, std::vector<float> *dst);
void DebugDims(const nvinfer1::Dims &dims);
void DebugDims(const std::string &key, const nvinfer1::Dims &dims);
nvinfer1::ITensor *ExpandDim(TensorRTContext *ctx, nvinfer1::ITensor *input_tensor, int axis);
nvinfer1::ITensor *Broadcast(TensorRTContext *ctx, nvinfer1::ITensor *input, nvinfer1::ITensor *shape);
template <typename T>
nvinfer1::DataType GetNvinferDataType();
template <typename T1, typename T2>
bool SameDims(const std::vector<T1> &shape1, const std::vector<T2> &shape2) {

View File

@ -76,11 +76,13 @@ std::vector<std::string> DefaultInferSession::GetOutputNames() { return std::vec
std::vector<std::string> DefaultInferSession::GetInputNames() { return std::vector<std::string>(); }
MutableTensorImplPtr DefaultInferSession::GetOutputByTensorName(const std::string &tensorName) { return nullptr; }
MutableTensorImplPtr DefaultInferSession::GetInputByTensorName(const std::string &name) { return nullptr; }
std::shared_ptr<InferSession> InferSession::CreateSession(const std::shared_ptr<Context> &context) {
std::shared_ptr<InferSession> InferSession::CreateSession(const std::shared_ptr<Context> &context,
const ConfigInfos &config_info) {
HandleContext(context);
auto session_type = SelectSession(context);
MS_LOG(DEBUG) << "Session type " << static_cast<int64_t>(session_type);
return SessionRegistry::GetInstance().GetSession(session_type, context);
return SessionRegistry::GetInstance().GetSession(session_type, context, config_info);
}
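// Editorial sketch, not part of this commit: a hypothetical call site showing that parsed
// config sections now flow into session creation. ConfigInfos is assumed to be the
// section -> key/value map read from the config file; the section and key names below are
// placeholders, not confirmed by this diff.
std::shared_ptr<InferSession> CreateSessionWithConfigExample() {
  ConfigInfos config_infos;
  config_infos["gpu_context"]["input_shape"] = "x:[1,3,-1,-1]";  // assumed section/key names
  auto context = std::make_shared<Context>();
  context->MutableDeviceInfo().push_back(std::make_shared<GPUDeviceInfo>());
  return InferSession::CreateSession(context, config_infos);
}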
void InferSession::HandleContext(const std::shared_ptr<Context> &context) {
@ -148,7 +150,8 @@ SessionType InferSession::SelectSession(const std::shared_ptr<Context> &context)
return kDefaultSession;
}
static std::shared_ptr<InferSession> DefaultSessionCreator(const std::shared_ptr<Context> &ctx) {
static std::shared_ptr<InferSession> DefaultSessionCreator(const std::shared_ptr<Context> &ctx,
const ConfigInfos &config_infos) {
auto session = std::make_shared<DefaultInferSession>(ctx);
session->Init(ctx);
return session;

View File

@ -29,12 +29,14 @@
#include "extendrt/session/type.h"
#include "common/mutable_tensor_impl.h"
#include "extendrt/utils/kernel_graph_utils.h"
#include "src/common/config_infos.h"
namespace mindspore {
class InferSession : public std::enable_shared_from_this<InferSession> {
public:
virtual ~InferSession() = default;
static std::shared_ptr<InferSession> CreateSession(const std::shared_ptr<Context> &context);
static std::shared_ptr<InferSession> CreateSession(const std::shared_ptr<Context> &context,
const ConfigInfos &config_info);
static SessionType SelectSession(const std::shared_ptr<Context> &context);
virtual Status Init(const std::shared_ptr<Context> &context) = 0;
virtual Status CompileGraph(FuncGraphPtr graph, const void *data = nullptr, size_t size = 0) = 0;

View File

@ -181,7 +181,8 @@ MutableTensorImplPtr GraphSinkSession::GetInputByTensorName(const std::string &n
}
return nullptr;
}
static std::shared_ptr<InferSession> DelegateSessionCreator(const std::shared_ptr<Context> &ctx) {
static std::shared_ptr<InferSession> DelegateSessionCreator(const std::shared_ptr<Context> &ctx,
const ConfigInfos &config_infos) {
auto &device_contexts = ctx->MutableDeviceInfo();
if (device_contexts.empty()) {
return nullptr;
@ -189,7 +190,10 @@ static std::shared_ptr<InferSession> DelegateSessionCreator(const std::shared_pt
auto device_type = device_contexts.at(0)->GetDeviceType();
auto provider = device_contexts.at(0)->GetProvider();
auto delegate = DelegateRegistry::GetInstance().GetDelegate(device_type, provider, ctx);
auto delegate = DelegateRegistry::GetInstance().GetDelegate(device_type, provider, ctx, config_infos);
if (delegate == nullptr) {
return nullptr;
}
auto session = std::make_shared<GraphSinkSession>(delegate);
session->Init(ctx);
return session;

View File

@ -26,18 +26,17 @@ SessionRegistry &SessionRegistry::GetInstance() {
return instance;
}
void SessionRegistry::RegSession(
const mindspore::SessionType &session_type,
const std::function<std::shared_ptr<InferSession>(const std::shared_ptr<Context> &)> &creator) {
void SessionRegistry::RegSession(const mindspore::SessionType &session_type, const InferSessionRegFunc &creator) {
session_map_[session_type] = creator;
}
std::shared_ptr<InferSession> SessionRegistry::GetSession(const mindspore::SessionType &session_type,
const std::shared_ptr<Context> &ctx) {
const std::shared_ptr<Context> &ctx,
const ConfigInfos &config_info) {
auto it = session_map_.find(session_type);
if (it == session_map_.end()) {
return nullptr;
}
return it->second(ctx);
return it->second(ctx, config_info);
}
} // namespace mindspore

View File

@ -23,6 +23,9 @@
#include "extendrt/infer_session.h"
#include "include/api/context.h"
namespace mindspore {
using InferSessionRegFunc =
std::function<std::shared_ptr<InferSession>(const std::shared_ptr<Context> &, const ConfigInfos &)>;
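// Editorial sketch, not part of this commit: a hypothetical creator matching
// InferSessionRegFunc. MySession stands in for a concrete InferSession subclass; creators
// that do not need the parsed config simply ignore config_infos, as the built-in creators
// in this change do.
static std::shared_ptr<InferSession> MySessionCreator(const std::shared_ptr<Context> &ctx,
                                                      const ConfigInfos &config_infos) {
  auto session = std::make_shared<MySession>();
  session->Init(ctx);
  return session;
}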
class SessionRegistry {
public:
SessionRegistry() = default;
@ -30,21 +33,18 @@ class SessionRegistry {
static SessionRegistry &GetInstance();
void RegSession(const mindspore::SessionType &session_type,
const std::function<std::shared_ptr<InferSession>(const std::shared_ptr<Context> &)> &creator);
void RegSession(const mindspore::SessionType &session_type, const InferSessionRegFunc &creator);
std::shared_ptr<InferSession> GetSession(const mindspore::SessionType &session_type,
const std::shared_ptr<Context> &);
std::shared_ptr<InferSession> GetSession(const mindspore::SessionType &session_type, const std::shared_ptr<Context> &,
const ConfigInfos &);
private:
mindspore::HashMap<SessionType, std::function<std::shared_ptr<InferSession>(const std::shared_ptr<Context> &)>>
session_map_;
mindspore::HashMap<SessionType, InferSessionRegFunc> session_map_;
};
class SessionRegistrar {
public:
SessionRegistrar(const mindspore::SessionType &session_type,
const std::function<std::shared_ptr<InferSession>(const std::shared_ptr<Context> &)> &creator) {
SessionRegistrar(const mindspore::SessionType &session_type, const InferSessionRegFunc &creator) {
SessionRegistry::GetInstance().RegSession(session_type, creator);
}
~SessionRegistrar() = default;

View File

@ -294,7 +294,8 @@ std::vector<tensor::TensorPtr> LiteInferSession::ConvertToTensors(
return tensors;
}
static std::shared_ptr<InferSession> LiteInferSessionCreator(const std::shared_ptr<Context> &ctx) {
static std::shared_ptr<InferSession> LiteInferSessionCreator(const std::shared_ptr<Context> &ctx,
const ConfigInfos &config_infos) {
auto session = std::make_shared<LiteInferSession>();
session->Init(ctx);
return session;

View File

@ -277,7 +277,8 @@ MutableTensorImplPtr SingleOpInferSession::GetInputByTensorName(const std::strin
return nullptr;
}
static std::shared_ptr<InferSession> SingleOpSessionCreator(const std::shared_ptr<Context> &ctx) {
static std::shared_ptr<InferSession> SingleOpSessionCreator(const std::shared_ptr<Context> &ctx,
const ConfigInfos &config_infos) {
auto session = std::make_shared<SingleOpInferSession>();
session->Init(ctx);
return session;

View File

@ -31,6 +31,44 @@
namespace mindspore {
const size_t max_depth = 128;
GraphId KernelGraphUtils::graph_sum_ = 0;
std::vector<AnfNodePtr> KernelGraphUtils::GetKernelGraphOutputs(const KernelGraphPtr &func_graph) {
if (!func_graph) {
return {};
}
std::vector<AnfNodePtr> outputs = func_graph->outputs();
while (true) {
auto has_replace = false;
for (auto it = outputs.begin(); it != outputs.end(); ++it) {
auto output = *it;
std::vector<AnfNodePtr> one_outputs;
if (IsPrimitiveCNode(output, prim::kPrimDepend)) {
auto depend = output->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(depend);
output = depend->input(kRealInputIndexInDepend);
}
if (IsPrimitiveCNode(output, prim::kPrimMakeTuple)) {
auto make_tuple = output->cast<CNodePtr>();
MS_EXCEPTION_IF_NULL(make_tuple);
auto &inputs = make_tuple->inputs();
one_outputs = std::vector<AnfNodePtr>(inputs.begin() + 1, inputs.end());
} else {
one_outputs = {output};
}
if (one_outputs.size() != 1 || one_outputs[0] != output) {
it = outputs.erase(it);
outputs.insert(it, one_outputs.begin(), one_outputs.end());
has_replace = true;
break;
}
}
if (!has_replace) {
break;
}
}
return outputs;
}
KernelGraphPtr KernelGraphUtils::ConstructKernelGraph(const FuncGraphPtr &func_graph,
std::vector<KernelGraphPtr> *all_out_graph,
mindspore::device::DeviceType device_target) {
@ -965,7 +1003,7 @@ void KernelGraphUtils::GetModelOutputsInfo(uint32_t graph_id, std::vector<tensor
VectorRef vector_outputs;
std::map<tensor::TensorPtr, session::KernelWithIndex> tensor_to_node;
session::KernelMapTensor node_to_tensor;
auto anf_outputs = kernel_graph->outputs();
auto anf_outputs = KernelGraphUtils::GetKernelGraphOutputs(kernel_graph);
for (auto &item : anf_outputs) {
MS_EXCEPTION_IF_NULL(item);
MS_LOG(INFO) << "Create node output[" << item->DebugString() << "]";

View File

@ -24,11 +24,12 @@
#include <string>
#include "backend/common/session/kernel_graph.h"
#include "include/api/visible.h"
namespace mindspore {
using GraphId = uint32_t;
using KernelGraph = mindspore::session::KernelGraph;
class KernelGraphUtils : public std::enable_shared_from_this<KernelGraphUtils> {
class MS_API KernelGraphUtils : public std::enable_shared_from_this<KernelGraphUtils> {
public:
KernelGraphUtils() = default;
virtual ~KernelGraphUtils() = default;
@ -37,6 +38,7 @@ class KernelGraphUtils : public std::enable_shared_from_this<KernelGraphUtils> {
static KernelGraphUtils instance;
return instance;
}
static std::vector<AnfNodePtr> GetKernelGraphOutputs(const KernelGraphPtr &func_graph);
KernelGraphPtr ConstructKernelGraph(const FuncGraphPtr &func_graph, std::vector<KernelGraphPtr> *all_out_graph,
mindspore::device::DeviceType device_target);

View File

@ -22,6 +22,7 @@
#include <memory>
#include "extendrt/utils/runtime_utils.h"
#include "extendrt/utils/kernel_graph_utils.h"
#include "src/extendrt/infer_device_address.h"
#include "include/common/utils/anfalgo.h"
@ -88,7 +89,7 @@ void RuntimeUtils::CopyInputTensorsToKernelGraph(const std::vector<tensor::Tenso
void RuntimeUtils::CopyOutputTensorsFromKernelGraph(std::vector<tensor::Tensor> *outputs, KernelGraphPtr kernel_graph) {
MS_EXCEPTION_IF_NULL(kernel_graph);
outputs->clear();
auto graph_outputs = kernel_graph->outputs();
auto graph_outputs = KernelGraphUtils::GetKernelGraphOutputs(kernel_graph);
for (auto graph_output : graph_outputs) {
auto real_output_with_index = common::AnfAlgo::VisitKernelWithReturnType(graph_output, 0);
auto real_output = real_output_with_index.first;

View File

@ -94,7 +94,8 @@ class TensorDefaultImpl : public MutableTensorImpl {
void SetDeviceData(void *data) override { device_data_ = data; }
void *GetDeviceData() override { return device_data_; }
bool IsConst() const override { return false; }
bool IsConst() const override { return is_const_; }
void SetIsConst(bool is_const) { is_const_ = is_const; }
bool IsDevice() const override { return device_data_ != nullptr; }
@ -138,6 +139,8 @@ class TensorDefaultImpl : public MutableTensorImpl {
mutable const void *data_ = nullptr;
bool own_data_ = false;
bool is_const_ = false;
void ResizeData() const {
if (data_ != nullptr && data_ != buffer_.Data()) {
return;