commit
1c8b861898
|
@ -60,8 +60,7 @@ void ParseLine(const std::string &line, std::map<std::string, std::string> *sect
|
|||
}
|
||||
} // namespace
|
||||
|
||||
int GetAllSectionInfoFromConfigFile(const std::string &file,
|
||||
std::map<std::string, std::map<std::string, std::string>> *config) {
|
||||
int GetAllSectionInfoFromConfigFile(const std::string &file, ConfigInfos *config) {
|
||||
if (file.empty() || config == nullptr) {
|
||||
MS_LOG(ERROR) << "input Invalid!check file and config.";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -30,13 +30,14 @@
|
|||
#include <utility>
|
||||
#include "src/common/utils.h"
|
||||
#include "src/common/log_adapter.h"
|
||||
#include "src/common/config_infos.h"
|
||||
#include "ir/dtype/type_id.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace lite {
|
||||
constexpr int MAX_CONFIG_FILE_LENGTH = 1024;
|
||||
int GetAllSectionInfoFromConfigFile(const std::string &file,
|
||||
std::map<std::string, std::map<std::string, std::string>> *config);
|
||||
|
||||
int GetAllSectionInfoFromConfigFile(const std::string &file, ConfigInfos *config);
|
||||
|
||||
void ParserExecutionPlan(const std::map<std::string, std::string> *config_infos,
|
||||
std::map<std::string, TypeId> *data_type_plan);
|
||||
|
|
|
@ -0,0 +1,383 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/common/config_infos.h"
|
||||
#include <vector>
|
||||
#include "src/common/log_adapter.h"
|
||||
#include "src/common/common.h"
|
||||
|
||||
namespace mindspore {
|
||||
std::vector<std::string> ProfileParser::Split(const std::string &str, const std::string &delim) {
|
||||
auto start = 0U;
|
||||
auto end = str.find(delim);
|
||||
std::vector<std::string> substrs;
|
||||
while (end != std::string::npos) {
|
||||
substrs.push_back(str.substr(start, end - start));
|
||||
start = end + delim.length();
|
||||
end = str.find(delim, start);
|
||||
}
|
||||
substrs.push_back(str.substr(start, end));
|
||||
return substrs;
|
||||
}
|
||||
|
||||
bool ProfileParser::StrToInt64(const std::string &str, int64_t *val) {
|
||||
if (str.empty()) {
|
||||
return false;
|
||||
}
|
||||
int64_t symbol = 1;
|
||||
size_t index = 0;
|
||||
if (str[0] == '-') {
|
||||
symbol = -1;
|
||||
index++;
|
||||
}
|
||||
int64_t val_ret = 0;
|
||||
constexpr int64_t decimal_times = 10;
|
||||
for (; index < str.size(); index++) {
|
||||
auto c = str[index];
|
||||
if (c < '0' || c > '9') {
|
||||
return false;
|
||||
}
|
||||
val_ret = val_ret * decimal_times + (c - '0');
|
||||
if (val_ret >= INT32_MAX) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
*val = symbol * val_ret;
|
||||
return true;
|
||||
}
|
||||
|
||||
constexpr int KV_NUM = 2;
|
||||
|
||||
bool ProfileParser::ParseShapeStr(const std::vector<std::string> &str_dims, ShapeVector *shape_ptr) {
|
||||
if (shape_ptr == nullptr) {
|
||||
return false;
|
||||
}
|
||||
if (str_dims.empty()) {
|
||||
MS_LOG_ERROR << "Invalid input shape dim, dims number cannot be 0";
|
||||
return false;
|
||||
}
|
||||
auto &shape = *shape_ptr;
|
||||
shape.resize(str_dims.size());
|
||||
for (size_t i = 0; i != str_dims.size(); ++i) {
|
||||
int64_t dim = 0;
|
||||
if (!StrToInt64(str_dims[i], &dim)) {
|
||||
MS_LOG_ERROR << "Invalid input shape dim, dim value range or format is invalid: " << str_dims[i];
|
||||
return false;
|
||||
}
|
||||
if (dim <= 0 && dim != -1) {
|
||||
MS_LOG_ERROR << "Invalid input shape dim, dim can only be -1 when dim < 0: " << str_dims[i];
|
||||
return false;
|
||||
}
|
||||
shape[i] = dim;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ProfileParser::ParseRangeStr(const std::string &range_str, int64_t *min_ptr, int64_t *max_ptr) {
|
||||
if (min_ptr == nullptr || max_ptr == nullptr) {
|
||||
return false;
|
||||
}
|
||||
auto min_and_max = Split(range_str, "~");
|
||||
auto &min = *min_ptr;
|
||||
auto &max = *max_ptr;
|
||||
constexpr size_t number_range_count = 1;
|
||||
constexpr size_t min_max_range_count = 2;
|
||||
if (min_and_max.size() == number_range_count) {
|
||||
if (!StrToInt64(min_and_max[0], &min)) {
|
||||
MS_LOG_ERROR << "Invalid dynamic dim value range, dim value range or format is invalid: " << range_str
|
||||
<< ". It should be 'min~max' or a number.";
|
||||
return false;
|
||||
}
|
||||
max = min;
|
||||
} else if (min_and_max.size() == min_max_range_count) {
|
||||
if (!StrToInt64(min_and_max[0], &min) || !StrToInt64(min_and_max[1], &max)) {
|
||||
MS_LOG_ERROR << "Invalid dynamic dim value range, dim value range or format is invalid: " << range_str
|
||||
<< ". It should be 'min~max' or a number.";
|
||||
return false;
|
||||
}
|
||||
} else {
|
||||
MS_LOG_ERROR << "Invalid dynamic dim value range, dim value range or format is invalid: " << range_str
|
||||
<< ". It should be 'min~max' or a number.";
|
||||
return false;
|
||||
}
|
||||
if (min > max || min <= 0) {
|
||||
MS_LOG(ERROR) << "Invalid dimension range string format of '" << lite::kDynamicDims << "': " << range_str;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ProfileParser::ParseOptDimStr(const std::string &opt_dim_str, int64_t *opt_ptr) {
|
||||
if (opt_ptr == nullptr) {
|
||||
return false;
|
||||
}
|
||||
auto &opt = *opt_ptr;
|
||||
opt = std::stoll(opt_dim_str);
|
||||
if (!StrToInt64(opt_dim_str, &opt)) {
|
||||
MS_LOG_ERROR << "Invalid opt dim value range, dim value range or format is invalid: " << opt_dim_str
|
||||
<< ". It should be a number.";
|
||||
return false;
|
||||
}
|
||||
if (opt <= 0) {
|
||||
MS_LOG(ERROR) << "Invalid opt dim value range '" << lite::kOptimizeDims << "': " << opt_dim_str;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ProfileParser::ParseInputShape(const std::string &input_shapes_str, ProfileConfigs *profile_configs_ptr) {
|
||||
auto &profile_configs = *profile_configs_ptr;
|
||||
auto input_slices = Split(input_shapes_str, ";");
|
||||
for (auto input_slice : input_slices) {
|
||||
auto name_and_shape = Split(input_slice, ":");
|
||||
if (name_and_shape.size() != KV_NUM) {
|
||||
MS_LOG(ERROR) << "each tensor has name and shape [" << input_slice << "]";
|
||||
return false;
|
||||
}
|
||||
std::string name = name_and_shape[0];
|
||||
std::string shape_str = name_and_shape[1];
|
||||
if (shape_str.front() != '[' || shape_str.back() != ']') {
|
||||
MS_LOG(ERROR) << "shape format check fail.";
|
||||
return false;
|
||||
}
|
||||
constexpr size_t trim_len = 2;
|
||||
shape_str = shape_str.substr(1, shape_str.size() - trim_len);
|
||||
auto str_dims = Split(shape_str, ",");
|
||||
ProfileInputInfo info;
|
||||
info.name = name;
|
||||
ShapeVector &shape = info.input_shape;
|
||||
if (!ParseShapeStr(str_dims, &shape)) {
|
||||
MS_LOG(ERROR) << "Invalid input shape dims: " << shape_str;
|
||||
return false;
|
||||
}
|
||||
info.is_dynamic_shape = std::any_of(shape.begin(), shape.end(), [](auto dim) { return dim < 0; });
|
||||
profile_configs.input_infos.push_back(info);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ProfileParser::ParseDynamicDims(const std::string &dynamic_dims_str, ProfileConfigs *profile_configs_ptr) {
|
||||
auto &profile_configs = *profile_configs_ptr;
|
||||
auto inputs_of_str = Split(dynamic_dims_str, ";");
|
||||
if (inputs_of_str.size() != profile_configs.input_infos.size()) {
|
||||
MS_LOG(ERROR) << "The input count " << inputs_of_str.size() << " in '" << lite::kDynamicDims
|
||||
<< "' != the input count " << profile_configs.input_infos.size() << " '"
|
||||
<< " in '" << lite::kInputShape;
|
||||
return false;
|
||||
}
|
||||
// for every input
|
||||
for (size_t input_index = 0; input_index != inputs_of_str.size(); ++input_index) {
|
||||
auto &info = profile_configs.input_infos[input_index];
|
||||
auto one_input_str = inputs_of_str[input_index];
|
||||
auto profiles_of_str = Split(one_input_str, "],[");
|
||||
if (profiles_of_str.empty()) {
|
||||
MS_LOG(ERROR) << "The profile count of " << input_index << "th input in '" << lite::kDynamicDims << "' is 0";
|
||||
return false;
|
||||
}
|
||||
if (profile_configs.profiles.empty()) {
|
||||
profile_configs.profiles.resize(profiles_of_str.size());
|
||||
for (auto &profile : profile_configs.profiles) {
|
||||
profile.inputs.resize(profile_configs.input_infos.size());
|
||||
}
|
||||
}
|
||||
if (profiles_of_str.size() != profile_configs.profiles.size()) {
|
||||
MS_LOG(ERROR) << "The profile count " << profiles_of_str.size() << " of " << input_index << "th input in '"
|
||||
<< lite::kDynamicDims << "' != profile count " << profile_configs.profiles.size() << " of "
|
||||
<< (input_index - 1) << " th input";
|
||||
return false;
|
||||
}
|
||||
// for every profile in one input, parse input range: min, max
|
||||
for (size_t profile_index = 0; profile_index != profiles_of_str.size(); ++profile_index) {
|
||||
ProfileItem &profile_item = profile_configs.profiles[profile_index];
|
||||
ProfileInputRange &input_range = profile_item.inputs[input_index];
|
||||
|
||||
auto one_profile_str = profiles_of_str[profile_index];
|
||||
while (one_profile_str.front() == '[' || one_profile_str.front() == ' ') {
|
||||
one_profile_str = one_profile_str.substr(1);
|
||||
}
|
||||
while (one_profile_str.back() == ']' || one_profile_str.back() == ' ') {
|
||||
one_profile_str = one_profile_str.substr(0, one_profile_str.size() - 1);
|
||||
}
|
||||
auto dim_ranges = Split(one_profile_str, ",");
|
||||
|
||||
auto &input_shape = info.input_shape;
|
||||
size_t dynamic_nbdims = std::count(input_shape.begin(), input_shape.end(), -1);
|
||||
if (dim_ranges.size() != dynamic_nbdims) {
|
||||
MS_LOG(ERROR) << "Number of dynamic dims in config '" << lite::kDynamicDims << "' " << dim_ranges.size()
|
||||
<< " != that in '" << lite::kInputShape << "' " << dynamic_nbdims << ".";
|
||||
return false;
|
||||
}
|
||||
size_t range_index = 0;
|
||||
input_range.min_dims = input_shape;
|
||||
input_range.max_dims = input_shape;
|
||||
for (size_t i = 0; i < input_shape.size(); ++i) {
|
||||
if (input_shape[i] != -1) {
|
||||
continue;
|
||||
}
|
||||
if (!ParseRangeStr(dim_ranges[range_index++], &input_range.min_dims[i], &input_range.max_dims[i])) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
input_range.opt_dims = input_range.min_dims; // default
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ProfileParser::ParseOptDims(const std::string &opt_dims_str, ProfileConfigs *profile_configs_ptr) {
|
||||
auto &profile_configs = *profile_configs_ptr;
|
||||
auto inputs_of_str = Split(opt_dims_str, ";");
|
||||
if (inputs_of_str.size() != profile_configs.input_infos.size()) {
|
||||
MS_LOG(ERROR) << "The input count " << inputs_of_str.size() << " in '" << lite::kOptimizeDims
|
||||
<< "' != the input count " << profile_configs.input_infos.size() << " '"
|
||||
<< " in '" << lite::kInputShape;
|
||||
return false;
|
||||
}
|
||||
// for every input
|
||||
for (size_t input_index = 0; input_index != inputs_of_str.size(); ++input_index) {
|
||||
auto &info = profile_configs.input_infos[input_index];
|
||||
auto one_input_str = inputs_of_str[input_index];
|
||||
auto profiles_of_str = Split(one_input_str, "],[");
|
||||
if (profiles_of_str.size() != profile_configs.profiles.size()) {
|
||||
MS_LOG(ERROR) << "The profile count " << profiles_of_str.size() << " of " << input_index << "th input in '"
|
||||
<< lite::kOptimizeDims << "' != profile count " << profile_configs.profiles.size() << " in '"
|
||||
<< lite::kDynamicDims << "'";
|
||||
return false;
|
||||
}
|
||||
// for every profile in one input, parse input range: min, max
|
||||
for (size_t profile_index = 0; profile_index != profiles_of_str.size(); ++profile_index) {
|
||||
ProfileItem &profile_item = profile_configs.profiles[profile_index];
|
||||
ProfileInputRange &input_range = profile_item.inputs[input_index];
|
||||
|
||||
auto one_profile_str = profiles_of_str[profile_index];
|
||||
while (one_profile_str.front() == '[' || one_profile_str.front() == ' ') {
|
||||
one_profile_str = one_profile_str.substr(1);
|
||||
}
|
||||
while (one_profile_str.back() == ']' || one_profile_str.back() == ' ') {
|
||||
one_profile_str = one_profile_str.substr(0, one_profile_str.size() - 1);
|
||||
}
|
||||
auto opt_dims_vec = Split(one_profile_str, ",");
|
||||
|
||||
auto &input_shape = info.input_shape;
|
||||
size_t dynamic_nbdims = std::count(input_shape.begin(), input_shape.end(), -1);
|
||||
if (opt_dims_vec.size() != dynamic_nbdims) {
|
||||
MS_LOG(ERROR) << "Number of dynamic dims in config '" << lite::kOptimizeDims << "' " << opt_dims_vec.size()
|
||||
<< " != that in '" << lite::kInputShape << "' " << dynamic_nbdims << ".";
|
||||
return false;
|
||||
}
|
||||
size_t dynamic_index = 0;
|
||||
input_range.opt_dims = input_shape;
|
||||
for (size_t i = 0; i < input_shape.size(); ++i) {
|
||||
if (input_shape[i] != -1) {
|
||||
continue;
|
||||
}
|
||||
if (!ParseOptDimStr(opt_dims_vec[dynamic_index++], &input_range.opt_dims[i])) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ProfileParser::Parse(const std::map<std::string, std::string> &context, bool require_opt_when_dym,
|
||||
ProfileConfigs *profile_configs_ptr) {
|
||||
if (profile_configs_ptr == nullptr) {
|
||||
return false;
|
||||
}
|
||||
auto &profile_configs = *profile_configs_ptr;
|
||||
auto get_item = [&context](const std::string &key) -> std::string {
|
||||
auto it = context.find(key);
|
||||
return it != context.end() ? it->second : "";
|
||||
};
|
||||
auto input_shapes = get_item(lite::kInputShape);
|
||||
auto dynamic_dims = get_item(lite::kDynamicDims);
|
||||
auto opt_dims = get_item(lite::kOptimizeDims);
|
||||
if (input_shapes.empty() && dynamic_dims.empty() && opt_dims.empty()) {
|
||||
MS_LOG(INFO) << "Do not found config of input range('" << lite::kInputShape << "')";
|
||||
return true;
|
||||
}
|
||||
if (input_shapes.empty()) {
|
||||
MS_LOG(ERROR) << "Config of '" << lite::kInputShape << " cannot be empty when '" << lite::kInputShape << "' or '"
|
||||
<< lite::kOptimizeDims << "' is not empty";
|
||||
return false;
|
||||
}
|
||||
if (!ParseInputShape(input_shapes, &profile_configs)) {
|
||||
MS_LOG(ERROR) << "parse input shape failed.";
|
||||
return false;
|
||||
}
|
||||
if (dynamic_dims.empty()) {
|
||||
ProfileItem profile_item;
|
||||
for (size_t i = 0; i < profile_configs.input_infos.size(); i++) {
|
||||
if (profile_configs.input_infos[i].is_dynamic_shape) {
|
||||
MS_LOG(ERROR) << "Config of '" << lite::kDynamicDims << "' cannot be empty when " << lite::kInputShape
|
||||
<< " is dynamic";
|
||||
return false;
|
||||
}
|
||||
auto &input_shape = profile_configs.input_infos[i].input_shape;
|
||||
ProfileInputRange input_range;
|
||||
input_range.min_dims = input_shape;
|
||||
input_range.max_dims = input_shape;
|
||||
input_range.opt_dims = input_shape;
|
||||
profile_item.inputs.push_back(input_range);
|
||||
}
|
||||
profile_configs.profiles.push_back(profile_item);
|
||||
return true;
|
||||
}
|
||||
if (!ParseDynamicDims(dynamic_dims, &profile_configs)) {
|
||||
MS_LOG(ERROR) << "parse dynamic dims failed.";
|
||||
return false;
|
||||
}
|
||||
if (require_opt_when_dym) {
|
||||
if (!ParseOptDims(opt_dims, &profile_configs)) {
|
||||
MS_LOG(ERROR) << "parse optimization dims failed.";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ProfileParser::ReorderByInputNames(const std::vector<std::string> &input_names, ProfileConfigs *profile_configs) {
|
||||
if (input_names.size() != profile_configs->input_infos.size()) {
|
||||
MS_LOG_ERROR << "Input name size " << input_names.size() << " != profile config input size "
|
||||
<< profile_configs->input_infos.size();
|
||||
return false;
|
||||
}
|
||||
ProfileConfigs new_profile_configs = *profile_configs;
|
||||
auto &input_infos = profile_configs->input_infos;
|
||||
auto &profiles = profile_configs->profiles;
|
||||
|
||||
for (size_t input_index = 0; input_index < input_names.size(); input_index++) {
|
||||
auto input_name = input_names[input_index];
|
||||
size_t i = 0;
|
||||
for (; i < input_infos.size(); i++) {
|
||||
if (input_infos[i].name == input_name) {
|
||||
new_profile_configs.input_infos[input_index] = input_infos[i];
|
||||
for (size_t profile_index = 0; profile_index < profiles.size(); profile_index++) {
|
||||
new_profile_configs.profiles[profile_index].inputs[input_index] = profiles[profile_index].inputs[i];
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (i >= input_infos.size()) {
|
||||
MS_LOG_ERROR << "Cannot find input " << input_name << " in profile '" << lite::kInputShape << "' config";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
*profile_configs = new_profile_configs;
|
||||
return true;
|
||||
}
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,70 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_COMMON_CONFIG_INFOS_H_
|
||||
#define MINDSPORE_LITE_SRC_COMMON_CONFIG_INFOS_H_
|
||||
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include "include/api/visible.h"
|
||||
#include "mindapi/base/shape_vector.h"
|
||||
|
||||
namespace mindspore {
|
||||
using ConfigInfos = std::map<std::string, std::map<std::string, std::string>>;
|
||||
|
||||
struct ProfileInputInfo {
|
||||
std::string name;
|
||||
bool is_dynamic_shape = false;
|
||||
ShapeVector input_shape;
|
||||
};
|
||||
|
||||
struct ProfileInputRange {
|
||||
ShapeVector min_dims;
|
||||
ShapeVector opt_dims;
|
||||
ShapeVector max_dims;
|
||||
};
|
||||
|
||||
struct ProfileItem {
|
||||
std::vector<ProfileInputRange> inputs;
|
||||
};
|
||||
|
||||
struct ProfileConfigs {
|
||||
std::vector<ProfileInputInfo> input_infos;
|
||||
std::vector<ProfileItem> profiles;
|
||||
};
|
||||
|
||||
class MS_API ProfileParser {
|
||||
public:
|
||||
static bool Parse(const std::map<std::string, std::string> &context, bool require_opt_when_dym,
|
||||
ProfileConfigs *profile_configs);
|
||||
|
||||
static bool ReorderByInputNames(const std::vector<std::string> &input_names, ProfileConfigs *profile_configs);
|
||||
|
||||
private:
|
||||
static bool ParseOptDims(const std::string &opt_dims_str, ProfileConfigs *profile_configs);
|
||||
static bool ParseDynamicDims(const std::string &dynamic_dims_str, ProfileConfigs *profile_configs);
|
||||
static bool ParseInputShape(const std::string &input_shapes_str, ProfileConfigs *profile_configs);
|
||||
|
||||
static bool StrToInt64(const std::string &str, int64_t *val);
|
||||
static std::vector<std::string> Split(const std::string &str, const std::string &delim);
|
||||
static bool ParseShapeStr(const std::vector<std::string> &str_dims, ShapeVector *shape_ptr);
|
||||
static bool ParseRangeStr(const std::string &range_str, int64_t *min_ptr, int64_t *max_ptr);
|
||||
static bool ParseOptDimStr(const std::string &opt_dim_str, int64_t *opt_ptr);
|
||||
};
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_LITE_SRC_COMMON_CONFIG_INFOS_H_
|
|
@ -120,6 +120,7 @@ if(MSLITE_ENABLE_CLOUD_FUSION_INFERENCE)
|
|||
${CCSRC_DIR}/utils/scoped_long_running.cc
|
||||
${CCSRC_DIR}/utils/cse.cc
|
||||
${CCSRC_DIR}/utils/comm_manager.cc
|
||||
${CCSRC_DIR}/utils/signal_util.cc
|
||||
${CORE_DIR}/utils/status.cc
|
||||
)
|
||||
add_library(mindspore-kernel-graph OBJECT ${KERNEL_GRAPH_SRC})
|
||||
|
|
|
@ -216,10 +216,37 @@ Status Model::BindGLTexture2DMemory(const std::map<std::string, unsigned int> &i
|
|||
return kSuccess;
|
||||
}
|
||||
|
||||
Status Model::LoadConfig(const std::vector<char> &config_path) { return kSuccess; }
|
||||
Status Model::LoadConfig(const std::vector<char> &config_path) {
|
||||
std::unique_lock<std::mutex> impl_lock(g_impl_init_lock);
|
||||
if (impl_ != nullptr) {
|
||||
MS_LOG(ERROR) << "impl_ illegal in LoadConfig.";
|
||||
return Status(kLiteFileError, "Illegal operation.");
|
||||
}
|
||||
|
||||
impl_ = std::make_shared<ModelImpl>();
|
||||
if (impl_ == nullptr) {
|
||||
MS_LOG(ERROR) << "Model implement is null.";
|
||||
return Status(kLiteFileError, "Fail to load config file.");
|
||||
}
|
||||
|
||||
auto ret = impl_->LoadConfig(CharToString(config_path));
|
||||
if (ret != kSuccess) {
|
||||
MS_LOG(ERROR) << "impl_ LoadConfig failed,";
|
||||
return Status(kLiteFileError, "Invalid config file.");
|
||||
}
|
||||
return kSuccess;
|
||||
}
|
||||
|
||||
Status Model::UpdateConfig(const std::vector<char> §ion,
|
||||
const std::pair<std::vector<char>, std::vector<char>> &config) {
|
||||
return kSuccess;
|
||||
std::unique_lock<std::mutex> impl_lock(g_impl_init_lock);
|
||||
if (impl_ == nullptr) {
|
||||
impl_ = std::make_shared<ModelImpl>();
|
||||
}
|
||||
if (impl_ != nullptr) {
|
||||
return impl_->UpdateConfig(CharToString(section), {CharToString(config.first), CharToString(config.second)});
|
||||
}
|
||||
MS_LOG(ERROR) << "Model implement is null!";
|
||||
return kLiteFileError;
|
||||
}
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -44,7 +44,7 @@ Status ModelImpl::BuildByBufferImpl(const void *model_data, size_t data_size, Mo
|
|||
// user does not set mindir_path, convert from model_path
|
||||
mindir_path = model_path.substr(0, model_path.rfind("/"));
|
||||
}
|
||||
session_ = InferSession::CreateSession(model_context);
|
||||
session_ = InferSession::CreateSession(model_context, config_info_);
|
||||
if (session_ == nullptr) {
|
||||
MS_LOG(ERROR) << "Create session failed.";
|
||||
return kLiteNullptr;
|
||||
|
@ -254,7 +254,6 @@ Status ModelImpl::Predict() {
|
|||
auto outputs = GetOutputs();
|
||||
return Predict(inputs, &outputs);
|
||||
}
|
||||
|
||||
bool ModelImpl::HasPreprocess() { return graph_->graph_data_->GetPreprocess().empty() ? false : true; }
|
||||
|
||||
Status ModelImpl::Preprocess(const std::vector<std::vector<MSTensor>> &inputs, std::vector<MSTensor> *outputs) {
|
||||
|
@ -345,19 +344,13 @@ Status ModelImpl::PredictWithPreprocess(const std::vector<std::vector<MSTensor>>
|
|||
}
|
||||
|
||||
Status ModelImpl::LoadConfig(const std::string &config_path) {
|
||||
std::map<std::string, std::map<std::string, std::string>> all_config_info;
|
||||
ConfigInfos all_config_info;
|
||||
int ret = lite::GetAllSectionInfoFromConfigFile(config_path, &all_config_info);
|
||||
if (ret != kSuccess) {
|
||||
MS_LOG(ERROR) << "GetAllSectionInfoFromConfigFile fail!ret: " << ret;
|
||||
return kLiteFileError;
|
||||
}
|
||||
config_info_ = all_config_info;
|
||||
std::map<std::string, std::string> config_info = all_config_info[kExecutionPlan];
|
||||
if (config_info.empty()) {
|
||||
MS_LOG(WARNING) << "No valid execution plan info in config file.";
|
||||
return kSuccess;
|
||||
}
|
||||
|
||||
return kSuccess;
|
||||
}
|
||||
|
||||
|
|
|
@ -29,6 +29,8 @@
|
|||
#include "include/common/utils/utils.h"
|
||||
#include "ir/func_graph.h"
|
||||
#include "extendrt/infer_session.h"
|
||||
#include "src/common/config_infos.h"
|
||||
|
||||
#ifndef _WIN32
|
||||
#include <dlfcn.h>
|
||||
#endif
|
||||
|
@ -73,10 +75,11 @@ class ModelImpl {
|
|||
std::shared_ptr<Graph> graph_ = nullptr;
|
||||
std::shared_ptr<InferSession> session_ = nullptr;
|
||||
std::shared_ptr<Context> context_ = nullptr;
|
||||
ConfigInfos config_info_;
|
||||
std::map<std::string, TypeId> execution_plan_;
|
||||
#ifndef _WIN32
|
||||
void *handle_ = nullptr;
|
||||
#endif
|
||||
std::map<std::string, std::map<std::string, std::string>> config_info_;
|
||||
};
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_LITE_SRC_EXTENDRT_CXX_API_MODEL_MODEL_IMPL_H_
|
||||
|
|
|
@ -1096,7 +1096,7 @@ Status ModelPool::InitByPath(const std::string &model_path, const std::shared_pt
|
|||
size_t size = 0;
|
||||
graph_buf_ = lite::ReadFile(model_path.c_str(), &size);
|
||||
if (graph_buf_ == nullptr) {
|
||||
MS_LOG(ERROR) << "read ms model failed, model path: " << model_path;
|
||||
MS_LOG(ERROR) << "read model failed, model path: " << model_path;
|
||||
return kLiteNullptr;
|
||||
}
|
||||
auto status = Init(graph_buf_, size, runner_config);
|
||||
|
|
|
@ -24,11 +24,15 @@
|
|||
#include "runtime/hardware/device_context.h"
|
||||
#include "src/extendrt/delegate_graph_executor.h"
|
||||
#include "include/api/context.h"
|
||||
#include "src/common/config_infos.h"
|
||||
|
||||
namespace mindspore {
|
||||
using mindspore::device::GraphExecutor;
|
||||
// TODO(zhaizhiqiang): Wrap graph executor as delegate.
|
||||
// typedef std::shared_ptr<GraphSinkDelegate> (*DelegateCreator)(const std::shared_ptr<Context> &);
|
||||
typedef std::shared_ptr<GraphExecutor> (*DelegateCreator)(const std::shared_ptr<Context> &);
|
||||
using DelegateCreator =
|
||||
std::function<std::shared_ptr<GraphExecutor>(const std::shared_ptr<Context> &, const ConfigInfos &)>;
|
||||
|
||||
class MS_API DelegateRegistry {
|
||||
public:
|
||||
DelegateRegistry() = default;
|
||||
|
@ -48,7 +52,7 @@ class MS_API DelegateRegistry {
|
|||
}
|
||||
|
||||
std::shared_ptr<GraphExecutor> GetDelegate(const mindspore::DeviceType &device_type, const std::string &provider,
|
||||
const std::shared_ptr<Context> &ctx) {
|
||||
const std::shared_ptr<Context> &ctx, const ConfigInfos &config_infos) {
|
||||
// find common delegate
|
||||
auto it = creator_map_.find(device_type);
|
||||
if (it == creator_map_.end()) {
|
||||
|
@ -58,7 +62,7 @@ class MS_API DelegateRegistry {
|
|||
if (creator_it == it->second.end()) {
|
||||
return nullptr;
|
||||
}
|
||||
return creator_it->second(ctx);
|
||||
return creator_it->second(ctx, config_infos);
|
||||
}
|
||||
|
||||
private:
|
||||
|
|
|
@ -26,6 +26,7 @@ set(LITE_SRC
|
|||
${LITE_DIR}/src/common/log.cc
|
||||
${LITE_DIR}/src/common/prim_util.cc
|
||||
${LITE_DIR}/src/common/tensor_util.cc
|
||||
${LITE_DIR}/src/common/config_infos.cc
|
||||
${LITE_DIR}/src/litert/allocator.cc
|
||||
${LITE_DIR}/src/litert/inner_allocator.cc
|
||||
${LITE_DIR}/src/litert/runtime_allocator.cc
|
||||
|
|
|
@ -41,7 +41,9 @@ const int64_t kBufferSize = 1024;
|
|||
} // namespace
|
||||
const char litert_provider[] = "litert";
|
||||
|
||||
LiteRTGraphExecutor::LiteRTGraphExecutor(const std::shared_ptr<mindspore::Context> &context) : context_(context) {
|
||||
LiteRTGraphExecutor::LiteRTGraphExecutor(const std::shared_ptr<mindspore::Context> &context,
|
||||
const ConfigInfos &config_infos)
|
||||
: context_(context), config_infos_(config_infos) {
|
||||
lite_session_ = CreateLiteSession(ContextUtils::Convert(context_.get()));
|
||||
}
|
||||
|
||||
|
@ -355,8 +357,9 @@ bool LiteRTGraphExecutor::IsNeedExtractTensorData(mindspore::schema::MetaGraphT
|
|||
return false;
|
||||
}
|
||||
|
||||
static std::shared_ptr<device::GraphExecutor> LiteRTGraphExecutorCreator(const std::shared_ptr<Context> &ctx) {
|
||||
return std::make_shared<LiteRTGraphExecutor>(ctx);
|
||||
static std::shared_ptr<device::GraphExecutor> LiteRTGraphExecutorCreator(const std::shared_ptr<Context> &ctx,
|
||||
const ConfigInfos &config_infos) {
|
||||
return std::make_shared<LiteRTGraphExecutor>(ctx, config_infos);
|
||||
}
|
||||
|
||||
REG_DELEGATE(kCPU, litert_provider, LiteRTGraphExecutorCreator);
|
||||
|
|
|
@ -28,12 +28,13 @@
|
|||
#include "include/model.h"
|
||||
#include "src/litert/lite_session.h"
|
||||
#include "src/common/helper/infer_helpers.h"
|
||||
#include "src/common/config_infos.h"
|
||||
|
||||
namespace mindspore {
|
||||
class LiteRTGraphExecutor : public LiteGraphExecutor {
|
||||
public:
|
||||
LiteRTGraphExecutor() = default;
|
||||
explicit LiteRTGraphExecutor(const std::shared_ptr<mindspore::Context> &context);
|
||||
LiteRTGraphExecutor(const std::shared_ptr<mindspore::Context> &context, const ConfigInfos &config_infos);
|
||||
~LiteRTGraphExecutor() = default;
|
||||
bool CompileGraph(const FuncGraphPtr &graph, const std::map<string, string> &compile_options) override;
|
||||
bool RunGraph(const FuncGraphPtr &graph, const std::vector<tensor::Tensor> &inputs,
|
||||
|
@ -56,6 +57,7 @@ class LiteRTGraphExecutor : public LiteGraphExecutor {
|
|||
|
||||
private:
|
||||
const std::shared_ptr<mindspore::Context> context_;
|
||||
ConfigInfos config_infos_;
|
||||
lite::LiteGraph lite_graph_;
|
||||
std::shared_ptr<lite::LiteSession> lite_session_;
|
||||
std::shared_ptr<mindspore::infer::helper::InferHelpers> helpers_ = nullptr;
|
||||
|
|
|
@ -39,6 +39,20 @@ __global__ void LogicalOrKernel(const T *input_addr1, const T *input_addr2, T *o
|
|||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void GreaterOrEqualKernal(const T *input1, const T *input2, T *output, int element_cnt) {
|
||||
for (int pos = blockIdx.x * blockDim.x + threadIdx.x; pos < element_cnt; pos += blockDim.x * gridDim.x) {
|
||||
output[pos] = (input1[pos] >= input2[pos]);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
__global__ void LessOrEqualKernal(const T *input1, const T *input2, T *output, int element_cnt) {
|
||||
for (int pos = blockIdx.x * blockDim.x + threadIdx.x; pos < element_cnt; pos += blockDim.x * gridDim.x) {
|
||||
output[pos] = (input1[pos] <= input2[pos]);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void LogicalNot(const T *input1, T *output, int element_cnt, cudaStream_t stream) {
|
||||
LogicalNotKernel<<<GET_BLOCKS(element_cnt), GET_THREADS, 0, stream>>>(input1, output, element_cnt);
|
||||
|
@ -54,6 +68,26 @@ void LogicalOr(const T *input1, const T *input2, T *output, int element_cnt, cud
|
|||
LogicalOrKernel<<<GET_BLOCKS(element_cnt), GET_THREADS, 0, stream>>>(input1, input2, output, element_cnt);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void GreaterOrEqual(const T *input1, const T *input2, T *output, int element_cnt, cudaStream_t stream) {
|
||||
GreaterOrEqualKernal<<<GET_BLOCKS(element_cnt), GET_THREADS, 0, stream>>>(input1, input2, output, element_cnt);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void LessOrEqual(const T *input1, const T *input2, T *output, int element_cnt, cudaStream_t stream) {
|
||||
LessOrEqualKernal<<<GET_BLOCKS(element_cnt), GET_THREADS, 0, stream>>>(input1, input2, output, element_cnt);
|
||||
}
|
||||
|
||||
template void GreaterOrEqual(const float *input1, const float *input2, float *output, int element_cnt,
|
||||
cudaStream_t stream);
|
||||
|
||||
template void GreaterOrEqual(const int *input1, const int *input2, int *output, int element_cnt, cudaStream_t stream);
|
||||
|
||||
template void LessOrEqual(const float *input1, const float *input2, float *output, int element_cnt,
|
||||
cudaStream_t stream);
|
||||
|
||||
template void LessOrEqual(const int *input1, const int *input2, int *output, int element_cnt, cudaStream_t stream);
|
||||
|
||||
template void LogicalNot(const int32_t *input1, int32_t *output, int element_cnt, cudaStream_t stream);
|
||||
|
||||
template void LogicalAnd(const int32_t *input1, const int32_t *input2, int32_t *output, int element_cnt,
|
||||
|
|
|
@ -26,4 +26,10 @@ void LogicalOr(const T *input1, const T *input2, T *output, int element_cnt, cud
|
|||
template <typename T>
|
||||
void LogicalNot(const T *input1, T *output, int element_cnt, cudaStream_t stream);
|
||||
|
||||
template <typename T>
|
||||
void GreaterOrEqual(const T *input1, const T *input2, T *output, int element_cnt, cudaStream_t stream);
|
||||
|
||||
template <typename T>
|
||||
void LessOrEqual(const T *input1, const T *input2, T *output, int element_cnt, cudaStream_t stream);
|
||||
|
||||
#endif // MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_CDUA_IMPL_LOGICAL_H_
|
||||
|
|
|
@ -90,8 +90,8 @@ int ActivationOptPlugin::RunCudaActivation(const nvinfer1::PluginTensorDesc *inp
|
|||
break;
|
||||
}
|
||||
case (ActivationType::SWISH): {
|
||||
CalSwish(GetDimsVolume(inputDesc[0].dims), static_cast<const float *>(inputs[0]),
|
||||
static_cast<float *>(outputs[0]), stream, device_id_);
|
||||
Swish(GetDimsVolume(inputDesc[0].dims), static_cast<const float *>(inputs[0]), static_cast<float *>(outputs[0]),
|
||||
stream, device_id_);
|
||||
break;
|
||||
}
|
||||
default: {
|
||||
|
|
|
@ -0,0 +1,100 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/extendrt/delegate/tensorrt/op/batchnorm_tensorrt.h"
|
||||
#include <memory>
|
||||
#include <numeric>
|
||||
#include "ops/fused_batch_norm.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
int BatchNormTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
|
||||
const std::vector<TensorInfo> &out_tensors) {
|
||||
if (in_tensors.size() != INPUT_SIZE5 && in_tensors.size() != 1) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (out_tensors.size() != 1) {
|
||||
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << in_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int BatchNormTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
||||
CHECK_NULL_RETURN(ctx->network());
|
||||
auto norm_op = AsOps<ops::FusedBatchNorm>();
|
||||
CHECK_NULL_RETURN(norm_op);
|
||||
epsilon_ = norm_op->get_epsilon();
|
||||
|
||||
ITensorHelper norm_input;
|
||||
int ret = PreprocessInputs2SameDim(ctx, input(ctx, 0), &norm_input);
|
||||
if (ret != RET_OK || norm_input.trt_tensor_ == nullptr) {
|
||||
MS_LOG(ERROR) << "PreprocessInputs2SameDim norm_input failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto expect_shape = ConvertMSShape(norm_input.trt_tensor_->getDimensions());
|
||||
gamma_ = ConvertTensorWithExpandDims(ctx, in_tensors_[1], expect_shape, op_name_ + in_tensors_[1].Name());
|
||||
CHECK_NULL_RETURN(gamma_);
|
||||
beta_ =
|
||||
ConvertTensorWithExpandDims(ctx, in_tensors_[BETA_INDEX], expect_shape, op_name_ + in_tensors_[BETA_INDEX].Name());
|
||||
CHECK_NULL_RETURN(beta_);
|
||||
mean_ =
|
||||
ConvertTensorWithExpandDims(ctx, in_tensors_[MEAN_INDEX], expect_shape, op_name_ + in_tensors_[MEAN_INDEX].Name());
|
||||
CHECK_NULL_RETURN(mean_);
|
||||
var_ =
|
||||
ConvertTensorWithExpandDims(ctx, in_tensors_[VAR_INDEX], expect_shape, op_name_ + in_tensors_[VAR_INDEX].Name());
|
||||
CHECK_NULL_RETURN(var_);
|
||||
|
||||
return RunAsTrtOps(ctx, norm_input);
|
||||
}
|
||||
|
||||
int BatchNormTensorRT::RunAsTrtOps(TensorRTContext *ctx, ITensorHelper norm_input) {
|
||||
// var + min epsilon
|
||||
auto const_epsilon = ConvertScalarToITensor(ctx, norm_input.trt_tensor_->getDimensions().nbDims, &epsilon_,
|
||||
DataType::kNumberTypeFloat32, op_name_ + "_epsilion");
|
||||
CHECK_NULL_RETURN(const_epsilon);
|
||||
auto var_epsilon =
|
||||
ctx->network()->addElementWise(*var_, *const_epsilon, nvinfer1::ElementWiseOperation::kSUM)->getOutput(0);
|
||||
CHECK_NULL_RETURN(var_epsilon);
|
||||
|
||||
// standard deviation
|
||||
auto std_dev = ctx->network()->addUnary(*var_epsilon, nvinfer1::UnaryOperation::kSQRT)->getOutput(0);
|
||||
CHECK_NULL_RETURN(std_dev);
|
||||
|
||||
auto scale = ctx->network()->addElementWise(*gamma_, *std_dev, nvinfer1::ElementWiseOperation::kDIV)->getOutput(0);
|
||||
CHECK_NULL_RETURN(scale);
|
||||
|
||||
auto mean_mul_scale =
|
||||
ctx->network()->addElementWise(*mean_, *scale, nvinfer1::ElementWiseOperation::kPROD)->getOutput(0);
|
||||
CHECK_NULL_RETURN(mean_mul_scale);
|
||||
|
||||
auto bias =
|
||||
ctx->network()->addElementWise(*beta_, *mean_mul_scale, nvinfer1::ElementWiseOperation::kSUB)->getOutput(0);
|
||||
CHECK_NULL_RETURN(bias);
|
||||
|
||||
// scale with bias
|
||||
auto scale_layer =
|
||||
ctx->network()->addElementWise(*norm_input.trt_tensor_, *scale, nvinfer1::ElementWiseOperation::kPROD);
|
||||
this->layer_ = scale_layer;
|
||||
auto scale_out = scale_layer->getOutput(0);
|
||||
CHECK_NULL_RETURN(scale_out);
|
||||
auto beta_out = ctx->network()->addElementWise(*scale_out, *bias, nvinfer1::ElementWiseOperation::kSUM)->getOutput(0);
|
||||
CHECK_NULL_RETURN(beta_out);
|
||||
ctx->RegisterTensor(ITensorHelper{beta_out, Format::NCHW, true}, out_tensors_[0].Name());
|
||||
return RET_OK;
|
||||
}
|
||||
REGISTER_TENSORRT_CREATOR(ops::kNameFusedBatchNorm, BatchNormTensorRT)
|
||||
} // namespace mindspore::lite
|
|
@ -0,0 +1,50 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_BATCH_NORM_TENSORRT_H_
|
||||
#define MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_BATCH_NORM_TENSORRT_H_
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "src/extendrt/delegate/tensorrt/op/tensorrt_op.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
constexpr int BETA_INDEX = 2;
|
||||
constexpr int MEAN_INDEX = 3;
|
||||
constexpr int VAR_INDEX = 4;
|
||||
|
||||
class BatchNormTensorRT : public TensorRTOp {
|
||||
public:
|
||||
BatchNormTensorRT(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
|
||||
const std::vector<TensorInfo> &out_tensors, std::string name)
|
||||
: TensorRTOp(base_operator, in_tensors, out_tensors, name) {}
|
||||
|
||||
~BatchNormTensorRT() override = default;
|
||||
|
||||
int AddInnerOp(TensorRTContext *ctx) override;
|
||||
|
||||
int IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
|
||||
const std::vector<TensorInfo> &out_tensors) override;
|
||||
|
||||
private:
|
||||
int RunAsTrtOps(TensorRTContext *ctx, ITensorHelper helper);
|
||||
|
||||
float epsilon_{0.0f};
|
||||
nvinfer1::ITensor *gamma_{nullptr};
|
||||
nvinfer1::ITensor *beta_{nullptr};
|
||||
nvinfer1::ITensor *mean_{nullptr};
|
||||
nvinfer1::ITensor *var_{nullptr};
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_BATCH_NORM_TENSORRT_H_
|
|
@ -29,7 +29,7 @@
|
|||
namespace mindspore::lite {
|
||||
int BatchToSpaceTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
|
||||
const std::vector<TensorInfo> &out_tensors) {
|
||||
if (in_tensors.size() != 3) {
|
||||
if (in_tensors.size() != INPUT_SIZE3) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
@ -38,8 +38,6 @@ int BatchToSpaceTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const
|
|||
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
dynamic_shape_params_.support_dynamic_ = false;
|
||||
dynamic_shape_params_.support_hw_dynamic_ = false;
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
@ -52,19 +50,17 @@ int BatchToSpaceTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
|||
MS_LOG(ERROR) << "block_h not equal block_w " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
const int *pad_ptr = reinterpret_cast<const int *>(in_tensors_[2].Data());
|
||||
const int *pad_ptr = reinterpret_cast<const int *>(in_tensors_[INPUT_SIZE2].Data());
|
||||
int ph0 = *(pad_ptr + 0);
|
||||
int ph1 = *(pad_ptr + 1);
|
||||
int pw0 = *(pad_ptr + 2);
|
||||
int pw1 = *(pad_ptr + 3);
|
||||
auto in_dims = input_tensor->getDimensions();
|
||||
int in = in_dims.d[0];
|
||||
int ic = in_dims.d[1];
|
||||
int ih = in_dims.d[2];
|
||||
int iw = in_dims.d[3];
|
||||
int pw0 = *(pad_ptr + INPUT_SIZE2);
|
||||
int pw1 = *(pad_ptr + INPUT_SIZE3);
|
||||
|
||||
auto plugin =
|
||||
std::make_shared<BatchToSpacePlugin>(input_tensor->getName(), in, ic, ih, iw, bh, ph0, ph1, pw0, pw1, device_id_);
|
||||
auto plugin = std::make_shared<BatchToSpacePlugin>(input_tensor->getName(), bh, ph0, ph1, pw0, pw1, device_id_);
|
||||
if (plugin == nullptr) {
|
||||
MS_LOG(ERROR) << "add batchtospace plugin failed for" << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
nvinfer1::ITensor *inputTensors[] = {input_tensor};
|
||||
nvinfer1::IPluginV2Layer *space2batch_opt_layer = ctx->network()->addPluginV2(inputTensors, 1, *plugin);
|
||||
if (space2batch_opt_layer == nullptr) {
|
||||
|
@ -93,15 +89,20 @@ int BatchToSpacePlugin::enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
|
|||
|
||||
int BatchToSpacePlugin::RunCudaBatchToSpace(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs,
|
||||
void *const *outputs, cudaStream_t stream) {
|
||||
int on = in_ / (bh_ * bh_);
|
||||
int oc = ic_;
|
||||
int oh = ih_ * bh_ - ph0_ - ph1_;
|
||||
int ow = iw_ * bh_ - pw0_ - pw1_;
|
||||
nvinfer1::Dims input_dims = inputDesc[0].dims;
|
||||
int in = input_dims.d[0];
|
||||
int ic = input_dims.d[1];
|
||||
int ih = input_dims.d[2];
|
||||
int iw = input_dims.d[3];
|
||||
int on = in / (bh_ * bh_);
|
||||
int oc = ic;
|
||||
int oh = ih * bh_ - ph0_ - ph1_;
|
||||
int ow = iw * bh_ - pw0_ - pw1_;
|
||||
|
||||
int size = on * oc * oh * ow;
|
||||
|
||||
CalBatchToSpace<float>(size, static_cast<const float *>(inputs[0]), in_, ih_, iw_, ic_, on, oh, ow, oc, ph0_, ph1_,
|
||||
pw0_, pw1_, bh_, static_cast<float *>(outputs[0]), device_id_, stream);
|
||||
CalBatchToSpace<float>(size, static_cast<const float *>(inputs[0]), in, ih, iw, ic, on, oh, ow, oc, ph0_, ph1_, pw0_,
|
||||
pw1_, bh_, static_cast<float *>(outputs[0]), device_id_, stream);
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
@ -115,7 +116,7 @@ nvinfer1::IPluginV2DynamicExt *BatchToSpacePlugin::clone() const noexcept {
|
|||
return plugin;
|
||||
}
|
||||
|
||||
size_t BatchToSpacePlugin::getSerializationSize() const noexcept { return sizeof(int) * 9; }
|
||||
size_t BatchToSpacePlugin::getSerializationSize() const noexcept { return sizeof(int) * 5; }
|
||||
|
||||
nvinfer1::DimsExprs BatchToSpacePlugin::getOutputDimensions(int index, const nvinfer1::DimsExprs *inputs,
|
||||
int nbInputDims,
|
||||
|
@ -127,19 +128,15 @@ nvinfer1::DimsExprs BatchToSpacePlugin::getOutputDimensions(int index, const nvi
|
|||
auto bh = exprBuilder.constant(bh_);
|
||||
dims.d[1] = inputs[0].d[1];
|
||||
auto ph_sum = exprBuilder.constant(ph0_ + ph1_);
|
||||
auto pod0 = exprBuilder.operation(nvinfer1::DimensionOperation::kPROD, *inputs[0].d[2], *bh);
|
||||
dims.d[2] = exprBuilder.operation(nvinfer1::DimensionOperation::kSUB, *pod0, *ph_sum);
|
||||
auto pod0 = exprBuilder.operation(nvinfer1::DimensionOperation::kPROD, *inputs[0].d[INPUT_SIZE2], *bh);
|
||||
dims.d[INPUT_SIZE2] = exprBuilder.operation(nvinfer1::DimensionOperation::kSUB, *pod0, *ph_sum);
|
||||
auto pw_sum = exprBuilder.constant(pw0_ + pw1_);
|
||||
auto pod1 = exprBuilder.operation(nvinfer1::DimensionOperation::kPROD, *inputs[0].d[3], *bh);
|
||||
dims.d[3] = exprBuilder.operation(nvinfer1::DimensionOperation::kSUB, *pod1, *pw_sum);
|
||||
auto pod1 = exprBuilder.operation(nvinfer1::DimensionOperation::kPROD, *inputs[0].d[INPUT_SIZE3], *bh);
|
||||
dims.d[INPUT_SIZE3] = exprBuilder.operation(nvinfer1::DimensionOperation::kSUB, *pod1, *pw_sum);
|
||||
return dims;
|
||||
}
|
||||
|
||||
void BatchToSpacePlugin::serialize(void *buffer) const noexcept {
|
||||
SerializeValue(&buffer, &in_, sizeof(int));
|
||||
SerializeValue(&buffer, &ic_, sizeof(int));
|
||||
SerializeValue(&buffer, &ih_, sizeof(int));
|
||||
SerializeValue(&buffer, &iw_, sizeof(int));
|
||||
SerializeValue(&buffer, &bh_, sizeof(int));
|
||||
SerializeValue(&buffer, &ph0_, sizeof(int));
|
||||
SerializeValue(&buffer, &ph1_, sizeof(int));
|
||||
|
|
|
@ -40,13 +40,8 @@ class BatchToSpaceTensorRT : public TensorRTOp {
|
|||
constexpr auto BATCHTOSPACETENSORRT_PLUGIN_NAME{"BatchToSpacePlugin"};
|
||||
class BatchToSpacePlugin : public TensorRTPlugin {
|
||||
public:
|
||||
BatchToSpacePlugin(const std::string name, int in, int ic, int ih, int iw, int bh, int ph0, int ph1, int pw0, int pw1,
|
||||
uint32_t device_id)
|
||||
BatchToSpacePlugin(const std::string name, int bh, int ph0, int ph1, int pw0, int pw1, uint32_t device_id)
|
||||
: TensorRTPlugin(name, std::string(BATCHTOSPACETENSORRT_PLUGIN_NAME), device_id),
|
||||
in_(in),
|
||||
ic_(ic),
|
||||
ih_(ih),
|
||||
iw_(iw),
|
||||
bh_(bh),
|
||||
ph0_(ph0),
|
||||
ph1_(ph1),
|
||||
|
@ -56,23 +51,15 @@ class BatchToSpacePlugin : public TensorRTPlugin {
|
|||
BatchToSpacePlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
|
||||
: TensorRTPlugin(std::string(name), std::string(BATCHTOSPACETENSORRT_PLUGIN_NAME)) {
|
||||
const nvinfer1::PluginField *fields = fc->fields;
|
||||
in_ = static_cast<const int *>(fields[0].data)[0];
|
||||
ic_ = static_cast<const int *>(fields[1].data)[0];
|
||||
ih_ = static_cast<const int *>(fields[2].data)[0];
|
||||
iw_ = static_cast<const int *>(fields[3].data)[0];
|
||||
bh_ = static_cast<const int *>(fields[4].data)[0];
|
||||
ph0_ = static_cast<const int *>(fields[5].data)[0];
|
||||
ph1_ = static_cast<const int *>(fields[6].data)[0];
|
||||
pw0_ = static_cast<const int *>(fields[7].data)[0];
|
||||
pw1_ = static_cast<const int *>(fields[8].data)[0];
|
||||
bh_ = static_cast<const int *>(fields[0].data)[0];
|
||||
ph0_ = static_cast<const int *>(fields[1].data)[0];
|
||||
ph1_ = static_cast<const int *>(fields[2].data)[0];
|
||||
pw0_ = static_cast<const int *>(fields[3].data)[0];
|
||||
pw1_ = static_cast<const int *>(fields[4].data)[0];
|
||||
}
|
||||
|
||||
BatchToSpacePlugin(const char *name, const void *serialData, size_t serialLength)
|
||||
: TensorRTPlugin(std::string(name), std::string(BATCHTOSPACETENSORRT_PLUGIN_NAME)) {
|
||||
DeserializeValue(&serialData, &serialLength, &in_, sizeof(int));
|
||||
DeserializeValue(&serialData, &serialLength, &ic_, sizeof(int));
|
||||
DeserializeValue(&serialData, &serialLength, &ih_, sizeof(int));
|
||||
DeserializeValue(&serialData, &serialLength, &iw_, sizeof(int));
|
||||
DeserializeValue(&serialData, &serialLength, &bh_, sizeof(int));
|
||||
DeserializeValue(&serialData, &serialLength, &ph0_, sizeof(int));
|
||||
DeserializeValue(&serialData, &serialLength, &ph1_, sizeof(int));
|
||||
|
@ -93,15 +80,11 @@ class BatchToSpacePlugin : public TensorRTPlugin {
|
|||
private:
|
||||
int RunCudaBatchToSpace(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs, void *const *outputs,
|
||||
cudaStream_t stream);
|
||||
size_t in_;
|
||||
size_t ic_;
|
||||
size_t ih_;
|
||||
size_t iw_;
|
||||
size_t bh_;
|
||||
size_t ph0_;
|
||||
size_t ph1_;
|
||||
size_t pw0_;
|
||||
size_t pw1_;
|
||||
int bh_;
|
||||
int ph0_;
|
||||
int ph1_;
|
||||
int pw0_;
|
||||
int pw1_;
|
||||
const std::string layer_name_;
|
||||
std::string name_space_;
|
||||
};
|
||||
|
|
|
@ -40,7 +40,7 @@ int ConcateTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::
|
|||
|
||||
return RET_OK;
|
||||
}
|
||||
int ConcateTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
||||
int ConcateTensorRT::CheckParams(TensorRTContext *ctx) {
|
||||
if (ctx == nullptr || ctx->network() == nullptr) {
|
||||
MS_LOG(ERROR) << "context or network is invalid";
|
||||
return RET_ERROR;
|
||||
|
@ -59,6 +59,14 @@ int ConcateTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
|||
<< ", but origin ms tensor has " << in_tensors_.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int ConcateTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
||||
if (CheckParams(ctx) != RET_OK) {
|
||||
MS_LOG(ERROR) << "Check input tensors failed: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
nvinfer1::ITensor *trt_input_tensors[in_tensors_.size()];
|
||||
int ret = PreProcessInputs(ctx, trt_input_tensors);
|
||||
|
@ -67,8 +75,15 @@ int ConcateTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
|||
return ret;
|
||||
}
|
||||
|
||||
if (type_ == ops::kNameStack) {
|
||||
for (size_t i = 0; i != in_tensors_.size(); ++i) {
|
||||
bool has_rank_0 = false;
|
||||
for (size_t i = 0; i < in_tensors_.size(); ++i) {
|
||||
if (!input(ctx, i).is_tensor) {
|
||||
has_rank_0 = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (type_ == ops::kNameStack && !has_rank_0) {
|
||||
for (size_t i = 0; i < in_tensors_.size(); ++i) {
|
||||
auto shuffle_layer = ctx->network()->addShuffle(*trt_input_tensors[i]);
|
||||
if (shuffle_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "addShuffle failed for TensorRT.";
|
||||
|
@ -95,7 +110,7 @@ int ConcateTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
|||
}
|
||||
concate_layer->setName(op_name_.c_str());
|
||||
auto concat_output = concate_layer->getOutput(0);
|
||||
ctx->RegisterTensor(ITensorHelper{concat_output, out_format_, true}, out_tensors_[0].Name());
|
||||
ctx->RegisterTensor(ITensorHelper{concat_output, NCHW, true}, out_tensors_[0].Name());
|
||||
this->layer_ = concate_layer;
|
||||
return RET_OK;
|
||||
}
|
||||
|
|
|
@ -43,6 +43,7 @@ class ConcateTensorRT : public TensorRTOp {
|
|||
|
||||
private:
|
||||
int PreProcessInputs(TensorRTContext *ctx, nvinfer1::ITensor *trt_input_tensors[]);
|
||||
int CheckParams(TensorRTContext *ctx);
|
||||
|
||||
Format out_format_{Format::NCHW};
|
||||
bool same_format_{true};
|
||||
|
|
|
@ -0,0 +1,81 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/extendrt/delegate/tensorrt/op/constantofshape_tensorrt.h"
|
||||
#include <numeric>
|
||||
#include "src/extendrt/delegate/tensorrt/tensorrt_utils.h"
|
||||
#include "ops/constant_of_shape.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
int ConstantOfShapeTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
|
||||
const std::vector<TensorInfo> &out_tensors) {
|
||||
if (in_tensors.size() != 1) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size() << " : " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
if (out_tensors.size() != 1) {
|
||||
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size() << " : " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto constofshape_op = AsOps<ops::ConstantOfShape>();
|
||||
if (constofshape_op == nullptr) {
|
||||
MS_LOG(ERROR) << "Failed to as operator ConstantOfShape: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
DataType data_type = static_cast<DataType>(constofshape_op->get_data_type());
|
||||
if (data_type != DataType::kNumberTypeInt32 && data_type != DataType::kNumberTypeFloat32) {
|
||||
MS_LOG(ERROR) << "Unsupported data type for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int ConstantOfShapeTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
||||
auto constofshape_op = AsOps<ops::ConstantOfShape>();
|
||||
if (constofshape_op == nullptr) {
|
||||
MS_LOG(ERROR) << "Failed to as operator ConstantOfShape: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto &&value_vector = constofshape_op->get_value();
|
||||
auto value_tensor = ctx->ConvertTo1DTensor(value_vector);
|
||||
CHECK_NULL_RETURN(value_tensor);
|
||||
|
||||
auto unsqueeze_layer = ctx->network()->addShuffle(*value_tensor);
|
||||
CHECK_NULL_RETURN(unsqueeze_layer);
|
||||
|
||||
auto shape = input(ctx, 0).trt_tensor_;
|
||||
int rank = shape->getDimensions().d[0];
|
||||
nvinfer1::Dims unsqueeze{rank};
|
||||
std::fill(unsqueeze.d, unsqueeze.d + rank, 1);
|
||||
unsqueeze_layer->setReshapeDimensions(unsqueeze);
|
||||
unsqueeze_layer->setZeroIsPlaceholder(false);
|
||||
value_tensor = unsqueeze_layer->getOutput(0);
|
||||
CHECK_NULL_RETURN(value_tensor);
|
||||
|
||||
auto out_tensor = Broadcast(ctx, value_tensor, shape);
|
||||
if (static_cast<DataType>(constofshape_op->get_data_type()) == DataType::kNumberTypeInt32) {
|
||||
out_tensor = TRTTensorCast(ctx, out_tensor, nvinfer1::DataType::kINT32, op_name_ + "_cast_out");
|
||||
}
|
||||
|
||||
auto output_helper = ITensorHelper{out_tensor, Format::NCHW, true};
|
||||
ctx->RegisterTensor(output_helper, out_tensors_[0].Name());
|
||||
MS_LOG(DEBUG) << "output " << GetTensorFormat(output_helper);
|
||||
this->layer_ = unsqueeze_layer;
|
||||
return RET_OK;
|
||||
}
|
||||
REGISTER_TENSORRT_CREATOR(ops::kNameConstantOfShape, ConstantOfShapeTensorRT)
|
||||
} // namespace mindspore::lite
|
|
@ -0,0 +1,38 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_OP_CONSTANT_OF_SHAPE_TENSORRT_H_
|
||||
#define MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_OP_CONSTANT_OF_SHAPE_TENSORRT_H_
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include "src/extendrt/delegate/tensorrt/op/tensorrt_op.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
class ConstantOfShapeTensorRT : public TensorRTOp {
|
||||
public:
|
||||
ConstantOfShapeTensorRT(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
|
||||
const std::vector<TensorInfo> &out_tensors, std::string name)
|
||||
: TensorRTOp(base_operator, in_tensors, out_tensors, name) {}
|
||||
|
||||
~ConstantOfShapeTensorRT() override = default;
|
||||
|
||||
int AddInnerOp(TensorRTContext *ctx) override;
|
||||
|
||||
int IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
|
||||
const std::vector<TensorInfo> &out_tensors) override;
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_OP_CONSTANT_OF_SHAPE_TENSORRT_H_
|
|
@ -34,6 +34,7 @@
|
|||
#include "ops/equal.h"
|
||||
#include "ops/less.h"
|
||||
#include "ops/greater.h"
|
||||
#include "ops/floor_mod.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
namespace {
|
||||
|
@ -118,8 +119,12 @@ int ElementWiseTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
|||
MS_LOG(ERROR) << "PreprocessInputTensors failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
nvinfer1::IElementWiseLayer *cal_layer =
|
||||
ctx->network()->addElementWise(*x_input.trt_tensor_, *y_input.trt_tensor_, element_wise_op_);
|
||||
nvinfer1::IElementWiseLayer *cal_layer;
|
||||
if (type_ == ops::kNameFloorMod) {
|
||||
cal_layer = AddFoorMod(ctx, x_input.trt_tensor_, y_input.trt_tensor_);
|
||||
} else {
|
||||
cal_layer = ctx->network()->addElementWise(*x_input.trt_tensor_, *y_input.trt_tensor_, element_wise_op_);
|
||||
}
|
||||
|
||||
if (cal_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "addElementWise failed for TensorRT.";
|
||||
|
@ -164,7 +169,8 @@ int ElementWiseTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
|||
MS_LOG(INFO) << "bool result cast to int32" << op_name_;
|
||||
}
|
||||
#endif
|
||||
auto output_helper = ITensorHelper{op_out_tensor, x_input.format_, x_input.same_format_};
|
||||
auto is_tensor = x_input.is_tensor || y_input.is_tensor;
|
||||
auto output_helper = ITensorHelper{op_out_tensor, x_input.format_, x_input.same_format_, is_tensor};
|
||||
ctx->RegisterTensor(output_helper, out_tensors_[0].Name());
|
||||
MS_LOG(DEBUG) << "output " << GetTensorFormat(output_helper);
|
||||
return RET_OK;
|
||||
|
@ -198,6 +204,25 @@ int ElementWiseTensorRT::PreprocessInputTensors(TensorRTContext *ctx, ITensorHel
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
nvinfer1::IElementWiseLayer *ElementWiseTensorRT::AddFoorMod(TensorRTContext *ctx, nvinfer1::ITensor *x0_trt,
|
||||
nvinfer1::ITensor *x1_trt) {
|
||||
nvinfer1::IElementWiseLayer *layer_0 =
|
||||
ctx->network()->addElementWise(*x0_trt, *x1_trt, nvinfer1::ElementWiseOperation::kFLOOR_DIV);
|
||||
layer_0->setName((op_name_ + "_floor_div").c_str());
|
||||
auto result_0 = layer_0->getOutput(0);
|
||||
|
||||
nvinfer1::IElementWiseLayer *layer_1 =
|
||||
ctx->network()->addElementWise(*result_0, *x1_trt, nvinfer1::ElementWiseOperation::kPROD);
|
||||
layer_1->setName((op_name_ + "_prod").c_str());
|
||||
auto result_1 = layer_1->getOutput(0);
|
||||
|
||||
nvinfer1::IElementWiseLayer *layer_2 =
|
||||
ctx->network()->addElementWise(*x0_trt, *result_1, nvinfer1::ElementWiseOperation::kSUB);
|
||||
layer_2->setName((op_name_ + "_sub").c_str());
|
||||
|
||||
return layer_2;
|
||||
}
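The three element-wise layers above build floor-mod by composition: a floor division, a multiplication by the divisor, and a subtraction from the dividend. The same identity checked on the host (a standalone sketch, independent of the TensorRT graph):

#include <cmath>
#include <cstdio>

// Reference floor_mod assembled the same way as the layer chain above:
// kFLOOR_DIV, then kPROD with the divisor, then kSUB from the dividend.
float FloorModRef(float x, float y) {
  float floor_div = std::floor(x / y);
  float prod = floor_div * y;
  return x - prod;
}

int main() {
  std::printf("%f\n", FloorModRef(7.0f, 3.0f));   // 1.000000
  std::printf("%f\n", FloorModRef(-7.0f, 3.0f));  // 2.000000, sign follows the divisor
  return 0;
}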
|
||||
|
||||
nvinfer1::ITensor *ElementWiseTensorRT::AddActivation(TensorRTContext *ctx, nvinfer1::ITensor *in_tensor) {
|
||||
ActivationType activation = ActivationType::NO_ACTIVATION;
|
||||
if (type_ == ops::kNameAddFusion) {
|
||||
|
@ -255,11 +280,13 @@ nvinfer1::ITensor *ElementWiseTensorRT::AddActivation(TensorRTContext *ctx, nvin
|
|||
int ElementWiseTensorRT::AddConstTensor(TensorRTContext *ctx) {
|
||||
int const_tensor_index = in_tensors_[0].IsConst() ? 0 : 1;
|
||||
auto expect_shape = ConvertMSShape(input(ctx, 1 - const_tensor_index).trt_tensor_->getDimensions());
|
||||
nvinfer1::ITensor *constant_input =
|
||||
ConvertConstantTensorWithDims(ctx, in_tensors_[const_tensor_index], expect_shape, op_name_);
|
||||
auto &const_tensor = in_tensors_[const_tensor_index];
|
||||
nvinfer1::ITensor *constant_input = ConvertConstantTensorWithDims(ctx, const_tensor, expect_shape, op_name_);
|
||||
CHECK_NULL_RETURN(constant_input);
|
||||
auto const_helper = ITensorHelper{constant_input, input(ctx, 1 - const_tensor_index).format_, true};
|
||||
ctx->RegisterTensor(const_helper, in_tensors_[const_tensor_index].Name());
|
||||
auto const_shape = const_tensor.Shape();
|
||||
auto is_scalar = const_shape.empty() || (const_shape.size() == 1 && const_shape[0] == 1);
|
||||
auto const_helper = ITensorHelper{constant_input, input(ctx, 1 - const_tensor_index).format_, true, !is_scalar};
|
||||
ctx->RegisterTensor(const_helper, const_tensor.Name());
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
@ -273,6 +300,7 @@ REGISTER_TENSORRT_CREATOR(ops::kNameEltwise, ElementWiseTensorRT)
|
|||
REGISTER_TENSORRT_CREATOR(ops::kNameMinimum, ElementWiseTensorRT)
|
||||
REGISTER_TENSORRT_CREATOR(ops::kNameMaximum, ElementWiseTensorRT)
|
||||
REGISTER_TENSORRT_CREATOR(ops::kNameBiasAdd, ElementWiseTensorRT)
|
||||
REGISTER_TENSORRT_CREATOR(ops::kNameFloorMod, ElementWiseTensorRT)
|
||||
#if TRT_VERSION_GE(7, 2)
|
||||
REGISTER_TENSORRT_CREATOR(ops::kNameEqual, ElementWiseTensorRT)
|
||||
REGISTER_TENSORRT_CREATOR(ops::kNameLess, ElementWiseTensorRT)
|
||||
|
|
|
@ -37,6 +37,8 @@ class ElementWiseTensorRT : public TensorRTOp {
|
|||
private:
|
||||
nvinfer1::ITensor *AddActivation(TensorRTContext *ctx, nvinfer1::ITensor *in_tensor);
|
||||
|
||||
nvinfer1::IElementWiseLayer *AddFoorMod(TensorRTContext *ctx, nvinfer1::ITensor *x0_trt, nvinfer1::ITensor *x1_trt);
|
||||
|
||||
int AddConstTensor(TensorRTContext *ctx);
|
||||
|
||||
bool SameTensor(nvinfer1::ITensor *trt_tensor, TensorInfo *ms_tensor);
|
||||
|
|
|
@ -0,0 +1,79 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/extendrt/delegate/tensorrt/op/fill_tensorrt.h"
|
||||
#include <numeric>
|
||||
#include "src/extendrt/delegate/tensorrt/tensorrt_utils.h"
|
||||
#include "ops/fill.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
int FillTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
|
||||
const std::vector<TensorInfo> &out_tensors) {
|
||||
#if TRT_VERSION_GE(8, 2)
|
||||
if (in_tensors.size() != INPUT_SIZE2) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size() << " : " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
if (out_tensors.size() != 1) {
|
||||
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size() << " : " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
#else
|
||||
MS_LOG(WARNING) << "low TensorRT version don't support fill op, please upgrade TensorRT version to 8.2 or higher";
|
||||
return RET_ERROR;
|
||||
#endif
|
||||
}
|
||||
|
||||
int FillTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
||||
#if TRT_VERSION_GE(8, 2)
|
||||
ITensorHelper fill_input;
|
||||
nvinfer1::FillOperation op = nvinfer1::FillOperation::kLINSPACE;
|
||||
auto *fill_layer = ctx->network()->addFill({}, op);
|
||||
if (fill_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "addFill failed for TensorRT : " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
fill_layer->setInput(0, *input(ctx, 1).trt_tensor_);
|
||||
nvinfer1::ITensor *alpha_tensor = nullptr;
|
||||
if (!in_tensors_[0].IsConst()) {
|
||||
alpha_tensor = input(ctx, 0).trt_tensor_;
|
||||
} else {
|
||||
alpha_tensor =
|
||||
ConvertScalarToITensor(ctx, 0, in_tensors_[0].Data(), in_tensors_[0].DataType(), op_name_ + "_alpha");
|
||||
}
|
||||
fill_layer->setInput(1, *alpha_tensor);
|
||||
int nbdims = input(ctx, 1).trt_tensor_->getDimensions().d[0];
|
||||
nvinfer1::ITensor *beta_tensor = nullptr;
|
||||
if (in_tensors_[0].DataType() == DataType::kNumberTypeInt32) {
|
||||
beta_tensor = ctx->ConvertTo1DTensor(std::vector<int>(nbdims, 0));
|
||||
} else {
|
||||
beta_tensor = ctx->ConvertTo1DTensor(std::vector<float>(nbdims, 0.f));
|
||||
}
|
||||
fill_layer->setInput(INPUT_SIZE2, *beta_tensor);
|
||||
|
||||
nvinfer1::ITensor *out_tensor = fill_layer->getOutput(0);
|
||||
ctx->RegisterTensor(ITensorHelper{out_tensor, Format::NCHW, true}, out_tensors_[0].Name());
|
||||
this->layer_ = fill_layer;
|
||||
return RET_OK;
|
||||
#else
|
||||
MS_LOG(WARNING) << "low TensorRT version don't support fill op, please upgrade TensorRT version to 8.2 or higher";
|
||||
return RET_ERROR;
|
||||
#endif
|
||||
}
|
||||
REGISTER_TENSORRT_CREATOR(ops::kNameFill, FillTensorRT)
|
||||
} // namespace mindspore::lite
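The fill layer above is configured as kLINSPACE with the start taken from the alpha tensor and a per-dimension delta of zero, so the output is a constant tensor of the requested shape. A 1-D host sketch of that degenerate linspace (illustrative only):

#include <cstddef>
#include <vector>

// 1-D linspace with start `alpha` and step `delta`; delta == 0 reduces it to a
// constant fill, which is the configuration built in AddInnerOp above.
std::vector<float> LinspaceRef(float alpha, float delta, size_t count) {
  std::vector<float> out(count);
  for (size_t i = 0; i < count; ++i) {
    out[i] = alpha + delta * static_cast<float>(i);
  }
  return out;
}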
|
|
@ -0,0 +1,41 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_OP_FILL_TENSORRT_H_
|
||||
#define MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_OP_FILL_TENSORRT_H_
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include "src/extendrt/delegate/tensorrt/op/tensorrt_op.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
class FillTensorRT : public TensorRTOp {
|
||||
public:
|
||||
FillTensorRT(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
|
||||
const std::vector<TensorInfo> &out_tensors, std::string name)
|
||||
: TensorRTOp(base_operator, in_tensors, out_tensors, name) {}
|
||||
|
||||
~FillTensorRT() override = default;
|
||||
|
||||
int AddInnerOp(TensorRTContext *ctx) override;
|
||||
|
||||
int IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
|
||||
const std::vector<TensorInfo> &out_tensors) override;
|
||||
|
||||
private:
|
||||
std::vector<float> zeros_;
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_OP_FILL_TENSORRT_H_
|
|
@ -88,7 +88,7 @@ int FullyConnectedTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
|||
}
|
||||
|
||||
int FullyConnectedTensorRT::PreprocessInputs(TensorRTContext *ctx, ITensorHelper *fc_input) {
|
||||
auto ret = PreprocessInputs2SameDim(ctx, input(ctx, !in_tensors_[1].IsConst()), fc_input);
|
||||
auto ret = PreprocessInputs2SameDim(ctx, input(ctx, in_tensors_[1].IsConst() ? 0 : 1), fc_input);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "PreprocessInputs2SameDim failed for " << op_name_;
|
||||
return ret;
|
||||
|
|
|
@ -85,7 +85,7 @@ int GatherTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
|||
nvinfer1::ITensor *op_output = gather_layer->getOutput(0);
|
||||
auto old_shape = ConvertMSShape(op_output->getDimensions());
|
||||
// keep shape
|
||||
if (in_tensors_[1].Shape().empty() && old_shape.size() > 1) {
|
||||
if (indices_tensor.trt_tensor_->getDimensions().nbDims == 0 && old_shape.size() > 1) {
|
||||
auto squeeze = ctx->network()->addShuffle(*op_output);
|
||||
if (squeeze == nullptr) {
|
||||
MS_LOG(ERROR) << "add output squeeze failed for " << op_name_;
|
||||
|
|
|
@ -0,0 +1,111 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/extendrt/delegate/tensorrt/op/greaterorequal_tensorrt.h"
|
||||
#include <cuda_runtime.h>
|
||||
#include <numeric>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <functional>
|
||||
#include <unordered_map>
|
||||
#include "NvInferRuntimeCommon.h"
|
||||
#include "src/extendrt/delegate/tensorrt/tensorrt_utils.h"
|
||||
#include "src/extendrt/delegate/tensorrt/cuda_impl/logical.cuh"
|
||||
#include "ops/greater_equal.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
int GreaterorequalTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
|
||||
const std::vector<TensorInfo> &out_tensors) {
|
||||
if (in_tensors.size() != INPUT_SIZE2) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (out_tensors.size() != 1) {
|
||||
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int GreaterorequalTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
||||
if (ctx == nullptr || ctx->network() == nullptr) {
|
||||
MS_LOG(ERROR) << "network or input tensor is invalid";
|
||||
return RET_ERROR;
|
||||
}
|
||||
nvinfer1::ITensor *inputTensors[] = {input(ctx, 0).trt_tensor_, input(ctx, 1).trt_tensor_};
|
||||
auto plugin = std::make_shared<GreaterorequalPlugin>(op_name_, schema::PrimitiveType_GreaterEqual);
|
||||
if (plugin == nullptr) {
|
||||
MS_LOG(ERROR) << "create GreaterorequalPlugin failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
nvinfer1::IPluginV2Layer *greaterorequal_layer = ctx->network()->addPluginV2(inputTensors, 2, *plugin);
|
||||
this->layer_ = greaterorequal_layer;
|
||||
|
||||
nvinfer1::ITensor *op_out_tensor = greaterorequal_layer->getOutput(0);
|
||||
if (op_out_tensor == nullptr) {
|
||||
MS_LOG(ERROR) << "greaterorequal out tensor is nullptr.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
ctx->RegisterTensor(ITensorHelper{op_out_tensor, input(ctx, 0).format_, input(ctx, 0).same_format_},
|
||||
out_tensors_[0].Name());
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
REGISTER_TENSORRT_PLUGIN(GreaterorequalPluginCreater);
|
||||
template class TensorRTPluginCreater<GreaterorequalPlugin>;
|
||||
template <class T>
|
||||
nvinfer1::PluginFieldCollection TensorRTPluginCreater<T>::field_collection_{};
|
||||
template <class T>
|
||||
std::vector<nvinfer1::PluginField> TensorRTPluginCreater<T>::fields_;
|
||||
|
||||
int GreaterorequalPlugin::enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
|
||||
const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs,
|
||||
void *const *outputs, void *workspace, cudaStream_t stream) noexcept {
|
||||
return RunCudaGreaterorequal(inputDesc, inputs, outputs, stream);
|
||||
}
|
||||
|
||||
int GreaterorequalPlugin::RunCudaGreaterorequal(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs,
|
||||
void *const *outputs, cudaStream_t stream) {
|
||||
if (inputDesc->type == nvinfer1::DataType::kINT32) {
|
||||
GreaterOrEqual(static_cast<const int *>(inputs[0]), static_cast<const int *>(inputs[1]),
|
||||
static_cast<int *>(outputs[0]), GetDimsVolume(inputDesc[0].dims), stream);
|
||||
} else if (inputDesc->type == nvinfer1::DataType::kFLOAT) {
|
||||
GreaterOrEqual(static_cast<const float *>(inputs[0]), static_cast<const float *>(inputs[1]),
|
||||
static_cast<float *>(outputs[0]), GetDimsVolume(inputDesc[0].dims), stream);
|
||||
} else {
|
||||
MS_LOG(ERROR) << "unsupported equal data type";
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
nvinfer1::IPluginV2DynamicExt *GreaterorequalPlugin::clone() const noexcept {
|
||||
auto *plugin = new (std::nothrow) GreaterorequalPlugin(*this);
|
||||
if (plugin == nullptr) {
|
||||
MS_LOG(ERROR) << "malloc greaterorequal plugin failed";
|
||||
return nullptr;
|
||||
}
|
||||
plugin->setPluginNamespace(name_space_.c_str());
|
||||
return plugin;
|
||||
}
|
||||
|
||||
size_t GreaterorequalPlugin::getSerializationSize() const noexcept { return sizeof(schema::PrimitiveType); }
|
||||
|
||||
void GreaterorequalPlugin::serialize(void *buffer) const noexcept {
|
||||
SerializeValue(&buffer, &primitive_type_, sizeof(schema::PrimitiveType));
|
||||
}
|
||||
REGISTER_TENSORRT_CREATOR(ops::kNameGreaterEqual, GreaterorequalTensorRT)
|
||||
} // namespace mindspore::lite
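The CUDA kernel dispatched by RunCudaGreaterorequal applies a per-element comparison over the flattened input volume. A plain C++ reference of the same rule (the template below is illustrative; the real plugin writes its result through the CUDA kernel, not this helper):

#include <cstddef>

// Element-wise reference for the GreaterOrEqual plugin: out[i] = (a[i] >= b[i]),
// stored in the input's numeric type (0 or 1), mirroring the int32/float dispatch.
template <typename T>
void GreaterOrEqualRef(const T *a, const T *b, T *out, size_t n) {
  for (size_t i = 0; i < n; ++i) {
    out[i] = static_cast<T>(a[i] >= b[i]);
  }
}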
|
|
@ -0,0 +1,77 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_GREATEROREQUAL_PLUGIN_H_
|
||||
#define MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_GREATEROREQUAL_PLUGIN_H_
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "src/extendrt/delegate/tensorrt/op/tensorrt_op.h"
|
||||
#include "src/extendrt/delegate/tensorrt/op/tensorrt_plugin.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
class GreaterorequalTensorRT : public TensorRTOp {
|
||||
public:
|
||||
GreaterorequalTensorRT(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
|
||||
const std::vector<TensorInfo> &out_tensors, std::string name)
|
||||
: TensorRTOp(base_operator, in_tensors, out_tensors, name) {}
|
||||
|
||||
~GreaterorequalTensorRT() override = default;
|
||||
|
||||
int AddInnerOp(TensorRTContext *ctx) override;
|
||||
|
||||
int IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
|
||||
const std::vector<TensorInfo> &out_tensors) override;
|
||||
};
|
||||
|
||||
constexpr auto GREATEROREQUAL_PLUGIN_NAME{"GreaterorequalPlugin"};
|
||||
class GreaterorequalPlugin : public TensorRTPlugin {
|
||||
public:
|
||||
GreaterorequalPlugin(const std::string name, schema::PrimitiveType primitive_type)
|
||||
: TensorRTPlugin(name, std::string(GREATEROREQUAL_PLUGIN_NAME)), primitive_type_(primitive_type) {}
|
||||
|
||||
GreaterorequalPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
|
||||
: TensorRTPlugin(std::string(name), std::string(GREATEROREQUAL_PLUGIN_NAME)) {
|
||||
const nvinfer1::PluginField *fields = fc->fields;
|
||||
primitive_type_ = static_cast<const schema::PrimitiveType *>(fields[0].data)[0];
|
||||
}
|
||||
|
||||
GreaterorequalPlugin(const char *name, const void *serialData, size_t serialLength)
|
||||
: TensorRTPlugin(std::string(name), std::string(GREATEROREQUAL_PLUGIN_NAME)) {
|
||||
DeserializeValue(&serialData, &serialLength, &primitive_type_, sizeof(schema::PrimitiveType));
|
||||
}
|
||||
|
||||
GreaterorequalPlugin() = delete;
|
||||
|
||||
nvinfer1::IPluginV2DynamicExt *clone() const noexcept override;
|
||||
int enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
|
||||
const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept override;
|
||||
size_t getSerializationSize() const noexcept override;
|
||||
void serialize(void *buffer) const noexcept override;
|
||||
|
||||
private:
|
||||
int RunCudaGreaterorequal(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs,
|
||||
void *const *outputs, cudaStream_t stream);
|
||||
const std::string layer_name_;
|
||||
std::string name_space_;
|
||||
schema::PrimitiveType primitive_type_;
|
||||
};
|
||||
class GreaterorequalPluginCreater : public TensorRTPluginCreater<GreaterorequalPlugin> {
|
||||
public:
|
||||
GreaterorequalPluginCreater() : TensorRTPluginCreater(std::string(GREATEROREQUAL_PLUGIN_NAME)) {}
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_GREATEROREQUAL_PLUGIN_H_
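getSerializationSize() and serialize() above persist only the primitive type, and SerializeValue/DeserializeValue behave like cursor-advancing copies over the plugin's byte buffer. A minimal stand-in showing that round trip (a sketch; the real helpers live in the TensorRT delegate utilities and these names are placeholders):

#include <cstddef>
#include <cstring>

// Cursor-style serialization, analogous to SerializeValue: copy `size` bytes
// and advance the destination pointer so several fields can be chained.
static void WriteField(void **buffer, const void *value, size_t size) {
  std::memcpy(*buffer, value, size);
  *buffer = static_cast<char *>(*buffer) + size;
}

// Analogous to DeserializeValue: copy out `size` bytes, advance the source
// pointer, and shrink the remaining length.
static void ReadField(const void **data, size_t *remaining, void *value, size_t size) {
  std::memcpy(value, *data, size);
  *data = static_cast<const char *>(*data) + size;
  *remaining -= size;
}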
|
|
@ -0,0 +1,110 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/extendrt/delegate/tensorrt/op/lessorequal_tensorrt.h"
|
||||
#include <cuda_runtime.h>
|
||||
#include <numeric>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <functional>
|
||||
#include <unordered_map>
|
||||
#include "src/extendrt/delegate/tensorrt/tensorrt_utils.h"
|
||||
#include "NvInferRuntimeCommon.h"
|
||||
#include "src/extendrt/delegate/tensorrt/cuda_impl/logical.cuh"
|
||||
#include "ops/less_equal.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
int LessorequalTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
|
||||
const std::vector<TensorInfo> &out_tensors) {
|
||||
if (in_tensors.size() != INPUT_SIZE2) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (out_tensors.size() != 1) {
|
||||
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int LessorequalTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
||||
if (ctx == nullptr || ctx->network() == nullptr) {
|
||||
MS_LOG(ERROR) << "network or input tensor is invalid";
|
||||
return RET_ERROR;
|
||||
}
|
||||
nvinfer1::ITensor *inputTensors[] = {input(ctx, 0).trt_tensor_, input(ctx, 1).trt_tensor_};
|
||||
auto plugin = std::make_shared<LessorequalPlugin>(op_name_, schema::PrimitiveType_LessEqual);
|
||||
if (plugin == nullptr) {
|
||||
MS_LOG(ERROR) << "create LessorequalPlugin failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
nvinfer1::IPluginV2Layer *lessorequal_layer = ctx->network()->addPluginV2(inputTensors, 2, *plugin);
|
||||
this->layer_ = lessorequal_layer;
|
||||
nvinfer1::ITensor *op_out_tensor = lessorequal_layer->getOutput(0);
|
||||
if (op_out_tensor == nullptr) {
|
||||
MS_LOG(ERROR) << "lessorequal out tensor is nullptr.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
ctx->RegisterTensor(ITensorHelper{op_out_tensor, input(ctx, 0).format_, input(ctx, 0).same_format_},
|
||||
out_tensors_[0].Name());
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
REGISTER_TENSORRT_PLUGIN(LessorequalPluginCreater);
|
||||
template class TensorRTPluginCreater<LessorequalPlugin>;
|
||||
template <class T>
|
||||
nvinfer1::PluginFieldCollection TensorRTPluginCreater<T>::field_collection_{};
|
||||
template <class T>
|
||||
std::vector<nvinfer1::PluginField> TensorRTPluginCreater<T>::fields_;
|
||||
|
||||
int LessorequalPlugin::enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
|
||||
const nvinfer1::PluginTensorDesc *outputDesc, const void *const *inputs,
|
||||
void *const *outputs, void *workspace, cudaStream_t stream) noexcept {
|
||||
return RunCudaLessorequal(inputDesc, inputs, outputs, stream);
|
||||
}
|
||||
|
||||
int LessorequalPlugin::RunCudaLessorequal(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs,
|
||||
void *const *outputs, cudaStream_t stream) {
|
||||
if (inputDesc->type == nvinfer1::DataType::kINT32) {
|
||||
LessOrEqual(static_cast<const int *>(inputs[0]), static_cast<const int *>(inputs[1]),
|
||||
static_cast<int *>(outputs[0]), GetDimsVolume(inputDesc[0].dims), stream);
|
||||
} else if (inputDesc->type == nvinfer1::DataType::kFLOAT) {
|
||||
LessOrEqual(static_cast<const float *>(inputs[0]), static_cast<const float *>(inputs[1]),
|
||||
static_cast<float *>(outputs[0]), GetDimsVolume(inputDesc[0].dims), stream);
|
||||
} else {
|
||||
MS_LOG(ERROR) << "unsupported equal data type";
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
nvinfer1::IPluginV2DynamicExt *LessorequalPlugin::clone() const noexcept {
|
||||
auto *plugin = new (std::nothrow) LessorequalPlugin(*this);
|
||||
if (plugin == nullptr) {
|
||||
MS_LOG(ERROR) << "malloc lessorequal plugin failed";
|
||||
return nullptr;
|
||||
}
|
||||
plugin->setPluginNamespace(name_space_.c_str());
|
||||
return plugin;
|
||||
}
|
||||
|
||||
size_t LessorequalPlugin::getSerializationSize() const noexcept { return sizeof(schema::PrimitiveType); }
|
||||
|
||||
void LessorequalPlugin::serialize(void *buffer) const noexcept {
|
||||
SerializeValue(&buffer, &primitive_type_, sizeof(schema::PrimitiveType));
|
||||
}
|
||||
REGISTER_TENSORRT_CREATOR(ops::kNameLessEqual, LessorequalTensorRT)
|
||||
} // namespace mindspore::lite
|
|
@ -0,0 +1,77 @@
|
|||
/**
|
||||
* Copyright 2022 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_LESSOREQUAL_PLUGIN_H_
|
||||
#define MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_LESSOREQUAL_PLUGIN_H_
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "src/extendrt/delegate/tensorrt/op/tensorrt_op.h"
|
||||
#include "src/extendrt/delegate/tensorrt/op/tensorrt_plugin.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
class LessorequalTensorRT : public TensorRTOp {
|
||||
public:
|
||||
LessorequalTensorRT(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
|
||||
const std::vector<TensorInfo> &out_tensors, std::string name)
|
||||
: TensorRTOp(base_operator, in_tensors, out_tensors, name) {}
|
||||
|
||||
~LessorequalTensorRT() override = default;
|
||||
|
||||
int AddInnerOp(TensorRTContext *ctx) override;
|
||||
|
||||
int IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
|
||||
const std::vector<TensorInfo> &out_tensors) override;
|
||||
};
|
||||
|
||||
constexpr auto LESSOREQUAL_PLUGIN_NAME{"LessorequalPlugin"};
|
||||
class LessorequalPlugin : public TensorRTPlugin {
|
||||
public:
|
||||
LessorequalPlugin(const std::string name, schema::PrimitiveType primitive_type)
|
||||
: TensorRTPlugin(name, std::string(LESSOREQUAL_PLUGIN_NAME)), primitive_type_(primitive_type) {}
|
||||
|
||||
LessorequalPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
|
||||
: TensorRTPlugin(std::string(name), std::string(LESSOREQUAL_PLUGIN_NAME)) {
|
||||
const nvinfer1::PluginField *fields = fc->fields;
|
||||
primitive_type_ = static_cast<const schema::PrimitiveType *>(fields[0].data)[0];
|
||||
}
|
||||
|
||||
LessorequalPlugin(const char *name, const void *serialData, size_t serialLength)
|
||||
: TensorRTPlugin(std::string(name), std::string(LESSOREQUAL_PLUGIN_NAME)) {
|
||||
DeserializeValue(&serialData, &serialLength, &primitive_type_, sizeof(schema::PrimitiveType));
|
||||
}
|
||||
|
||||
LessorequalPlugin() = delete;
|
||||
|
||||
nvinfer1::IPluginV2DynamicExt *clone() const noexcept override;
|
||||
int enqueue(const nvinfer1::PluginTensorDesc *inputDesc, const nvinfer1::PluginTensorDesc *outputDesc,
|
||||
const void *const *inputs, void *const *outputs, void *workspace, cudaStream_t stream) noexcept override;
|
||||
size_t getSerializationSize() const noexcept override;
|
||||
void serialize(void *buffer) const noexcept override;
|
||||
|
||||
private:
|
||||
int RunCudaLessorequal(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs, void *const *outputs,
|
||||
cudaStream_t stream);
|
||||
const std::string layer_name_;
|
||||
std::string name_space_;
|
||||
schema::PrimitiveType primitive_type_;
|
||||
};
|
||||
class LessorequalPluginCreater : public TensorRTPluginCreater<LessorequalPlugin> {
|
||||
public:
|
||||
LessorequalPluginCreater() : TensorRTPluginCreater(std::string(LESSOREQUAL_PLUGIN_NAME)) {}
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_LESSOREQUAL_PLUGIN_H_
|
|
@ -281,6 +281,7 @@ nvinfer1::ITensor *LSTMTensorRT::ConcateAll(TensorRTContext *ctx, std::vector<nv
|
|||
return nullptr;
|
||||
}
|
||||
concat->setAxis(axis);
|
||||
this->layer_ = concat;
|
||||
return concat->getOutput(0);
|
||||
}
|
||||
|
||||
|
|
|
@ -103,7 +103,6 @@ class LSTMTensorRT : public TensorRTOp {
|
|||
int *state_weight_offset, int *bias_offset, LstmWeights *lstm_weights,
|
||||
const LstmState &next_state);
|
||||
|
||||
// nvinfer1::INetworkDefinition *network_{nullptr};
|
||||
nvinfer1::ITensor *input_data_{nullptr};
|
||||
nvinfer1::ITensor *sequence_size_input_{nullptr};
|
||||
nvinfer1::ITensor *op_data_out_{nullptr};
|
||||
|
|
|
@ -235,8 +235,8 @@ nvinfer1::ITensor *MatMulTensorRT::AddBias(TensorRTContext *ctx, nvinfer1::ITens
|
|||
if (in_tensors_.size() == kBiasIndex + 1) {
|
||||
nvinfer1::ITensor *bias = nullptr;
|
||||
if (in_tensors_[kBiasIndex].Shape().size() < static_cast<size_t>(out_tensor->getDimensions().nbDims)) {
|
||||
std::vector<int64_t> expect_dims(out_tensors_[0].Shape());
|
||||
expect_dims[0] = out_tensor->getDimensions().d[0];
|
||||
std::vector<int64_t> expect_dims(input_tensor->getDimensions().nbDims, 1);
|
||||
expect_dims[expect_dims.size() - 1] = in_tensors_[kBiasIndex].Shape().back();
|
||||
bias = ConvertTensorWithExpandDims(ctx, in_tensors_[kBiasIndex], expect_dims, op_name_);
|
||||
} else if (in_tensors_[kBiasIndex].Shape().size() == static_cast<size_t>(out_tensor->getDimensions().nbDims)) {
|
||||
bias = ConvertConstantTensor(ctx, in_tensors_[kBiasIndex], op_name_);
|
||||
|
|
|
@ -80,7 +80,6 @@ int PadTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
|||
int h_post;
|
||||
int w_pre;
|
||||
int w_post;
|
||||
// MS_LOG(INFO) << in_tensors_[0].format();
|
||||
if (SameDims(pad_input->getDimensions(), in_tensors_[0].Shape())) {
|
||||
// NCHW: 0: N_pre, 1: N_post, 2: C_pre, 3: C_post, 4: H_pre, 5: H_post, 6: W_pre, 7: W_post
|
||||
if (*padding_data != 0 || *(padding_data + 1) != 0 || *(padding_data + 2) != 0 || *(padding_data + 3) != 0) {
|
||||
|
|
|
@ -61,7 +61,7 @@ int ReduceTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
|||
CHECK_NULL_RETURN(reduce_input);
|
||||
}
|
||||
|
||||
uint32_t reduceAxis = GetAxis();
|
||||
uint32_t reduceAxis = GetAxis(ctx);
|
||||
auto reduce_operation_opt = TryConvertTRTReduceMode(reduce_op->get_mode());
|
||||
if (!reduce_operation_opt) {
|
||||
MS_LOG(WARNING) << "invalid reduce for TensorRT, need check: " << static_cast<int>(reduce_op->get_mode());
|
||||
|
@ -88,7 +88,7 @@ int ReduceTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
uint32_t ReduceTensorRT::GetAxis() {
|
||||
uint32_t ReduceTensorRT::GetAxis(TensorRTContext *ctx) {
|
||||
// axis
|
||||
uint32_t reduceAxis = 0;
|
||||
auto axis_tensor = this->in_tensors_[1];
|
||||
|
@ -102,7 +102,8 @@ uint32_t ReduceTensorRT::GetAxis() {
|
|||
auto axis_data = reinterpret_cast<const int *>(axis_tensor.Data());
|
||||
CHECK_NULL_RETURN(axis_data);
|
||||
for (int i = 0; i < axis_tensor.ElementNum(); i++) {
|
||||
int format_axis_data = (*axis_data == -1) ? in_tensors_[0].Shape().size() - 1 : *axis_data;
|
||||
auto input_0 = input(ctx, 0).trt_tensor_;
|
||||
int format_axis_data = (*axis_data == -1) ? input_0->getDimensions().nbDims - 1 : *axis_data;
|
||||
MS_LOG(DEBUG) << op_name_ << " reduceAxis at index : " << *axis_data;
|
||||
reduceAxis |= 1u << format_axis_data;
|
||||
axis_data++;
|
||||
|
|
|
@ -38,7 +38,7 @@ class ReduceTensorRT : public TensorRTOp {
|
|||
const std::vector<TensorInfo> &out_tensors) override;
|
||||
|
||||
private:
|
||||
uint32_t GetAxis();
|
||||
uint32_t GetAxis(TensorRTContext *ctx);
|
||||
Format out_format_;
|
||||
};
|
||||
} // namespace mindspore::lite
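GetAxis packs the requested reduction axes into a bitmask, remapping -1 to the last dimension of the TensorRT input, because the reduce layer expects axes as bit positions. The packing on its own (a host sketch; assumes the rank fits in 32 bits):

#include <cstdint>
#include <vector>

// Build the reduce-axis bitmask the way GetAxis does: each axis sets one bit,
// and -1 resolves to the last input dimension.
uint32_t ReduceAxisMaskRef(const std::vector<int> &axes, int input_rank) {
  uint32_t mask = 0;
  for (int axis : axes) {
    int resolved = (axis == -1) ? input_rank - 1 : axis;
    mask |= 1u << resolved;
  }
  return mask;
}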
|
||||
|
|
|
@ -40,7 +40,16 @@ int ResizeTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::v
|
|||
MS_LOG(ERROR) << "convert failed " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
dynamic_shape_params_.support_hw_dynamic_ = false;
|
||||
if (resize_op_->get_coordinate_transform_mode() == CoordinateTransformMode::ALIGN_CORNERS &&
|
||||
resize_op_->get_method() == ResizeMethod::LINEAR) {
|
||||
MS_LOG(ERROR) << "Resize op do not support coordinate_transform_mode == ALIGN_CORNERS when method == LINEAR "
|
||||
<< op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
dynamic_shape_params_.support_hw_dynamic_ =
|
||||
!(resize_op_->get_new_height() > 0 && resize_op_->get_new_width() > 0);
|
||||
dynamic_shape_params_.support_hw_dynamic_ &= resize_op_->get_method() != ResizeMethod::LINEAR;
|
||||
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
@ -53,16 +62,7 @@ int ResizeTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
|||
nvinfer1::ITensor *resize_in_tensor = input(ctx, 0).trt_tensor_;
|
||||
MS_LOG(DEBUG) << "origin input " << GetTensorFormat(input(ctx, 0));
|
||||
|
||||
MS_LOG(DEBUG) << "after transpose input " << GetTensorFormat(resize_in_tensor, Format::NCHW, true);
|
||||
|
||||
auto method = resize_op_->get_method();
|
||||
nvinfer1::ITensor *output_tensor = nullptr;
|
||||
if (method == ResizeMethod::LINEAR) {
|
||||
MS_LOG(INFO) << "using plugin for resize";
|
||||
output_tensor = RunPlugin(ctx, resize_in_tensor);
|
||||
} else {
|
||||
output_tensor = RunTensorRT(ctx, resize_in_tensor);
|
||||
}
|
||||
nvinfer1::ITensor *output_tensor = RunTensorRT(ctx, resize_in_tensor);
|
||||
if (output_tensor == nullptr) {
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
@ -73,26 +73,35 @@ int ResizeTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
|||
}
|
||||
|
||||
nvinfer1::ITensor *ResizeTensorRT::RunPlugin(TensorRTContext *ctx, nvinfer1::ITensor *resize_in_tensor) {
|
||||
constexpr int non_const_resize_input_count = 2;
|
||||
if (ReadyInputsNumber(ctx) == non_const_resize_input_count) {
|
||||
std::vector<int> resize_shape;
|
||||
if (ReadyInputsNumber(ctx) == INPUT_SIZE2) {
|
||||
MS_LOG(ERROR) << "dynamic input is not support!";
|
||||
return nullptr;
|
||||
} else {
|
||||
const int *resize_ptr = reinterpret_cast<const int *>(in_tensors_[1].Data());
|
||||
std::vector<int> resize_shape;
|
||||
for (int i = 0; i != in_tensors_[1].ElementNum(); ++i) {
|
||||
resize_shape.push_back(*(resize_ptr + i));
|
||||
if (in_tensors_.size() == 1) {
|
||||
resize_shape.push_back(static_cast<float>(resize_op_->get_new_height()));
|
||||
resize_shape.push_back(static_cast<float>(resize_op_->get_new_width()));
|
||||
} else {
|
||||
const int *resize_ptr = reinterpret_cast<const int *>(in_tensors_[1].Data());
|
||||
for (int i = 0; i != in_tensors_[1].ElementNum(); ++i) {
|
||||
resize_shape.push_back(*(resize_ptr + i));
|
||||
}
|
||||
if (resize_shape.size() != INPUT_SIZE2) {
|
||||
MS_LOG(ERROR) << "Do not support resize number more than 2";
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
constexpr int resize_hw_dims_count = 2;
|
||||
if (resize_shape.size() != resize_hw_dims_count) {
|
||||
MS_LOG(ERROR) << "Do not support resize number more than 2";
|
||||
bool using_half_pixel = (resize_op_->get_coordinate_transform_mode() == CoordinateTransformMode::HALF_PIXEL);
|
||||
auto plugin = std::make_shared<ResizeLinear2DPlugin>(resize_in_tensor->getName(), resize_shape[0], resize_shape[1],
|
||||
using_half_pixel, device_id_);
|
||||
if (plugin == nullptr) {
|
||||
MS_LOG(ERROR) << "add ResizeLinear2D plugin failed for " << op_name_;
|
||||
return nullptr;
|
||||
}
|
||||
auto plugin =
|
||||
std::make_shared<ResizeLinear2DPlugin>(resize_in_tensor->getName(), resize_shape[0], resize_shape[1], device_id_);
|
||||
nvinfer1::ITensor *inputTensors[] = {resize_in_tensor};
|
||||
nvinfer1::IPluginV2Layer *resize_layer = ctx->network()->addPluginV2(inputTensors, 1, *plugin);
|
||||
if (resize_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "add spacetobatch op failed for TensorRT.";
|
||||
MS_LOG(ERROR) << "add resize op failed for TensorRT.";
|
||||
return nullptr;
|
||||
}
|
||||
resize_layer->setName(op_name_.c_str());
|
||||
|
@ -124,18 +133,38 @@ nvinfer1::ITensor *ResizeTensorRT::RunTensorRT(TensorRTContext *ctx, nvinfer1::I
|
|||
|
||||
int ResizeTensorRT::SetOutputDims(TensorRTContext *ctx, nvinfer1::ITensor *resize_in_tensor,
|
||||
nvinfer1::IResizeLayer *resize_layer) {
|
||||
auto in_dims = resize_in_tensor->getDimensions();
|
||||
nvinfer1::Dims in_dims = resize_in_tensor->getDimensions();
|
||||
if (in_tensors_.size() == 1 && in_dims.nbDims == DIMENSION_4D) {
|
||||
// hw is static, but has dynamic batch size
|
||||
float scales[DIMENSION_4D]{1, 1, 1, 1};
|
||||
scales[kNCHW_H] = static_cast<float>(resize_op_->get_new_height()) / static_cast<float>(in_dims.d[kNCHW_H]);
|
||||
scales[kNCHW_W] = static_cast<float>(resize_op_->get_new_width()) / static_cast<float>(in_dims.d[kNCHW_W]);
|
||||
resize_layer->setScales(scales, DIMENSION_4D);
|
||||
nvinfer1::Dims4 new_dims(in_dims.d[0], in_dims.d[1], resize_op_->get_new_height(),
|
||||
resize_op_->get_new_width()); // nchw
|
||||
resize_layer->setOutputDimensions(new_dims); // static shape
|
||||
} else {
|
||||
auto shape_value_tensor = in_tensors_[1];
|
||||
if (!shape_value_tensor.IsConst() && in_tensors_.size() >= INPUT_SIZE2) {
|
||||
// dynamic output shape
|
||||
resize_layer->setInput(1, *input(ctx, 1).trt_tensor_);
|
||||
auto shape_tensor = input(ctx, 1).trt_tensor_;
|
||||
if (shape_tensor->getDimensions().d[0] == INPUT_SIZE4) {
|
||||
resize_layer->setInput(1, *shape_tensor);
|
||||
} else {
|
||||
auto in_tensor_shape = ctx->network()->addShape(*resize_in_tensor)->getOutput(0);
|
||||
CHECK_NULL_RETURN(in_tensor_shape);
|
||||
nvinfer1::Dims start_dims{1, {0}};
|
||||
nvinfer1::Dims size_dims{1, {2}};
|
||||
nvinfer1::Dims stride_dims{1, {1}};
|
||||
auto nc = ctx->network()->addSlice(*in_tensor_shape, start_dims, size_dims, stride_dims)->getOutput(0);
|
||||
CHECK_NULL_RETURN(nc);
|
||||
|
||||
nvinfer1::ITensor *trt_input_tensors[INPUT_SIZE2];
|
||||
trt_input_tensors[0] = nc;
|
||||
trt_input_tensors[1] = shape_tensor;
|
||||
|
||||
auto concat_layer = ctx->network()->addConcatenation(trt_input_tensors, INPUT_SIZE2);
|
||||
concat_layer->setAxis(0);
|
||||
auto nchw = concat_layer->getOutput(0);
|
||||
CHECK_NULL_RETURN(nchw);
|
||||
nchw = TRTTensorCast(ctx, nchw, nvinfer1::DataType::kINT32, op_name_ + "_input_nchw_to_int32");
|
||||
resize_layer->setInput(1, *nchw);
|
||||
}
|
||||
} else {
|
||||
std::vector<float> out_shape;
|
||||
ParseValueFromShapeTensor(ctx, shape_value_tensor, &out_shape);
|
||||
|
@ -170,7 +199,6 @@ int ResizeTensorRT::SetOutputDims(TensorRTContext *ctx, nvinfer1::ITensor *resiz
|
|||
float scales[DIMENSION_4D]{1, 1, 1, 1};
|
||||
scales[kNCHW_H] = out_shape[kNCHW_H];
|
||||
scales[kNCHW_W] = out_shape[kNCHW_W];
|
||||
MS_LOG(WARNING) << scales[0] << " " << scales[1] << " " << scales[2] << " " << scales[3];
|
||||
resize_layer->setScales(scales, DIMENSION_4D);
|
||||
}
|
||||
}
|
||||
|
@ -237,12 +265,25 @@ int ResizeTensorRT::SetParams(nvinfer1::IResizeLayer *resize_layer) {
|
|||
}
|
||||
resize_layer->setResizeMode(method_map.at(method));
|
||||
|
||||
// unsupported for trt6, but support setCoordinateTransformation() in version8
|
||||
auto coordinate_transform_mode = resize_op_->get_coordinate_transform_mode();
|
||||
// unsupported on TensorRT 6; setCoordinateTransformation() is supported in version 8
|
||||
#if TRT_VERSION_GE(8, 0)
|
||||
std::map<CoordinateTransformMode, nvinfer1::ResizeCoordinateTransformation> transform_map = {
|
||||
{CoordinateTransformMode::ASYMMETRIC, nvinfer1::ResizeCoordinateTransformation::kASYMMETRIC},
|
||||
{CoordinateTransformMode::ALIGN_CORNERS, nvinfer1::ResizeCoordinateTransformation::kALIGN_CORNERS},
|
||||
{CoordinateTransformMode::HALF_PIXEL, nvinfer1::ResizeCoordinateTransformation::kHALF_PIXEL}};
|
||||
auto transform_it = transform_map.find(coordinate_transform_mode);
|
||||
if (transform_it == transform_map.end()) {
|
||||
MS_LOG(ERROR) << op_name_ << " not support resize coordinate transform mode " << coordinate_transform_mode;
|
||||
return RET_ERROR;
|
||||
}
|
||||
resize_layer->setCoordinateTransformation(transform_it->second);
|
||||
#else
|
||||
if (coordinate_transform_mode != CoordinateTransformMode::ASYMMETRIC) {
|
||||
MS_LOG(WARNING) << op_name_ << " has a coordinate_transform_mode that may not be supported: "
|
||||
<< static_cast<int>(coordinate_transform_mode);
|
||||
}
|
||||
#endif
|
||||
return RET_OK;
|
||||
}
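The coordinate transform modes mapped in SetParams differ only in how an output index is projected back onto the input grid before interpolation. The standard formulas, written out as a small reference (a sketch independent of the TensorRT layer; the integer mode encoding is illustrative, not the MindSpore enum values):

// Map an output index back to an input coordinate along one resized axis.
float SrcCoordRef(int dst, int in_len, int out_len, int mode) {
  float scale = static_cast<float>(in_len) / static_cast<float>(out_len);
  if (mode == 0) {  // ASYMMETRIC
    return dst * scale;
  }
  if (mode == 1) {  // ALIGN_CORNERS
    return out_len > 1 ? dst * static_cast<float>(in_len - 1) / static_cast<float>(out_len - 1) : 0.0f;
  }
  // HALF_PIXEL: treat pixels as cell centres.
  return (dst + 0.5f) * scale - 0.5f;
}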
|
||||
|
||||
|
@ -265,8 +306,8 @@ int ResizeLinear2DPlugin::RunCudaResizeLinear2D(const nvinfer1::PluginTensorDesc
|
|||
float h_scale = dims.d[kNCHW_H] / static_cast<float>(resize_h_);
|
||||
float w_scale = dims.d[kNCHW_W] / static_cast<float>(resize_w_);
|
||||
CalResizeBilinear(static_cast<const float *>(inputs[0]), dims.d[0], dims.d[1], dims.d[kNCHW_H], dims.d[kNCHW_W],
|
||||
resize_h_, resize_w_, h_scale, w_scale, false, static_cast<float *>(outputs[0]), device_id_,
|
||||
stream);
|
||||
resize_h_, resize_w_, h_scale, w_scale, using_half_pixel_, static_cast<float *>(outputs[0]),
|
||||
device_id_, stream);
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
@ -280,7 +321,7 @@ nvinfer1::IPluginV2DynamicExt *ResizeLinear2DPlugin::clone() const noexcept {
|
|||
return plugin;
|
||||
}
|
||||
|
||||
size_t ResizeLinear2DPlugin::getSerializationSize() const noexcept { return sizeof(int) * 2; }
|
||||
size_t ResizeLinear2DPlugin::getSerializationSize() const noexcept { return sizeof(int) * INPUT_SIZE2 + sizeof(bool); }
|
||||
|
||||
nvinfer1::DimsExprs ResizeLinear2DPlugin::getOutputDimensions(int32_t index, const nvinfer1::DimsExprs *inputs,
|
||||
int nbInputDims,
|
||||
|
@ -299,6 +340,7 @@ nvinfer1::DimsExprs ResizeLinear2DPlugin::getOutputDimensions(int32_t index, con
|
|||
void ResizeLinear2DPlugin::serialize(void *buffer) const noexcept {
|
||||
SerializeValue(&buffer, &resize_h_, sizeof(int));
|
||||
SerializeValue(&buffer, &resize_w_, sizeof(int));
|
||||
SerializeValue(&buffer, &using_half_pixel_, sizeof(bool));
|
||||
}
|
||||
REGISTER_TENSORRT_CREATOR(ops::kNameResize, ResizeTensorRT)
|
||||
} // namespace mindspore::lite
|
||||
|
|
|
@ -41,10 +41,11 @@ class ResizeTensorRT : public TensorRTOp {
|
|||
const std::vector<TensorInfo> &out_tensors) override;
|
||||
|
||||
private:
|
||||
int SetOutputDims(TensorRTContext *ctx, nvinfer1::ITensor *resize_in_tensor, nvinfer1::IResizeLayer *resize_layer);
|
||||
nvinfer1::ITensor *RunPlugin(TensorRTContext *ctx, nvinfer1::ITensor *resize_in_tensor);
|
||||
nvinfer1::ITensor *RunTensorRT(TensorRTContext *ctx, nvinfer1::ITensor *resize_in_tensor);
|
||||
|
||||
int SetOutputDims(TensorRTContext *ctx, nvinfer1::ITensor *resize_in_tensor, nvinfer1::IResizeLayer *resize_layer);
|
||||
|
||||
void ParseValueFromShapeTensor(TensorRTContext *ctx, const TensorInfo &shape_value_tensor,
|
||||
std::vector<float> *out_shape);
|
||||
|
||||
|
@ -61,22 +62,25 @@ class ResizeTensorRT : public TensorRTOp {
|
|||
constexpr auto RESIZELINEAR2D_PLUGIN_NAME{"ResizeLinear2DPlugin"};
|
||||
class ResizeLinear2DPlugin : public TensorRTPlugin {
|
||||
public:
|
||||
ResizeLinear2DPlugin(const std::string name, int resize_h, int resize_w, uint32_t device_id)
|
||||
ResizeLinear2DPlugin(const std::string name, int resize_h, int resize_w, bool using_half_pixel, uint32_t device_id)
|
||||
: TensorRTPlugin(name, std::string(RESIZELINEAR2D_PLUGIN_NAME), device_id),
|
||||
resize_h_(resize_h),
|
||||
resize_w_(resize_w) {}
|
||||
resize_w_(resize_w),
|
||||
using_half_pixel_(using_half_pixel) {}
|
||||
|
||||
ResizeLinear2DPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
|
||||
: TensorRTPlugin(std::string(name), std::string(RESIZELINEAR2D_PLUGIN_NAME)) {
|
||||
const nvinfer1::PluginField *fields = fc->fields;
|
||||
resize_h_ = static_cast<const int *>(fields[0].data)[0];
|
||||
resize_w_ = static_cast<const int *>(fields[1].data)[0];
|
||||
using_half_pixel_ = static_cast<const bool *>(fields[2].data)[0];
|
||||
}
|
||||
|
||||
ResizeLinear2DPlugin(const char *name, const void *serialData, size_t serialLength)
|
||||
: TensorRTPlugin(std::string(name), std::string(RESIZELINEAR2D_PLUGIN_NAME)) {
|
||||
DeserializeValue(&serialData, &serialLength, &resize_h_, sizeof(int));
|
||||
DeserializeValue(&serialData, &serialLength, &resize_w_, sizeof(int));
|
||||
DeserializeValue(&serialData, &serialLength, &using_half_pixel_, sizeof(bool));
|
||||
}
|
||||
|
||||
ResizeLinear2DPlugin() = delete;
|
||||
|
@ -94,6 +98,7 @@ class ResizeLinear2DPlugin : public TensorRTPlugin {
|
|||
void *const *outputs, cudaStream_t stream);
|
||||
int resize_h_;
|
||||
int resize_w_;
|
||||
bool using_half_pixel_;
|
||||
const std::string layer_name_;
|
||||
std::string name_space_;
|
||||
};
|
||||
|
|
|
@ -48,7 +48,7 @@ class ScaleTensorRT : public TensorRTOp {
|
|||
|
||||
Format out_format_;
|
||||
|
||||
bool out_same_format_{false};
|
||||
bool out_same_format_{true};
|
||||
|
||||
nvinfer1::ScaleMode mode_;
|
||||
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
|
||||
#include "src/extendrt/delegate/tensorrt/op/shape_tensorrt.h"
|
||||
#include "ops/shape.h"
|
||||
#include "ops/dynamic_shape.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
int ShapeTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
|
||||
|
@ -32,8 +33,6 @@ int ShapeTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::ve
|
|||
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
dynamic_shape_params_.support_dynamic_ = false;
|
||||
dynamic_shape_params_.support_hw_dynamic_ = false;
|
||||
return RET_OK;
|
||||
}
|
||||
int ShapeTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
||||
|
@ -55,4 +54,5 @@ int ShapeTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
|||
return RET_OK;
|
||||
}
|
||||
REGISTER_TENSORRT_CREATOR(ops::kNameShape, ShapeTensorRT)
|
||||
REGISTER_TENSORRT_CREATOR(ops::kNameDynamicShape, ShapeTensorRT)
|
||||
} // namespace mindspore::lite
|
||||
|
|
|
@ -72,6 +72,11 @@ int ShuffleTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::
|
|||
MS_LOG(ERROR) << "Unsupported shape tensor of " << type_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
} else if (type_ == ops::kNameBroadcastTo) {
|
||||
if (in_tensors.size() != INPUT_SIZE2) {
|
||||
MS_LOG(ERROR) << "PrimitiveType_Transpose Unsupported in_tensors size: " << in_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
} else {
|
||||
MS_LOG(ERROR) << "Unsupported op type:" << type_;
|
||||
return RET_ERROR;
|
||||
|
@ -123,12 +128,19 @@ int ShuffleTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
|||
MS_LOG(ERROR) << "Unsupported op type for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (ret == RET_OK) {
|
||||
auto output_helper = ITensorHelper{shuffler_output_, out_format_, true};
|
||||
ctx->RegisterTensor(output_helper, out_tensors_[0].Name());
|
||||
MS_LOG(DEBUG) << "output " << GetTensorFormat(output_helper);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "AddOp failed for " << op_name_;
|
||||
return ret;
|
||||
}
|
||||
return ret;
|
||||
|
||||
if (shuffler_output_ == nullptr) {
|
||||
MS_LOG(ERROR) << "output tensor create failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto output_helper = ITensorHelper{shuffler_output_, out_format_, true};
|
||||
ctx->RegisterTensor(output_helper, out_tensors_[0].Name());
|
||||
MS_LOG(DEBUG) << "output " << GetTensorFormat(output_helper);
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int ShuffleTensorRT::InputTensorPreprocess(TensorRTContext *ctx) {
|
||||
|
@ -189,7 +201,7 @@ int ShuffleTensorRT::AddUnsqueezeOp(nvinfer1::IShuffleLayer *shuffle_layer) {
|
|||
nvinfer1::ITensor *expand_input = shuffler_input_;
|
||||
if (input(ctx_, 0).is_tensor == true) {
|
||||
for (size_t i = 0; i < param_axis_.size(); i++) {
|
||||
expand_input = ExpandDim(shuffle_layer, expand_input, param_axis_[i]);
|
||||
expand_input = ExpandDim(ctx_, expand_input, param_axis_[i]);
|
||||
}
|
||||
}
|
||||
shuffler_output_ = expand_input;
|
||||
|
@ -242,8 +254,12 @@ int ShuffleTensorRT::AddReshapeOp(nvinfer1::IShuffleLayer *shuffle_layer) {
|
|||
auto &shape_tensor = in_tensors_[1];
|
||||
if (shape_tensor.IsConst()) {
|
||||
// static shuffle layer
|
||||
shuffle_layer->setReshapeDimensions(
|
||||
InferReshapeDims(shuffler_input_->getDimensions(), in_tensors_[0].Shape(), out_tensors_[0].Shape()));
|
||||
nvinfer1::Dims reshape_dims{static_cast<int>(shape_tensor.ElementNum())};
|
||||
const int *shape_ptr = reinterpret_cast<const int *>(shape_tensor.Data());
|
||||
for (int i = 0; i != shape_tensor.ElementNum(); ++i) {
|
||||
reshape_dims.d[i] = *(shape_ptr + i);
|
||||
}
|
||||
shuffle_layer->setReshapeDimensions(reshape_dims);
|
||||
} else {
|
||||
if (in_tensors_.size() != INPUT_SIZE2) {
|
||||
MS_LOG(ERROR) << "invalid shape tensor for reshape " << op_name_;
|
||||
|
@ -270,6 +286,10 @@ int ShuffleTensorRT::AddFlattenOp(nvinfer1::IShuffleLayer *shuffle_layer) {
|
|||
}
|
||||
|
||||
int ShuffleTensorRT::AddExpandDimsOp(nvinfer1::IShuffleLayer *shuffle_layer) {
|
||||
if (!input(ctx_, 0).is_tensor) {
|
||||
shuffler_output_ = shuffler_input_;
|
||||
return RET_OK;
|
||||
}
|
||||
int axis;
|
||||
if (in_tensors_[1].DataType() == DataType::kNumberTypeInt64) {
|
||||
auto axis_data = static_cast<const int64_t *>(in_tensors_[1].Data());
|
||||
|
@ -281,114 +301,44 @@ int ShuffleTensorRT::AddExpandDimsOp(nvinfer1::IShuffleLayer *shuffle_layer) {
|
|||
MS_LOG(WARNING) << op_name_ << " axis tensor data type is " << static_cast<int>(in_tensors_[1].DataType());
|
||||
return RET_ERROR;
|
||||
}
|
||||
shuffler_output_ = ExpandDim(shuffle_layer, shuffler_input_, axis);
|
||||
shuffler_output_ = ExpandDim(ctx_, shuffler_input_, axis);
|
||||
return shuffler_output_ == nullptr ? RET_ERROR : RET_OK;
|
||||
}
|
||||
|
||||
int ShuffleTensorRT::AddBroadcastToOp(nvinfer1::IShuffleLayer *shuffle_layer) {
|
||||
if (out_tensors_[0].ElementNum() != in_tensors_[0].ElementNum() &&
|
||||
out_tensors_[0].Shape().size() == in_tensors_[0].Shape().size()) {
|
||||
MS_LOG(WARNING) << "broadcast element cnt changes, ignore broadcast for " << op_name_;
|
||||
shuffle_layer->setReshapeDimensions(shuffler_input_->getDimensions());
|
||||
MS_LOG(WARNING) << "here " << op_name_;
|
||||
} else if (out_tensors_[0].ElementNum() == in_tensors_[0].ElementNum()) {
|
||||
nvinfer1::Dims new_dims = ConvertCudaDims(out_tensors_[0].Shape());
|
||||
if (new_dims.nbDims == -1) {
|
||||
MS_LOG(ERROR) << "ConvertCudaDims failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
new_dims.d[0] = shuffler_input_->getDimensions().d[0];
|
||||
shuffle_layer->setReshapeDimensions(new_dims);
|
||||
MS_LOG(WARNING) << "here " << op_name_;
|
||||
if (!in_tensors_[1].IsConst()) {
|
||||
auto input_shape_tensor = input(ctx_, 1).trt_tensor_;
|
||||
shuffler_output_ = Broadcast(ctx_, shuffler_input_, input_shape_tensor);
|
||||
} else {
|
||||
MS_LOG(ERROR) << "broadcast needs check for " << op_name_;
|
||||
std::vector<int> input_shape;
|
||||
const int *shape_ptr = reinterpret_cast<const int *>(in_tensors_[1].Data());
|
||||
for (int i = 0; i != in_tensors_[1].ElementNum(); ++i) {
|
||||
input_shape.push_back(*(shape_ptr + i));
|
||||
}
|
||||
|
||||
nvinfer1::Dims in_tensor_dims = shuffler_input_->getDimensions();
|
||||
auto input_shape_tensor = ctx_->ConvertTo1DTensor(input_shape);
|
||||
|
||||
while (in_tensor_dims.nbDims < static_cast<int64_t>(input_shape.size())) {
|
||||
shuffler_input_ = ExpandDim(ctx_, shuffler_input_, 0);
|
||||
if (shuffler_input_->getDimensions().nbDims == -1) {
|
||||
MS_LOG(ERROR) << "ConvertCudaDims failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
shuffle_layer->setReshapeDimensions(shuffler_input_->getDimensions());
|
||||
shuffler_input_ = shuffle_layer->getOutput(0);
|
||||
in_tensor_dims = shuffler_input_->getDimensions();
|
||||
}
|
||||
|
||||
auto size_tensor = ctx_->network()->addShape(*shuffler_input_)->getOutput(0);
|
||||
size_tensor = ctx_->network()
|
||||
->addElementWise(*input_shape_tensor, *size_tensor, nvinfer1::ElementWiseOperation::kMAX)
|
||||
->getOutput(0);
|
||||
shuffler_output_ = Broadcast(ctx_, shuffler_input_, size_tensor);
|
||||
}
|
||||
shuffler_output_ = shuffle_layer->getOutput(0);
|
||||
return shuffler_output_ == nullptr ? RET_ERROR : RET_OK;
|
||||
}
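The const-shape branch above left-pads the input rank with 1s and then takes an element-wise max between the target shape and the padded input shape, which is the usual NumPy-style broadcast shape resolution. The shape rule in isolation (host sketch; assumes the two shapes are broadcast-compatible):

#include <algorithm>
#include <cstdint>
#include <vector>

// Resolve the broadcast result shape: left-pad the smaller rank with 1s, then
// take the per-dimension maximum, as the const branch of AddBroadcastToOp does.
std::vector<int64_t> BroadcastShapeRef(std::vector<int64_t> a, std::vector<int64_t> b) {
  while (a.size() < b.size()) a.insert(a.begin(), 1);
  while (b.size() < a.size()) b.insert(b.begin(), 1);
  std::vector<int64_t> out(a.size());
  for (size_t i = 0; i < a.size(); ++i) {
    out[i] = std::max(a[i], b[i]);
  }
  return out;
}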
|
||||
|
||||
nvinfer1::ITensor *ShuffleTensorRT::ExpandDim(nvinfer1::IShuffleLayer *shuffle_layer, nvinfer1::ITensor *input_tensor,
|
||||
int axis) {
|
||||
auto input_dims = input_tensor->getDimensions();
|
||||
// if expand dim not at last dim and shape is dynamic, change to expanddim at last dim and transpose
|
||||
bool special_expand = false;
|
||||
for (int i = 0; i < input_dims.nbDims; i++) {
|
||||
special_expand = special_expand || input_dims.d[i] == -1;
|
||||
}
|
||||
special_expand = special_expand && (axis != -1 && axis != input_dims.nbDims - 1);
|
||||
|
||||
if (special_expand) {
|
||||
std::vector<int64_t> new_shape;
|
||||
for (int i = 0; i < input_dims.nbDims; i++) {
|
||||
new_shape.push_back(input_dims.d[i] == -1 ? 0 : input_dims.d[i]);
|
||||
}
|
||||
new_shape.push_back(1);
|
||||
nvinfer1::Dims new_dims = ConvertCudaDims(new_shape);
|
||||
if (new_dims.nbDims == -1) {
|
||||
MS_LOG(ERROR) << "ConvertCudaDims failed for " << op_name_;
|
||||
return nullptr;
|
||||
}
|
||||
shuffle_layer->setReshapeDimensions(new_dims);
|
||||
// transpose
|
||||
nvinfer1::Permutation perm{};
|
||||
for (int i = 0; i < new_dims.nbDims; i++) {
|
||||
if (i < axis) {
|
||||
perm.order[i] = i;
|
||||
} else if (i == axis) {
|
||||
perm.order[i] = new_dims.nbDims - 1;
|
||||
} else {
|
||||
perm.order[i] = i - 1;
|
||||
}
|
||||
}
|
||||
nvinfer1::IShuffleLayer *trans_layer = ctx_->network()->addShuffle(*shuffle_layer->getOutput(0));
|
||||
if (trans_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "add transpose layer failed for special expand dims op " << op_name_;
|
||||
return nullptr;
|
||||
}
|
||||
trans_layer->setFirstTranspose(perm);
|
||||
return trans_layer->getOutput(0);
|
||||
} else {
|
||||
std::vector<int64_t> new_shape;
|
||||
for (int i = 0; i < input_dims.nbDims; i++) {
|
||||
if (axis == i) {
|
||||
new_shape.push_back(1);
|
||||
}
|
||||
new_shape.push_back(input_dims.d[i] == -1 ? 0 : input_dims.d[i]);
|
||||
}
|
||||
if (axis == -1 || axis == input_dims.nbDims) {
|
||||
new_shape.push_back(1);
|
||||
}
|
||||
nvinfer1::Dims new_dims = ConvertCudaDims(new_shape);
|
||||
if (new_dims.nbDims == -1) {
|
||||
MS_LOG(ERROR) << "ConvertCudaDims failed for " << op_name_;
|
||||
return nullptr;
|
||||
}
|
||||
shuffle_layer->setReshapeDimensions(new_dims);
|
||||
return shuffle_layer->getOutput(0);
|
||||
}
|
||||
}
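Setting the dynamic-dimension special case aside, ExpandDim only inserts a length-1 dimension at the requested axis. The shape arithmetic on its own (a host sketch with an illustrative name):

#include <cstdint>
#include <vector>

// Insert a length-1 dimension at `axis`; axis == -1 (or axis == rank) appends
// it at the end, matching the plain path of ExpandDim above.
std::vector<int64_t> ExpandDimShapeRef(const std::vector<int64_t> &shape, int axis) {
  std::vector<int64_t> out(shape);
  int rank = static_cast<int>(shape.size());
  int pos = (axis == -1) ? rank : axis;
  out.insert(out.begin() + pos, 1);
  return out;
}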
|
||||
|
||||
nvinfer1::Dims ShuffleTensorRT::InferReshapeDims(const nvinfer1::Dims &input_dims,
                                                 const std::vector<int64_t> &ms_input_shape,
                                                 const std::vector<int64_t> &ms_output_shape) {
  // TensorRT can resolve reshape placeholders: 0 copies the input dim, -1 is inferred from the element count.
  nvinfer1::Dims reshape_dims = ConvertCudaDims(ms_output_shape);
  if (reshape_dims.nbDims == -1) {
    MS_LOG(ERROR) << "ConvertCudaDims failed for " << op_name_;
    return reshape_dims;
  }
  for (int i = 0; i < reshape_dims.nbDims; i++) {
    if (input_dims.d[i] == -1) {
      if (ms_input_shape[i] == ms_output_shape[i]) {
        reshape_dims.d[i] = 0;
      } else {
        reshape_dims.d[i] = -1;
      }
    }
    MS_LOG(DEBUG) << "reshape infer_index " << i << " value: " << reshape_dims.d[i];
  }
  return reshape_dims;
}
REGISTER_TENSORRT_CREATOR(ops::kNameUnsqueeze, ShuffleTensorRT)
REGISTER_TENSORRT_CREATOR(ops::kNameSqueeze, ShuffleTensorRT)
REGISTER_TENSORRT_CREATOR(ops::kNameReshape, ShuffleTensorRT)
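InferReshapeDims leans on TensorRT's reshape placeholder convention: a 0 copies the corresponding input dimension and a -1 is inferred from the remaining element count. A host-side sketch of that resolution rule, for illustration only (hypothetical helper, assumes at most one -1 in the spec):

#include <cstdio>
#include <vector>

// Resolve a reshape spec against a concrete input shape:
// 0  -> copy the input dimension at the same index,
// -1 -> infer from the remaining element count.
std::vector<int> ResolveReshape(const std::vector<int> &in, std::vector<int> spec) {
  long long total = 1, known = 1;
  int infer_idx = -1;
  for (int d : in) total *= d;
  for (size_t i = 0; i < spec.size(); ++i) {
    if (spec[i] == 0) spec[i] = in[i];
    if (spec[i] == -1) { infer_idx = static_cast<int>(i); continue; }
    known *= spec[i];
  }
  if (infer_idx >= 0) spec[infer_idx] = static_cast<int>(total / known);
  return spec;
}

int main() {
  // Input (2, 32, 64) with spec (0, -1): dim 0 is copied, dim 1 becomes 32 * 64 = 2048.
  for (int d : ResolveReshape({2, 32, 64}, {0, -1})) printf("%d ", d);
  printf("\n");
  return 0;
}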
@ -42,9 +42,6 @@ class ShuffleTensorRT : public TensorRTOp {
|
|||
int AddFlattenOp(nvinfer1::IShuffleLayer *shuffle_layer);
|
||||
int AddExpandDimsOp(nvinfer1::IShuffleLayer *shuffle_layer);
|
||||
int AddBroadcastToOp(nvinfer1::IShuffleLayer *shuffle_layer);
|
||||
nvinfer1::ITensor *ExpandDim(nvinfer1::IShuffleLayer *shuffle_layer, nvinfer1::ITensor *input_tensor, int axis);
|
||||
nvinfer1::Dims InferReshapeDims(const nvinfer1::Dims &input_dims, const std::vector<int64_t> &ms_input_shape,
|
||||
const std::vector<int64_t> &ms_output_shape);
|
||||
|
||||
Format out_format_ = Format::NCHW;
|
||||
nvinfer1::ITensor *shuffler_input_{nullptr};
|
||||
|
|
|
@ -71,24 +71,32 @@ class StrideSliceTensorRTUtil final : public SliceTensorRTUtil {
|
|||
}
|
||||
int axis_value = *(static_cast<const int *>(in_tensors.at(axis_index).Data()));
|
||||
int start_value = *(static_cast<const int *>(begin.Data()));
|
||||
int size_value = *(static_cast<const int *>(end.Data()));
|
||||
int end_value = *(static_cast<const int *>(end.Data()));
|
||||
int stride_value = *(static_cast<const int *>(stride.Data()));
|
||||
auto input_dims = helper.trt_tensor_->getDimensions();
|
||||
start_dims.nbDims = input_dims.nbDims;
|
||||
size_dims.nbDims = input_dims.nbDims;
|
||||
stride_dims = nvinfer1::Dims{size_dims.nbDims, {}};
|
||||
std::fill(start_dims.d, start_dims.d + start_dims.nbDims, 0);
|
||||
std::fill(stride_dims.d, stride_dims.d + stride_dims.nbDims, 1);
|
||||
if (start_value < 0) {
|
||||
start_value = input_dims.d[axis_value] + start_value;
|
||||
}
|
||||
for (int i = 0; i < start_dims.nbDims; i++) {
|
||||
if (i == axis_value) {
|
||||
start_dims.d[i] = start_value;
|
||||
size_dims.d[i] = size_value >= 0 ? std::min(size_value, input_dims.d[i]) - start_dims.d[i]
|
||||
: size_value + input_dims.d[i] - start_dims.d[i];
|
||||
stride_dims.d[i] = stride_value;
|
||||
if (end_value >= 0) {
|
||||
size_dims.d[i] = std::min(end_value, input_dims.d[i]) - start_dims.d[i];
|
||||
} else if (end_value >= -input_dims.d[i]) {
|
||||
size_dims.d[i] = end_value + input_dims.d[i] - start_dims.d[i];
|
||||
} else {
|
||||
size_dims.d[i] = input_dims.d[i];
|
||||
}
|
||||
} else {
|
||||
start_dims.d[i] = 0;
|
||||
size_dims.d[i] = helper.trt_tensor_->getDimensions().d[i];
|
||||
}
|
||||
}
|
||||
|
||||
int stride_value = *(static_cast<const int *>(stride.Data()));
|
||||
stride_dims = nvinfer1::Dims{size_dims.nbDims, {}};
|
||||
std::fill(stride_dims.d, stride_dims.d + stride_dims.nbDims, stride_value);
|
||||
}
|
||||
return std::make_tuple(start_dims, size_dims, stride_dims);
|
||||
}
|
||||
|
@ -103,7 +111,7 @@ class StrideSliceTensorRTUtil final : public SliceTensorRTUtil {
|
|||
shape.erase(shape.begin() + i);
|
||||
}
|
||||
}
|
||||
return Reshape(ctx, input, shape);
|
||||
return shape.empty() ? nullptr : Reshape(ctx, input, shape);
|
||||
}
|
||||
return input;
|
||||
}
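The shrink path in PostProcess drops every axis whose bit is set in the shrink_axis mask once the slice has reduced it to length 1; when every axis is dropped the result is rank-0. A standalone sketch of that bit handling (plain C++, illustration only):

#include <cstdio>
#include <vector>

// Remove the axes selected by shrink_axis_mask (bit i set -> drop axis i).
// Iterating from the back keeps earlier indices valid while erasing.
std::vector<int> ShrinkAxes(std::vector<int> shape, int shrink_axis_mask) {
  for (int i = static_cast<int>(shape.size()) - 1; i >= 0; --i) {
    if ((shrink_axis_mask & (1 << i)) != 0) {
      shape.erase(shape.begin() + i);
    }
  }
  return shape;
}

int main() {
  // Mask 0b101 on a sliced shape (1, 5, 1) drops axes 0 and 2, leaving (5).
  for (int d : ShrinkAxes({1, 5, 1}, 0b101)) printf("%d ", d);
  printf("\n");
  return 0;
}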
|
||||
|
@ -136,8 +144,8 @@ class SliceFusionTensorRTUtil final : public SliceTensorRTUtil {
|
|||
const auto &begin = in_tensors.at(1);
|
||||
const auto &size = in_tensors.at(SIZE_INDEX);
|
||||
|
||||
auto start_dims = lite::ConvertCudaDims(begin.Data(), begin.ElementNum());
|
||||
auto size_dims = lite::ConvertCudaDims(size.Data(), size.ElementNum());
|
||||
auto start_dims = lite::ConvertCudaDims(begin);
|
||||
auto size_dims = lite::ConvertCudaDims(size);
|
||||
for (int i = 0; i != size_dims.nbDims; ++i) {
|
||||
if (size_dims.d[i] == -1) {
|
||||
size_dims.d[i] = helper.trt_tensor_->getDimensions().d[i];
|
||||
|
@ -278,17 +286,16 @@ int SliceTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
|||
this->layer_ = slice_layer;
|
||||
slice_layer->setName(op_name_.c_str());
|
||||
nvinfer1::ITensor *out_tensor = slice_layer->getOutput(0);
|
||||
out_tensor = util_->PostProcess(ctx, out_tensor, in_tensors_, out_tensors_);
|
||||
if (out_tensor == nullptr) {
|
||||
MS_LOG(ERROR) << "output tensor create failed";
|
||||
return RET_ERROR;
|
||||
auto post_tensor = util_->PostProcess(ctx, out_tensor, in_tensors_, out_tensors_);
|
||||
bool rank_0 = false;
|
||||
if (post_tensor == nullptr) {
|
||||
rank_0 = true;
|
||||
post_tensor = out_tensor;
|
||||
}
|
||||
auto helper = ITensorHelper{out_tensor, slice_input.format_, slice_input.same_format_};
|
||||
auto helper = ITensorHelper{post_tensor, slice_input.format_, slice_input.same_format_, !rank_0};
|
||||
ctx->RegisterTensor(helper, out_tensors_[0].Name());
|
||||
MS_LOG(DEBUG) << "slice output : " << GetTensorFormat(helper);
|
||||
return RET_OK;
|
||||
}
|
||||
REGISTER_TENSORRT_CREATOR(ops::kNameStridedSlice, SliceTensorRT)
|
||||
REGISTER_TENSORRT_CREATOR(ops::kNameSliceFusion, SliceTensorRT)
|
||||
REGISTER_TENSORRT_CREATOR(ops::kNameCrop, SliceTensorRT)
|
||||
} // namespace mindspore::lite
|
||||
|
|
|
@ -0,0 +1,131 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/extendrt/delegate/tensorrt/op/slicefusion_tensorrt.h"
|
||||
#include <algorithm>
|
||||
#include <utility>
|
||||
#include "src/extendrt/delegate/tensorrt/tensorrt_utils.h"
|
||||
#include "ops/fusion/slice_fusion.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
nvinfer1::ITensor *SliceFusionTensorRT::GetDynamicSliceSize(TensorRTContext *ctx, nvinfer1::ITensor *input,
|
||||
nvinfer1::Dims start_dims, nvinfer1::Dims size_dims) {
|
||||
auto in_tensor_shape = ctx->network()->addShape(*input)->getOutput(0);
|
||||
if (in_tensor_shape == nullptr) {
|
||||
MS_LOG(ERROR) << "add shape layer of input failed!";
|
||||
return nullptr;
|
||||
}
|
||||
std::vector<nvinfer1::ITensor *> shape_tensors;
|
||||
auto input_dims = input->getDimensions();
|
||||
std::vector<int> input_shape_vec;
|
||||
for (int i = 0; i != input_dims.nbDims; ++i) {
|
||||
if (input_dims.d[i] == -1) {
|
||||
if (!input_shape_vec.empty()) {
|
||||
shape_tensors.push_back(ctx->ConvertTo1DTensor(input_shape_vec));
|
||||
input_shape_vec.clear();
|
||||
}
|
||||
auto starts = nvinfer1::Dims{1, {i}};
|
||||
auto size = nvinfer1::Dims{1, {1}};
|
||||
auto strides = nvinfer1::Dims{1, {1}};
|
||||
auto slice_layer = ctx->network()->addSlice(*in_tensor_shape, starts, size, strides);
|
||||
if (slice_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "add slice layer failed";
|
||||
return nullptr;
|
||||
}
|
||||
auto start_tensor = ctx->ConvertTo1DTensor(start_dims.d[i]);
|
||||
shape_tensors.push_back(
|
||||
ctx->network()
|
||||
->addElementWise(*slice_layer->getOutput(0), *start_tensor, nvinfer1::ElementWiseOperation::kSUB)
|
||||
->getOutput(0));
|
||||
} else {
|
||||
input_shape_vec.push_back(size_dims.d[i]);
|
||||
}
|
||||
}
|
||||
if (!input_shape_vec.empty()) {
|
||||
shape_tensors.push_back(ctx->ConvertTo1DTensor(input_shape_vec));
|
||||
}
|
||||
nvinfer1::ITensor *concat_tensors[shape_tensors.size()];
|
||||
for (size_t i = 0; i != shape_tensors.size(); ++i) {
|
||||
concat_tensors[i] = shape_tensors[i];
|
||||
}
|
||||
auto concat_layer = ctx->network()->addConcatenation(concat_tensors, shape_tensors.size());
|
||||
if (concat_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "add concat layer failed!";
|
||||
return nullptr;
|
||||
}
|
||||
concat_layer->setAxis(0);
|
||||
|
||||
return concat_layer->getOutput(0);
|
||||
}
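Per dimension, the concatenated size tensor built above holds either the static value from size_dims or, for a dimension that is dynamic (-1) at build time, runtime_shape[i] - start[i] computed with the kSUB element-wise layer. The same arithmetic on host integers, as an illustration only (hypothetical helper, not part of this patch):

#include <cstdio>
#include <vector>

// Host-side mirror of the per-dimension size selection above:
// dynamic dims (-1 at build time) are sized as runtime_dim - start, static dims keep size_dims.
std::vector<int> ExpectedSliceSize(const std::vector<int> &build_dims, const std::vector<int> &runtime_dims,
                                   const std::vector<int> &start, const std::vector<int> &size) {
  std::vector<int> out(build_dims.size());
  for (size_t i = 0; i < build_dims.size(); ++i) {
    out[i] = (build_dims[i] == -1) ? runtime_dims[i] - start[i] : size[i];
  }
  return out;
}

int main() {
  // Build-time shape (-1, 16), runtime shape (8, 16), begin (2, 0), size (-1, 16) -> (6, 16).
  for (int d : ExpectedSliceSize({-1, 16}, {8, 16}, {2, 0}, {-1, 16})) printf("%d ", d);
  printf("\n");
  return 0;
}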
|
||||
|
||||
int SliceFusionTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
|
||||
const std::vector<TensorInfo> &out_tensors) {
|
||||
if (in_tensors.size() != SLICE_INPUT_SIZE) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (out_tensors.size() != 1) {
|
||||
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
dynamic_shape_params_.support_hw_dynamic_ = false;
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int SliceFusionTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
||||
ITensorHelper slice_input;
|
||||
int ret = PreprocessInputs2SameDim(ctx, input(ctx, 0), &slice_input);
|
||||
if (ret != RET_OK || slice_input.trt_tensor_ == nullptr) {
|
||||
MS_LOG(ERROR) << "PreprocessInputs2SameDim input tensor failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
const auto &begin = in_tensors_.at(1);
|
||||
const auto &size = in_tensors_.at(SIZE_INDEX);
|
||||
|
||||
auto start_dims = lite::ConvertCudaDims(begin);
|
||||
auto size_dims = lite::ConvertCudaDims(size);
|
||||
nvinfer1::ITensor *size_tensor = nullptr;
|
||||
for (int i = 0; i != size_dims.nbDims; ++i) {
|
||||
if (size_dims.d[i] == -1 && !IsDynamicInput(ctx, 0)) {
|
||||
size_dims.d[i] = slice_input.trt_tensor_->getDimensions().d[i];
|
||||
}
|
||||
}
|
||||
if (IsDynamicInput(ctx, 0)) {
|
||||
size_tensor = GetDynamicSliceSize(ctx, slice_input.trt_tensor_, start_dims, size_dims);
|
||||
size_dims = nvinfer1::Dims{-1};
|
||||
}
|
||||
auto stride_dims = lite::ConvertCudaDims(1, begin.ElementNum());
|
||||
|
||||
nvinfer1::ISliceLayer *slice_layer =
|
||||
ctx->network()->addSlice(*slice_input.trt_tensor_, start_dims, size_dims, stride_dims);
|
||||
if (slice_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "add Slice op failed for TensorRT: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (size_tensor != nullptr) {
|
||||
slice_layer->setInput(INPUT_SIZE2, *size_tensor);
|
||||
}
|
||||
this->layer_ = slice_layer;
|
||||
slice_layer->setName(op_name_.c_str());
|
||||
nvinfer1::ITensor *out_tensor = slice_layer->getOutput(0);
|
||||
auto helper = ITensorHelper{out_tensor, slice_input.format_, slice_input.same_format_};
|
||||
ctx->RegisterTensor(helper, out_tensors_[0].Name());
|
||||
MS_LOG(DEBUG) << "slice output : " << GetTensorFormat(helper);
|
||||
return RET_OK;
|
||||
}
|
||||
REGISTER_TENSORRT_CREATOR(ops::kNameSliceFusion, SliceFusionTensorRT)
|
||||
} // namespace mindspore::lite
|
|
@ -0,0 +1,44 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_SLICE_FUSION_TENSORRT_H_
|
||||
#define MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_SLICE_FUSION_TENSORRT_H_
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "src/extendrt/delegate/tensorrt/op/tensorrt_op.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
constexpr int SIZE_INDEX = 2;
|
||||
constexpr int SLICE_INPUT_SIZE = 3;
|
||||
class SliceFusionTensorRT : public TensorRTOp {
|
||||
public:
|
||||
SliceFusionTensorRT(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
|
||||
const std::vector<TensorInfo> &out_tensors, std::string name)
|
||||
: TensorRTOp(base_operator, in_tensors, out_tensors, name) {}
|
||||
|
||||
~SliceFusionTensorRT() override = default;
|
||||
|
||||
int AddInnerOp(TensorRTContext *ctx) override;
|
||||
|
||||
int IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
|
||||
const std::vector<TensorInfo> &out_tensors) override;
|
||||
|
||||
private:
|
||||
nvinfer1::ITensor *GetDynamicSliceSize(TensorRTContext *ctx, nvinfer1::ITensor *input, nvinfer1::Dims,
|
||||
nvinfer1::Dims);
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_SLICE_FUSION_TENSORRT_H_
|
|
@ -29,7 +29,7 @@
|
|||
namespace mindspore::lite {
|
||||
int SpaceToBatchTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
|
||||
const std::vector<TensorInfo> &out_tensors) {
|
||||
if (in_tensors.size() != 3) {
|
||||
if (in_tensors.size() != INPUT_SIZE3) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
@ -38,8 +38,6 @@ int SpaceToBatchTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const
|
|||
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
dynamic_shape_params_.support_dynamic_ = false;
|
||||
dynamic_shape_params_.support_hw_dynamic_ = false;
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
@ -52,19 +50,17 @@ int SpaceToBatchTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
|||
MS_LOG(ERROR) << "block_h not equal block_w " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
const int *pad_ptr = reinterpret_cast<const int *>(in_tensors_[2].Data());
|
||||
const int *pad_ptr = reinterpret_cast<const int *>(in_tensors_[INPUT_SIZE2].Data());
|
||||
int ph0 = *(pad_ptr + 0);
|
||||
int ph1 = *(pad_ptr + 1);
|
||||
int pw0 = *(pad_ptr + 2);
|
||||
int pw1 = *(pad_ptr + 3);
|
||||
auto in_dims = input_tensor->getDimensions();
|
||||
int in = in_dims.d[0];
|
||||
int ic = in_dims.d[1];
|
||||
int ih = in_dims.d[2];
|
||||
int iw = in_dims.d[3];
|
||||
int pw0 = *(pad_ptr + INPUT_SIZE2);
|
||||
int pw1 = *(pad_ptr + INPUT_SIZE3);
|
||||
|
||||
auto plugin =
|
||||
std::make_shared<SpaceToBatchPlugin>(input_tensor->getName(), in, ic, ih, iw, bh, ph0, ph1, pw0, pw1, device_id_);
|
||||
auto plugin = std::make_shared<SpaceToBatchPlugin>(input_tensor->getName(), bh, ph0, ph1, pw0, pw1, device_id_);
|
||||
if (plugin == nullptr) {
|
||||
MS_LOG(ERROR) << "add spacetobatch plugin failed for" << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
nvinfer1::ITensor *inputTensors[] = {input_tensor};
|
||||
nvinfer1::IPluginV2Layer *space2batch_opt_layer = ctx->network()->addPluginV2(inputTensors, 1, *plugin);
|
||||
if (space2batch_opt_layer == nullptr) {
|
||||
|
@ -93,15 +89,20 @@ int SpaceToBatchPlugin::enqueue(const nvinfer1::PluginTensorDesc *inputDesc,
|
|||
|
||||
int SpaceToBatchPlugin::RunCudaSpaceToBatch(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs,
|
||||
void *const *outputs, cudaStream_t stream) {
|
||||
int on = in_ * bh_ * bh_;
|
||||
int oc = ic_;
|
||||
int oh = (ih_ + ph0_ + ph1_) / bh_;
|
||||
int ow = (iw_ + pw0_ + pw1_) / bh_;
|
||||
nvinfer1::Dims input_dims = inputDesc[0].dims;
|
||||
int in = input_dims.d[0];
|
||||
int ic = input_dims.d[1];
|
||||
int ih = input_dims.d[2];
|
||||
int iw = input_dims.d[3];
|
||||
int on = in * bh_ * bh_;
|
||||
int oc = ic;
|
||||
int oh = (ih + ph0_ + ph1_) / bh_;
|
||||
int ow = (iw + pw0_ + pw1_) / bh_;
|
||||
|
||||
int size = in_ * ic_ * ih_ * iw_;
|
||||
int size = in * ic * ih * iw;
|
||||
|
||||
CalSpaceToBatch<float>(size, static_cast<const float *>(inputs[0]), in_, ih_, iw_, ic_, on, oh, ow, oc, ph0_, ph1_,
|
||||
pw0_, pw1_, bh_, static_cast<float *>(outputs[0]), device_id_, stream);
|
||||
CalSpaceToBatch<float>(size, static_cast<const float *>(inputs[0]), in, ih, iw, ic, on, oh, ow, oc, ph0_, ph1_, pw0_,
|
||||
pw1_, bh_, static_cast<float *>(outputs[0]), device_id_, stream);
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
@ -115,7 +116,7 @@ nvinfer1::IPluginV2DynamicExt *SpaceToBatchPlugin::clone() const noexcept {
|
|||
return plugin;
|
||||
}
|
||||
|
||||
size_t SpaceToBatchPlugin::getSerializationSize() const noexcept { return sizeof(int) * 9; }
|
||||
size_t SpaceToBatchPlugin::getSerializationSize() const noexcept { return sizeof(int) * 5; }
|
||||
|
||||
nvinfer1::DimsExprs SpaceToBatchPlugin::getOutputDimensions(int32_t index, const nvinfer1::DimsExprs *inputs,
|
||||
int nbInputDims,
|
||||
|
@ -127,19 +128,15 @@ nvinfer1::DimsExprs SpaceToBatchPlugin::getOutputDimensions(int32_t index, const
|
|||
dims.d[1] = inputs[0].d[1];
|
||||
auto bh = exprBuilder.constant(bh_);
|
||||
auto ph_sum = exprBuilder.constant(ph0_ + ph1_);
|
||||
auto sum0 = exprBuilder.operation(nvinfer1::DimensionOperation::kSUM, *inputs[0].d[2], *ph_sum);
|
||||
dims.d[2] = exprBuilder.operation(nvinfer1::DimensionOperation::kFLOOR_DIV, *sum0, *bh);
|
||||
auto sum0 = exprBuilder.operation(nvinfer1::DimensionOperation::kSUM, *inputs[0].d[INPUT_SIZE2], *ph_sum);
|
||||
dims.d[INPUT_SIZE2] = exprBuilder.operation(nvinfer1::DimensionOperation::kFLOOR_DIV, *sum0, *bh);
|
||||
auto pw_sum = exprBuilder.constant(pw0_ + pw1_);
|
||||
auto sum1 = exprBuilder.operation(nvinfer1::DimensionOperation::kSUM, *inputs[0].d[3], *pw_sum);
|
||||
dims.d[3] = exprBuilder.operation(nvinfer1::DimensionOperation::kFLOOR_DIV, *sum1, *bh);
|
||||
auto sum1 = exprBuilder.operation(nvinfer1::DimensionOperation::kSUM, *inputs[0].d[INPUT_SIZE3], *pw_sum);
|
||||
dims.d[INPUT_SIZE3] = exprBuilder.operation(nvinfer1::DimensionOperation::kFLOOR_DIV, *sum1, *bh);
|
||||
return dims;
|
||||
}
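getOutputDimensions encodes the same shape rule the CUDA path uses: the batch grows by bh * bh while the padded spatial dims shrink by bh. A host-side sketch of that formula (illustration only, matching the kSUM / kFLOOR_DIV expressions above):

#include <cstdio>

struct NCHW { int n, c, h, w; };

// Output shape of SpaceToBatch for block size bh and pads (ph0, ph1, pw0, pw1).
NCHW SpaceToBatchOutDims(NCHW in, int bh, int ph0, int ph1, int pw0, int pw1) {
  return NCHW{in.n * bh * bh, in.c, (in.h + ph0 + ph1) / bh, (in.w + pw0 + pw1) / bh};
}

int main() {
  // (1, 3, 6, 6) with bh = 2 and a symmetric pad of 1 -> (4, 3, 4, 4).
  NCHW out = SpaceToBatchOutDims({1, 3, 6, 6}, 2, 1, 1, 1, 1);
  printf("%d %d %d %d\n", out.n, out.c, out.h, out.w);
  return 0;
}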
|
||||
|
||||
void SpaceToBatchPlugin::serialize(void *buffer) const noexcept {
|
||||
SerializeValue(&buffer, &in_, sizeof(int));
|
||||
SerializeValue(&buffer, &ic_, sizeof(int));
|
||||
SerializeValue(&buffer, &ih_, sizeof(int));
|
||||
SerializeValue(&buffer, &iw_, sizeof(int));
|
||||
SerializeValue(&buffer, &bh_, sizeof(int));
|
||||
SerializeValue(&buffer, &ph0_, sizeof(int));
|
||||
SerializeValue(&buffer, &ph1_, sizeof(int));
|
||||
|
|
|
@ -40,13 +40,8 @@ class SpaceToBatchTensorRT : public TensorRTOp {
|
|||
constexpr auto SPACETOTENSORRT_PLUGIN_NAME{"SpaceToBatchPlugin"};
|
||||
class SpaceToBatchPlugin : public TensorRTPlugin {
|
||||
public:
|
||||
SpaceToBatchPlugin(const std::string name, int in, int ic, int ih, int iw, int bh, int ph0, int ph1, int pw0, int pw1,
|
||||
uint32_t device_id)
|
||||
SpaceToBatchPlugin(const std::string name, int bh, int ph0, int ph1, int pw0, int pw1, uint32_t device_id)
|
||||
: TensorRTPlugin(name, std::string(SPACETOTENSORRT_PLUGIN_NAME), device_id),
|
||||
in_(in),
|
||||
ic_(ic),
|
||||
ih_(ih),
|
||||
iw_(iw),
|
||||
bh_(bh),
|
||||
ph0_(ph0),
|
||||
ph1_(ph1),
|
||||
|
@ -56,23 +51,15 @@ class SpaceToBatchPlugin : public TensorRTPlugin {
|
|||
SpaceToBatchPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc)
|
||||
: TensorRTPlugin(std::string(name), std::string(SPACETOTENSORRT_PLUGIN_NAME)) {
|
||||
const nvinfer1::PluginField *fields = fc->fields;
|
||||
in_ = static_cast<const int *>(fields[0].data)[0];
|
||||
ic_ = static_cast<const int *>(fields[1].data)[0];
|
||||
ih_ = static_cast<const int *>(fields[2].data)[0];
|
||||
iw_ = static_cast<const int *>(fields[3].data)[0];
|
||||
bh_ = static_cast<const int *>(fields[4].data)[0];
|
||||
ph0_ = static_cast<const int *>(fields[5].data)[0];
|
||||
ph1_ = static_cast<const int *>(fields[6].data)[0];
|
||||
pw0_ = static_cast<const int *>(fields[7].data)[0];
|
||||
pw1_ = static_cast<const int *>(fields[8].data)[0];
|
||||
bh_ = static_cast<const int *>(fields[0].data)[0];
|
||||
ph0_ = static_cast<const int *>(fields[1].data)[0];
|
||||
ph1_ = static_cast<const int *>(fields[2].data)[0];
|
||||
pw0_ = static_cast<const int *>(fields[3].data)[0];
|
||||
pw1_ = static_cast<const int *>(fields[4].data)[0];
|
||||
}
|
||||
|
||||
SpaceToBatchPlugin(const char *name, const void *serialData, size_t serialLength)
|
||||
: TensorRTPlugin(std::string(name), std::string(SPACETOTENSORRT_PLUGIN_NAME)) {
|
||||
DeserializeValue(&serialData, &serialLength, &in_, sizeof(int));
|
||||
DeserializeValue(&serialData, &serialLength, &ic_, sizeof(int));
|
||||
DeserializeValue(&serialData, &serialLength, &ih_, sizeof(int));
|
||||
DeserializeValue(&serialData, &serialLength, &iw_, sizeof(int));
|
||||
DeserializeValue(&serialData, &serialLength, &bh_, sizeof(int));
|
||||
DeserializeValue(&serialData, &serialLength, &ph0_, sizeof(int));
|
||||
DeserializeValue(&serialData, &serialLength, &ph1_, sizeof(int));
|
||||
|
@ -93,15 +80,11 @@ class SpaceToBatchPlugin : public TensorRTPlugin {
|
|||
private:
|
||||
int RunCudaSpaceToBatch(const nvinfer1::PluginTensorDesc *inputDesc, const void *const *inputs, void *const *outputs,
|
||||
cudaStream_t stream);
|
||||
size_t in_;
|
||||
size_t ic_;
|
||||
size_t ih_;
|
||||
size_t iw_;
|
||||
size_t bh_;
|
||||
size_t ph0_;
|
||||
size_t ph1_;
|
||||
size_t pw0_;
|
||||
size_t pw1_;
|
||||
int bh_;
|
||||
int ph0_;
|
||||
int ph1_;
|
||||
int pw0_;
|
||||
int pw1_;
|
||||
const std::string layer_name_;
|
||||
std::string name_space_;
|
||||
};
|
||||
|
|
|
@ -24,19 +24,53 @@
|
|||
namespace mindspore::lite {
|
||||
int SplitTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
|
||||
const std::vector<TensorInfo> &out_tensors) {
|
||||
if (!IsShapeKnown()) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (in_tensors.size() != 1 && in_tensors.size() != INPUT_SIZE2) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
dynamic_shape_params_.support_dynamic_ = false;
|
||||
dynamic_shape_params_.support_hw_dynamic_ = false;
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
nvinfer1::ITensor *SplitTensorRT::GetDynamicSliceSize(TensorRTContext *ctx, nvinfer1::ITensor *input, size_t i) {
|
||||
auto in_tensor_shape = ctx->network()->addShape(*input)->getOutput(0);
|
||||
if (in_tensor_shape == nullptr) {
|
||||
MS_LOG(ERROR) << "add shape layer of input failed!";
|
||||
return nullptr;
|
||||
}
|
||||
auto len_tensor = ctx->ConvertTo1DTensor(static_cast<int>(size_splits_[i]));
|
||||
if (len_tensor == nullptr) {
|
||||
MS_LOG(ERROR) << "convert 1d tensor failed!";
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
nvinfer1::ITensor *concat_input_tensors[INPUT_SIZE2];
|
||||
concat_input_tensors[0] = in_tensor_shape;
|
||||
concat_input_tensors[1] = len_tensor;
|
||||
auto concat_layer = ctx->network()->addConcatenation(concat_input_tensors, INPUT_SIZE2);
|
||||
if (concat_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "add concat layer failed!";
|
||||
return nullptr;
|
||||
}
|
||||
concat_layer->setAxis(0);
|
||||
auto shape_and_len = concat_layer->getOutput(0);
|
||||
if (shape_and_len == nullptr) {
|
||||
MS_LOG(ERROR) << "get concat layer result failed!";
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::vector<int> gather_slices(input->getDimensions().nbDims);
|
||||
std::iota(gather_slices.begin(), gather_slices.end(), 0);
|
||||
gather_slices[axis_] = gather_slices.size();
|
||||
auto gather_slices_tensor = ctx->ConvertTo1DTensor(gather_slices);
|
||||
nvinfer1::IGatherLayer *gather_layer = ctx->network()->addGather(*shape_and_len, *gather_slices_tensor, 0);
|
||||
if (gather_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "add gather layer failed!";
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return gather_layer->getOutput(0);
|
||||
}
|
||||
|
||||
int SplitTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
||||
ITensorHelper split_input;
|
||||
int ret = PreprocessInputs2SameDim(ctx, input(ctx, 0), &split_input);
|
||||
|
@ -78,16 +112,24 @@ int SplitTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
|||
for (int i = 0; i != output_num_; ++i) {
|
||||
nvinfer1::Dims start_dims = lite::ConvertCudaDims(0, input_nbdims);
|
||||
start_dims.d[axis_] = axis_dim_index;
|
||||
nvinfer1::Dims size_dims{-1};
|
||||
nvinfer1::ITensor *size_tensor = nullptr;
|
||||
if (!IsDynamicInput(ctx, 0)) {
|
||||
size_dims = split_input.trt_tensor_->getDimensions();
|
||||
size_dims.d[axis_] = size_splits_[i];
|
||||
} else {
|
||||
size_tensor = GetDynamicSliceSize(ctx, split_input.trt_tensor_, i);
|
||||
}
|
||||
axis_dim_index += size_splits_[i];
|
||||
|
||||
nvinfer1::Dims size_dims = split_input.trt_tensor_->getDimensions();
|
||||
size_dims.d[axis_] = size_splits_[i];
|
||||
|
||||
slice_layer = ctx->network()->addSlice(*split_input.trt_tensor_, start_dims, size_dims, one_dims);
|
||||
if (slice_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "add Slice op failed for TensorRT: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (size_tensor != nullptr) {
|
||||
slice_layer->setInput(INPUT_SIZE2, *size_tensor);
|
||||
}
|
||||
|
||||
nvinfer1::ITensor *out_tensor = slice_layer->getOutput(0);
|
||||
if (type_ == ops::kNameUnstack) {
|
||||
|
|
|
@ -35,10 +35,11 @@ class SplitTensorRT : public TensorRTOp {
|
|||
const std::vector<TensorInfo> &out_tensors) override;
|
||||
|
||||
private:
|
||||
nvinfer1::ITensor *GetDynamicSliceSize(TensorRTContext *ctx, nvinfer1::ITensor *input, size_t i);
|
||||
int ParseParams(const ITensorHelper &helper);
|
||||
int64_t axis_;
|
||||
int64_t output_num_;
|
||||
std::vector<int64_t> size_splits_;
|
||||
int output_num_;
|
||||
std::vector<int> size_splits_;
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_SPLIT_TENSORRT_H_
|
||||
|
|
|
@ -0,0 +1,187 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/extendrt/delegate/tensorrt/op/strideslice_tensorrt.h"
|
||||
#include <algorithm>
|
||||
#include <numeric>
|
||||
#include "src/extendrt/delegate/tensorrt/tensorrt_utils.h"
|
||||
#include "ops/strided_slice.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
nvinfer1::ITensor *StrideSliceTensorRT::GetDynamicSliceSize(TensorRTContext *ctx, nvinfer1::ITensor *input,
|
||||
int size_dim, int axis) {
|
||||
auto in_tensor_shape = ctx->network()->addShape(*input)->getOutput(0);
|
||||
if (in_tensor_shape == nullptr) {
|
||||
MS_LOG(ERROR) << "add shape layer of input failed!";
|
||||
return nullptr;
|
||||
}
|
||||
auto len_tensor = ctx->ConvertTo1DTensor(static_cast<int>(size_dim));
|
||||
if (len_tensor == nullptr) {
|
||||
MS_LOG(ERROR) << "convert 1d tensor failed!";
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
nvinfer1::ITensor *concat_input_tensors[INPUT_SIZE2];
|
||||
concat_input_tensors[0] = in_tensor_shape;
|
||||
concat_input_tensors[1] = len_tensor;
|
||||
auto concat_layer = ctx->network()->addConcatenation(concat_input_tensors, INPUT_SIZE2);
|
||||
if (concat_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "add concat layer failed!";
|
||||
return nullptr;
|
||||
}
|
||||
concat_layer->setAxis(0);
|
||||
auto shape_and_len = concat_layer->getOutput(0);
|
||||
if (shape_and_len == nullptr) {
|
||||
MS_LOG(ERROR) << "get concat layer result failed!";
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
std::vector<int> gather_slices(input->getDimensions().nbDims);
|
||||
std::iota(gather_slices.begin(), gather_slices.end(), 0);
|
||||
gather_slices[axis] = gather_slices.size();
|
||||
auto gather_slices_tensor = ctx->ConvertTo1DTensor(gather_slices);
|
||||
nvinfer1::IGatherLayer *gather_layer = ctx->network()->addGather(*shape_and_len, *gather_slices_tensor, 0);
|
||||
if (gather_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "add gather layer failed!";
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return gather_layer->getOutput(0);
|
||||
}
|
||||
|
||||
int StrideSliceTensorRT::IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
|
||||
const std::vector<TensorInfo> &out_tensors) {
|
||||
if (in_tensors.size() < HAS_AXIS - 1) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (out_tensors.size() != 1) {
|
||||
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (!in_tensors.at(BEGINS_INDEX).IsConst() || !in_tensors.at(ENDS_INDEX).IsConst()) {
|
||||
MS_LOG(ERROR) << "invalid input tensor for: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
nvinfer1::ILayer *StrideSliceTensorRT::MakeLayer(TensorRTContext *ctx, const ITensorHelper &slice_input) {
|
||||
const auto &begin = in_tensors_.at(BEGINS_INDEX);
|
||||
const auto &stride = in_tensors_.back();
|
||||
const auto &end = in_tensors_.at(ENDS_INDEX);
|
||||
|
||||
auto input_dims = slice_input.trt_tensor_->getDimensions();
|
||||
nvinfer1::Dims start_dims{input_dims};
|
||||
nvinfer1::Dims size_dims{input_dims};
|
||||
nvinfer1::Dims stride_dims{input_dims};
|
||||
|
||||
int64_t axis_index = in_tensors_.size() == HAS_AXIS ? AXIS_INDEX : -1;
|
||||
nvinfer1::ITensor *size_tensor = nullptr;
|
||||
if (begin.ElementNum() == input_dims.nbDims) {
|
||||
start_dims = lite::ConvertCudaDims(begin);
|
||||
auto end_dims = lite::ConvertCudaDims(end);
|
||||
for (int i = 0; i < size_dims.nbDims; i++) {
|
||||
size_dims.d[i] = end_dims.d[i] - start_dims.d[i];
|
||||
}
|
||||
stride_dims = lite::ConvertCudaDims(stride);
|
||||
} else {
|
||||
if (begin.ElementNum() != 1 || end.ElementNum() != 1 || stride.ElementNum() != 1) {
|
||||
MS_LOG(ERROR)
|
||||
<< "Only support element number of begin, end and stride to be 1 when this number < input dims number, op: "
|
||||
<< op_name_;
|
||||
return nullptr;
|
||||
}
|
||||
int axis_value = axis_index == -1 ? 0 : *(static_cast<const int *>(in_tensors_.at(axis_index).Data()));
|
||||
int start_value = *(static_cast<const int *>(begin.Data()));
|
||||
int end_value = *(static_cast<const int *>(end.Data()));
|
||||
int stride_value = *(static_cast<const int *>(stride.Data()));
|
||||
std::fill(start_dims.d, start_dims.d + start_dims.nbDims, 0);
|
||||
std::fill(stride_dims.d, stride_dims.d + stride_dims.nbDims, 1);
|
||||
size_dims = slice_input.trt_tensor_->getDimensions();
|
||||
if (start_value < 0) {
|
||||
start_value = input_dims.d[axis_value] + start_value;
|
||||
}
|
||||
for (int i = 0; i < start_dims.nbDims; i++) {
|
||||
if (i == axis_value) {
|
||||
start_dims.d[i] = start_value;
|
||||
stride_dims.d[i] = stride_value;
|
||||
if (end_value >= 0) {
|
||||
size_dims.d[i] = std::min(end_value, input_dims.d[i]) - start_dims.d[i];
|
||||
} else if (end_value >= -input_dims.d[i]) {
|
||||
size_dims.d[i] = end_value + input_dims.d[i] - start_dims.d[i];
|
||||
} else {
|
||||
size_dims.d[i] = input_dims.d[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
if (IsDynamicInput(ctx, 0)) {
|
||||
size_tensor = GetDynamicSliceSize(ctx, slice_input.trt_tensor_, size_dims.d[axis_value], axis_value);
|
||||
size_dims = nvinfer1::Dims{-1};
|
||||
}
|
||||
}
|
||||
nvinfer1::ISliceLayer *slice_layer =
|
||||
ctx->network()->addSlice(*slice_input.trt_tensor_, start_dims, size_dims, stride_dims);
|
||||
if (slice_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "add Slice op failed for TensorRT: " << op_name_;
|
||||
return nullptr;
|
||||
}
|
||||
if (size_tensor != nullptr) {
|
||||
slice_layer->setInput(INPUT_SIZE2, *size_tensor);
|
||||
}
|
||||
return slice_layer;
|
||||
}
|
||||
|
||||
int StrideSliceTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
||||
ITensorHelper slice_input;
|
||||
int ret = PreprocessInputs2SameDim(ctx, input(ctx, 0), &slice_input);
|
||||
if (ret != RET_OK || slice_input.trt_tensor_ == nullptr) {
|
||||
MS_LOG(ERROR) << "PreprocessInputs2SameDim input tensor failed for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
auto slice_layer = MakeLayer(ctx, slice_input);
|
||||
if (slice_layer == nullptr) {
|
||||
return RET_ERROR;
|
||||
}
|
||||
this->layer_ = slice_layer;
|
||||
slice_layer->setName(op_name_.c_str());
|
||||
nvinfer1::ITensor *out_tensor = slice_layer->getOutput(0);
|
||||
auto shape = ConvertMSShape(out_tensor->getDimensions());
|
||||
bool rank_0 = false;
|
||||
|
||||
shrink_axis_ = AsOps<ops::StridedSlice>()->get_shrink_axis_mask();
|
||||
if (shrink_axis_ != 0) {
|
||||
for (int i = shape.size() - 1; i >= 0; --i) {
|
||||
int mask = 1 << i;
|
||||
if ((shrink_axis_ & mask) != 0) {
|
||||
shape.erase(shape.begin() + i);
|
||||
}
|
||||
}
|
||||
if (!shape.empty()) {
|
||||
out_tensor = Reshape(ctx, out_tensor, shape);
|
||||
} else {
|
||||
rank_0 = true;
|
||||
}
|
||||
}
|
||||
auto helper = ITensorHelper{out_tensor, slice_input.format_, slice_input.same_format_, !rank_0};
|
||||
ctx->RegisterTensor(helper, out_tensors_[0].Name());
|
||||
MS_LOG(DEBUG) << "slice output : " << GetTensorFormat(helper);
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
REGISTER_TENSORRT_CREATOR(ops::kNameStridedSlice, StrideSliceTensorRT)
|
||||
} // namespace mindspore::lite
|
|
@ -0,0 +1,48 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_STRIDE_SLICE_TENSORRT_H_
|
||||
#define MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_STRIDE_SLICE_TENSORRT_H_
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <tuple>
|
||||
#include <memory>
|
||||
#include "src/extendrt/delegate/tensorrt/op/tensorrt_op.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
constexpr int BEGINS_INDEX = 1;
|
||||
constexpr int ENDS_INDEX = 2;
|
||||
constexpr int HAS_AXIS = 5;
|
||||
constexpr int AXIS_INDEX = 3;
|
||||
class StrideSliceTensorRT : public TensorRTOp {
|
||||
public:
|
||||
StrideSliceTensorRT(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
|
||||
const std::vector<TensorInfo> &out_tensors, std::string name)
|
||||
: TensorRTOp(base_operator, in_tensors, out_tensors, name) {}
|
||||
|
||||
~StrideSliceTensorRT() override = default;
|
||||
|
||||
int AddInnerOp(TensorRTContext *ctx) override;
|
||||
|
||||
int IsSupport(const BaseOperatorPtr &base_operator, const std::vector<TensorInfo> &in_tensors,
|
||||
const std::vector<TensorInfo> &out_tensors) override;
|
||||
|
||||
private:
|
||||
nvinfer1::ITensor *GetDynamicSliceSize(TensorRTContext *ctx, nvinfer1::ITensor *input, int, int);
|
||||
nvinfer1::ILayer *MakeLayer(TensorRTContext *ctx, const ITensorHelper &slice_input);
|
||||
size_t shrink_axis_;
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_STRIDE_SLICE_TENSORRT_H_
@ -87,6 +87,11 @@ int TensorRTOp::ReadyInputsNumber(TensorRTContext *ctx) const {

bool TensorRTOp::IsShapeKnown() { return true; }

bool TensorRTOp::IsDynamicInput(TensorRTContext *ctx, size_t k) {
  nvinfer1::Dims dims = input(ctx, k).trt_tensor_->getDimensions();
  return std::any_of(dims.d, dims.d + dims.nbDims, [](int d) { return d == -1; });
}

int TensorRTOp::Prepare(void **network_tensor_bindings, nvinfer1::ICudaEngine *engine) {
  if (op_binding_tensor_.size() != 0) {
    MS_LOG(ERROR) << "need special op Prepare for " << op_name_;
|
||||
|
|
|
@ -40,6 +40,7 @@ namespace mindspore::lite {
|
|||
constexpr int INPUT_SIZE2 = 2;
|
||||
constexpr int INPUT_SIZE3 = 3;
|
||||
constexpr int INPUT_SIZE4 = 4;
|
||||
constexpr int INPUT_SIZE5 = 5;
|
||||
|
||||
struct BindingHelper {
|
||||
std::string name_;
|
||||
|
@ -115,6 +116,7 @@ class TensorRTOp {
|
|||
nvinfer1::ILayer *layer() { return layer_; }
|
||||
|
||||
bool GetSupportInputBool();
|
||||
bool IsDynamicInput(TensorRTContext *ctx, size_t k);
|
||||
|
||||
void SetSupportInputBool(bool support_input_bool);
|
||||
template <class OpsT>
|
||||
|
|
|
@ -87,12 +87,12 @@ class TensorRTPluginCreater : public nvinfer1::IPluginCreator {
|
|||
|
||||
const char *getPluginNamespace() const noexcept override { return name_space_.c_str(); }
|
||||
|
||||
nvinfer1::IPluginV2 *createPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc) noexcept {
|
||||
nvinfer1::IPluginV2 *createPlugin(const char *name, const nvinfer1::PluginFieldCollection *fc) noexcept override {
|
||||
return new (std::nothrow) T(name, fc);
|
||||
}
|
||||
|
||||
nvinfer1::IPluginV2 *deserializePlugin(const char *name, const void *serialData, size_t serialLength) noexcept {
|
||||
return new (std::nothrow) T(name, serialData, serialLength);
|
||||
nvinfer1::IPluginV2 *deserializePlugin(const char *name, const void *data, size_t len) noexcept override {
|
||||
return new (std::nothrow) T(name, data, len);
|
||||
}
|
||||
|
||||
protected:
|
||||
|
|
|
@ -21,6 +21,7 @@
|
|||
#include <functional>
|
||||
#include "include/api/kernel.h"
|
||||
#include "src/common/utils.h"
|
||||
#include "src/extendrt/utils/tensor_default_impl.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
class TensorInfoImpl {
|
||||
|
@ -29,46 +30,24 @@ class TensorInfoImpl {
|
|||
TensorInfoImpl(const std::string &name, mindspore::DataType type, const std::vector<int64_t> &shape,
|
||||
mindspore::Format format, const void *data, size_t data_len,
|
||||
const mindspore::tensor::TensorPtr &tensor_val)
|
||||
: name_(name),
|
||||
dType_(type),
|
||||
shape_(shape),
|
||||
format_(format),
|
||||
data_(data),
|
||||
data_len_(data_len),
|
||||
tensor_val_(tensor_val) {
|
||||
is_const_ = (data_ != nullptr);
|
||||
if (data_ == nullptr || data_len_ == 0) {
|
||||
auto ele_num = ElementNum();
|
||||
auto type_size = item_size();
|
||||
temp_data_.resize(ele_num * type_size);
|
||||
data_ = temp_data_.data();
|
||||
data_len_ = temp_data_.size();
|
||||
}
|
||||
}
|
||||
void SetShape(const std::vector<int64_t> &shape) {
|
||||
shape_ = shape;
|
||||
auto new_elem_num = ElementNum();
|
||||
auto type_size = item_size();
|
||||
auto data_size = new_elem_num * type_size;
|
||||
if (data_size != temp_data_.size() && data_ == temp_data_.data()) {
|
||||
temp_data_.resize(data_size);
|
||||
data_ = temp_data_.data();
|
||||
data_len_ = data_size;
|
||||
}
|
||||
: tensor_impl_(name, type, shape), tensor_val_(tensor_val) {
|
||||
tensor_impl_.SetFormat(format);
|
||||
auto is_const = (data != nullptr);
|
||||
tensor_impl_.SetIsConst(is_const);
|
||||
SetData(data, data_len);
|
||||
}
|
||||
|
||||
int64_t ElementNum() const { return std::accumulate(shape_.begin(), shape_.end(), 1, std::multiplies<int64_t>()); }
|
||||
|
||||
size_t item_size() const { return DataTypeSize(static_cast<enum TypeId>(dType_)); }
|
||||
|
||||
std::string name_;
|
||||
mindspore::DataType dType_ = mindspore::DataType::kTypeUnknown;
|
||||
std::vector<int64_t> shape_;
|
||||
mindspore::Format format_ = DEFAULT_FORMAT;
|
||||
const void *data_ = nullptr;
|
||||
size_t data_len_ = 0;
|
||||
bool is_const_ = false;
|
||||
std::vector<uint8_t> temp_data_;
|
||||
size_t item_size() const { return DataTypeSize(static_cast<enum TypeId>(tensor_impl_.DataType())); }
|
||||
void SetData(const void *data, size_t data_len) {
|
||||
if (data != nullptr && data_len != 0) {
|
||||
if (tensor_impl_.DataSize() != data_len) {
|
||||
MS_LOG_WARNING << "Tensor expect data size " << tensor_impl_.DataSize() << " != data len " << data_len
|
||||
<< ", shape: " << tensor_impl_.Shape() << ", dtype: " << tensor_impl_.DataType();
|
||||
}
|
||||
tensor_impl_.SetData(const_cast<void *>(data), false);
|
||||
}
|
||||
}
|
||||
TensorDefaultImpl tensor_impl_;
|
||||
mindspore::tensor::TensorPtr tensor_val_ = nullptr;
|
||||
};
|
||||
|
||||
|
@ -82,21 +61,21 @@ std::string TensorInfo::Name() const {
|
|||
if (impl_ == nullptr) {
|
||||
return "";
|
||||
}
|
||||
return impl_->name_;
|
||||
return impl_->tensor_impl_.Name();
|
||||
}
|
||||
|
||||
mindspore::DataType TensorInfo::DataType() const {
|
||||
if (impl_ == nullptr) {
|
||||
return mindspore::DataType::kTypeUnknown;
|
||||
}
|
||||
return impl_->dType_;
|
||||
return impl_->tensor_impl_.DataType();
|
||||
}
|
||||
|
||||
mindspore::Format TensorInfo::format() const {
|
||||
if (impl_ == nullptr) {
|
||||
return DEFAULT_FORMAT;
|
||||
}
|
||||
return impl_->format_;
|
||||
return impl_->tensor_impl_.Format();
|
||||
}
|
||||
|
||||
const std::vector<int64_t> &TensorInfo::Shape() const {
|
||||
|
@ -104,21 +83,21 @@ const std::vector<int64_t> &TensorInfo::Shape() const {
|
|||
if (impl_ == nullptr) {
|
||||
return empty_shape;
|
||||
}
|
||||
return impl_->shape_;
|
||||
return impl_->tensor_impl_.Shape();
|
||||
}
|
||||
|
||||
const void *TensorInfo::Data() const {
|
||||
if (impl_ == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
return impl_->data_;
|
||||
return impl_->tensor_impl_.Data().get();
|
||||
}
|
||||
|
||||
void *TensorInfo::MutableData() {
|
||||
if (impl_ == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
return const_cast<void *>(impl_->data_);
|
||||
return const_cast<void *>(impl_->tensor_impl_.MutableData());
|
||||
}
|
||||
|
||||
size_t TensorInfo::DataSize() const {
|
||||
|
@ -132,7 +111,7 @@ bool TensorInfo::IsConst() const {
|
|||
if (impl_ == nullptr) {
|
||||
return 0;
|
||||
}
|
||||
return impl_->is_const_ && impl_->data_ != nullptr;
|
||||
return impl_->tensor_impl_.IsConst();
|
||||
}
|
||||
|
||||
size_t TensorInfo::item_size() const {
|
||||
|
@ -146,22 +125,21 @@ void TensorInfo::SetShape(const std::vector<int64_t> &shape) {
|
|||
if (impl_ == nullptr) {
|
||||
return;
|
||||
}
|
||||
impl_->SetShape(shape);
|
||||
impl_->tensor_impl_.SetShape(shape);
|
||||
}
|
||||
|
||||
void TensorInfo::SetData(const void *data, size_t data_len) {
|
||||
if (impl_ == nullptr) {
|
||||
return;
|
||||
}
|
||||
impl_->data_ = data;
|
||||
impl_->data_len_ = data_len;
|
||||
impl_->SetData(data, data_len);
|
||||
}
|
||||
|
||||
int64_t TensorInfo::ElementNum() const {
|
||||
if (impl_ == nullptr) {
|
||||
return 0;
|
||||
}
|
||||
return impl_->ElementNum();
|
||||
return impl_->tensor_impl_.ElementNum();
|
||||
}
|
||||
|
||||
TensorInfo &TensorInfo::operator=(const TensorInfo &other) {
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
*/
|
||||
|
||||
#include "src/extendrt/delegate/tensorrt/tensorrt_context.h"
|
||||
#include "src/extendrt/delegate/tensorrt/tensorrt_utils.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
TensorRTContext::~TensorRTContext() {
|
||||
|
@ -22,6 +23,9 @@ TensorRTContext::~TensorRTContext() {
|
|||
network_->destroy();
|
||||
network_ = nullptr;
|
||||
}
|
||||
for (auto ptr : owner_memorys_) {
|
||||
free(ptr);
|
||||
}
|
||||
}
|
||||
|
||||
bool TensorRTContext::Init() {
|
||||
|
@ -72,4 +76,31 @@ ITensorHelper TensorRTContext::MsName2Tensor(const std::string &ms_name) {
|
|||
MS_LOG(WARNING) << "Get Tensorrt tensor by ms_tensor: " << ms_name << " fail!";
|
||||
return {};
|
||||
}
|
||||
|
||||
template <typename T>
nvinfer1::ITensor *TensorRTContext::ConvertTo1DTensor(T value) {
  return ConvertTo1DTensor(std::vector<T>{value});
}

template <typename T>
nvinfer1::ITensor *TensorRTContext::ConvertTo1DTensor(const std::vector<T> &values) {
  void *ptr = malloc(values.size() * sizeof(T));
  const T *begin = &values[0];
  memcpy(ptr, reinterpret_cast<const void *>(begin), values.size() * sizeof(T));
  owner_memorys_.push_back(ptr);

  nvinfer1::Weights weights{GetNvinferDataType<T>(), ptr, static_cast<int64_t>(values.size())};
  nvinfer1::Dims dims{1, {static_cast<int>(values.size())}};
  nvinfer1::IConstantLayer *constant_tensor = network()->addConstant(dims, weights);
  if (constant_tensor == nullptr) {
    MS_LOG(ERROR) << "create constant_tensor failed.";
    return nullptr;
  }
  return constant_tensor->getOutput(0);
}

template nvinfer1::ITensor *TensorRTContext::ConvertTo1DTensor(int value);
template nvinfer1::ITensor *TensorRTContext::ConvertTo1DTensor(float value);
template nvinfer1::ITensor *TensorRTContext::ConvertTo1DTensor(const std::vector<int> &values);
template nvinfer1::ITensor *TensorRTContext::ConvertTo1DTensor(const std::vector<float> &values);
}  // namespace mindspore::lite
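A minimal usage sketch for the new ConvertTo1DTensor helpers (assumes an already initialized TensorRTContext*; GatherSelectedDims is a hypothetical name, and the Gather use mirrors what the slice ops above do):

// Sketch only: turn a small host vector into a 1-D constant tensor inside the network.
#include <vector>
#include "src/extendrt/delegate/tensorrt/tensorrt_context.h"

nvinfer1::ITensor *GatherSelectedDims(mindspore::lite::TensorRTContext *ctx, nvinfer1::ITensor *shape_tensor) {
  // Indices of the dimensions we want to keep, materialized as a constant 1-D tensor.
  auto indices = ctx->ConvertTo1DTensor(std::vector<int>{0, 2, 3});
  if (indices == nullptr) {
    return nullptr;
  }
  auto gather = ctx->network()->addGather(*shape_tensor, *indices, 0);
  return gather == nullptr ? nullptr : gather->getOutput(0);
}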
@ -20,6 +20,7 @@
|
|||
#include <NvInfer.h>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include "src/extendrt/delegate/tensorrt/tensorrt_runtime.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
|
@ -42,11 +43,17 @@ class TensorRTContext {
|
|||
bool HasTensor(const std::string &name) const;
|
||||
ITensorHelper MsName2Tensor(const std::string &ms_name);
|
||||
|
||||
template <typename T>
|
||||
nvinfer1::ITensor *ConvertTo1DTensor(T value);
|
||||
template <typename T>
|
||||
nvinfer1::ITensor *ConvertTo1DTensor(const std::vector<T> &values);
|
||||
|
||||
private:
|
||||
int counter_{0};
|
||||
nvinfer1::INetworkDefinition *network_{nullptr};
|
||||
std::unordered_map<std::string, ITensorHelper> ms_name2trt_tensor_;
|
||||
TensorRTRuntime *runtime_{nullptr};
|
||||
std::vector<void *> owner_memorys_;
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_TENSORRT_CONTEXT_H_
|
||||
|
|
|
@ -22,43 +22,18 @@
|
|||
#include <map>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include "ccsrc/kernel/kernel.h"
|
||||
#include <algorithm>
|
||||
#include "src/extendrt/delegate/delegate_utils.h"
|
||||
#include "ccsrc/kernel/common_utils.h"
|
||||
#include "ccsrc/backend/common/optimizer/helper.h"
|
||||
#include "ccsrc/include/common/utils/convert_utils.h"
|
||||
#include "src/extendrt/utils/kernel_graph_utils.h"
|
||||
#include "common/config_infos.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
namespace {
|
||||
const char tensorrt_provider[] = "tensorrt";
|
||||
|
||||
std::vector<std::string> Split(const std::string &str, const std::string &delim) {
|
||||
auto start = 0U;
|
||||
auto end = str.find(delim);
|
||||
std::vector<std::string> substrs;
|
||||
while (end != std::string::npos) {
|
||||
substrs.push_back(str.substr(start, end - start));
|
||||
start = end + delim.length();
|
||||
end = str.find(delim, start);
|
||||
}
|
||||
substrs.push_back(str.substr(start, end));
|
||||
return substrs;
|
||||
}
|
||||
std::vector<nvinfer1::Dims> StringToDims(const std::string &str) {
|
||||
std::vector<nvinfer1::Dims> all_dims;
|
||||
std::vector<std::string> all_str_dims = Split(str, ";");
|
||||
for (const std::string &str_dims : all_str_dims) {
|
||||
std::vector<std::string> str_dims_vec = Split(str_dims, ",");
|
||||
nvinfer1::Dims dims;
|
||||
dims.nbDims = str_dims_vec.size();
|
||||
for (size_t i = 0; i != str_dims_vec.size(); ++i) {
|
||||
dims.d[i] = std::stoi(str_dims_vec[i]);
|
||||
}
|
||||
all_dims.push_back(dims);
|
||||
}
|
||||
return all_dims;
|
||||
}
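StringToDims expects one dim group per input separated by ';', with comma-separated dimension values, for example a config value such as 1,3,224,224;1,10 describing two inputs (the concrete values here are only an illustration). A runnable mirror of that parsing on plain vectors, without TensorRT types:

#include <cstdio>
#include <string>
#include <vector>

// Same ';' / ',' splitting rule as StringToDims above, producing plain int vectors.
std::vector<std::vector<int>> ParseDimsString(const std::string &str) {
  std::vector<std::vector<int>> all_dims;
  size_t start = 0;
  while (start <= str.size()) {
    size_t end = str.find(';', start);
    std::string group = str.substr(start, end == std::string::npos ? std::string::npos : end - start);
    std::vector<int> dims;
    size_t pos = 0;
    while (pos <= group.size()) {
      size_t comma = group.find(',', pos);
      dims.push_back(std::stoi(group.substr(pos, comma == std::string::npos ? std::string::npos : comma - pos)));
      if (comma == std::string::npos) break;
      pos = comma + 1;
    }
    all_dims.push_back(dims);
    if (end == std::string::npos) break;
    start = end + 1;
  }
  return all_dims;
}

int main() {
  auto dims = ParseDimsString("1,3,224,224;1,10");
  printf("%zu groups, first has %zu dims\n", dims.size(), dims[0].size());  // 2 groups, first has 4 dims
  return 0;
}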
|
||||
|
||||
struct NodeWithOutputIndex {
|
||||
NodeWithOutputIndex() = default;
|
||||
NodeWithOutputIndex(session::KernelWithIndex kernel_index, TensorInfo tensor_info)
|
||||
|
@ -308,7 +283,7 @@ Status GetModelOutputsInfo(KernelGraphPtr kernel_graph, std::vector<NodeWithOutp
|
|||
MS_EXCEPTION_IF_NULL(kernel_graph);
|
||||
MS_EXCEPTION_IF_NULL(tensor_info_list_ptr);
|
||||
auto &tensor_info_list = *tensor_info_list_ptr;
|
||||
auto outputs = kernel_graph->outputs();
|
||||
auto outputs = KernelGraphUtils::GetKernelGraphOutputs(kernel_graph);
|
||||
// find parameters of graph inputs
|
||||
for (size_t i = 0; i < outputs.size(); ++i) {
|
||||
auto output = outputs[i];
|
||||
|
@ -335,31 +310,8 @@ Status GetModelOutputsInfo(KernelGraphPtr kernel_graph, std::vector<NodeWithOutp
|
|||
return kSuccess;
|
||||
}
|
||||
} // namespace
|
||||
TensorRTExecutor::TensorRTExecutor(const std::shared_ptr<mindspore::Context> &context,
|
||||
const std::string &cache_model_path, size_t vocab_size, size_t device_cache_size,
|
||||
const std::map<std::string, std::string> &ms_cache)
|
||||
: context_(context),
|
||||
cache_model_path_(cache_model_path),
|
||||
vocab_size_(vocab_size),
|
||||
device_cache_size_(device_cache_size) {
|
||||
if (ms_cache.find("serialize_path") != ms_cache.end()) {
|
||||
serialize_path_ = ms_cache.at("serialize_path");
|
||||
}
|
||||
if (ms_cache.find("min_dims") != ms_cache.end()) {
|
||||
min_dims_ = StringToDims(ms_cache.at("min_dims"));
|
||||
}
|
||||
if (ms_cache.find("opt_dims") != ms_cache.end()) {
|
||||
opt_dims_ = StringToDims(ms_cache.at("opt_dims"));
|
||||
}
|
||||
if (ms_cache.find("max_dims") != ms_cache.end()) {
|
||||
max_dims_ = StringToDims(ms_cache.at("max_dims"));
|
||||
}
|
||||
if (min_dims_.size() != opt_dims_.size() || min_dims_.size() != max_dims_.size()) {
|
||||
MS_LOG(WARNING) << "number of min_dims, opt_dims, max_dims are not equal";
|
||||
}
|
||||
}
|
||||
|
||||
TensorRTExecutor::TensorRTExecutor(const std::shared_ptr<mindspore::Context> &context) : context_(context) {}
|
||||
TensorRTExecutor::TensorRTExecutor(const std::shared_ptr<mindspore::Context> &context, const ConfigInfos &config_infos)
|
||||
: context_(context), config_infos_(config_infos) {}
|
||||
|
||||
TensorRTExecutor::~TensorRTExecutor() {
|
||||
if (runtime_ != nullptr) {
|
||||
|
@ -387,12 +339,13 @@ bool IsHardwareSupport() {
|
|||
return true;
|
||||
}
|
||||
|
||||
Status TensorRTExecutor::Init() {
|
||||
bool TensorRTExecutor::Init() {
|
||||
if (!IsHardwareSupport()) {
|
||||
return mindspore::kLiteNotSupport;
|
||||
return false;
|
||||
}
|
||||
if (context_ == nullptr) {
|
||||
MS_LOG_ERROR << "Context cannot be nullptr";
|
||||
return false;
|
||||
}
|
||||
|
||||
std::vector<std::shared_ptr<DeviceInfoContext>> device_list = context_->MutableDeviceInfo();
|
||||
|
@ -401,50 +354,71 @@ Status TensorRTExecutor::Init() {
|
|||
});
|
||||
if (iter == device_list.end()) {
|
||||
MS_LOG(ERROR) << "no gpu device info found for TensorRT.";
|
||||
return mindspore::kLiteError;
|
||||
return false;
|
||||
}
|
||||
auto gpu_info = (*iter)->Cast<GPUDeviceInfo>();
|
||||
if (gpu_info == nullptr) {
|
||||
MS_LOG(ERROR) << "no gpu device info found for TensorRT.";
|
||||
return mindspore::kLiteError;
|
||||
return false;
|
||||
}
|
||||
device_info_ = gpu_info;
|
||||
int ret = lite::SetCudaDevice(device_info_);
|
||||
if (ret != RET_OK) {
|
||||
return mindspore::kLiteError;
|
||||
return false;
|
||||
}
|
||||
if (runtime_ == nullptr) {
|
||||
runtime_ = new (std::nothrow) TensorRTRuntime();
|
||||
if (runtime_ == nullptr) {
|
||||
MS_LOG(ERROR) << "create TensorRTRuntime failed.";
|
||||
return mindspore::kLiteError;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (runtime_->Init() != RET_OK) {
|
||||
MS_LOG(ERROR) << "TensorRTRuntime init failed.";
|
||||
return mindspore::kLiteError;
|
||||
return false;
|
||||
}
|
||||
runtime_->SetDeviceID(device_info_->GetDeviceID());
|
||||
|
||||
auto cuda_ret = cudaStreamCreate(&stream_);
|
||||
if (cuda_ret != cudaSuccess) {
|
||||
MS_LOG(ERROR) << "Cuda create stream failed";
|
||||
return mindspore::kLiteError;
|
||||
return false;
|
||||
}
|
||||
|
||||
auto cublas_ret = cublasCreate(&cublas_handle_);
|
||||
if (cublas_ret != CUBLAS_STATUS_SUCCESS) {
|
||||
MS_LOG(ERROR) << "Cuda create cublas handle failed";
|
||||
return mindspore::kLiteError;
|
||||
return false;
|
||||
}
|
||||
|
||||
auto cublaslt_ret = cublasLtCreate(&cublaslt_handle_);
|
||||
if (cublaslt_ret != CUBLAS_STATUS_SUCCESS) {
|
||||
MS_LOG(ERROR) << "Cuda create cublaslt handle failed";
|
||||
return mindspore::kLiteError;
|
||||
return false;
|
||||
}
|
||||
|
||||
return mindspore::kSuccess;
|
||||
ret = ParseOptimizationProfile();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Parse input ranges failed.";
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
int TensorRTExecutor::ParseOptimizationProfile() {
  auto gpu_context_it = config_infos_.find(kGPUContext);
  if (gpu_context_it == config_infos_.end()) {
    MS_LOG(INFO) << "do not have input ranges config.";
    return RET_OK;
  }
  auto &gpu_context = gpu_context_it->second;
  ProfileConfigs profile_configs;
  if (!ProfileParser::Parse(gpu_context, true, &profile_configs)) {
    MS_LOG_WARNING << "Failed to parse profile info from '" << kGPUContext << "'";
    return RET_FAILED;
  }
  trt_profile_configs_ = profile_configs;
  return RET_OK;
}
|
||||
|
||||
Status TensorRTExecutor::BuildSubGraph(const KernelGraphPtr &kernel_graph) {
|
||||
|
@ -490,57 +464,10 @@ Status TensorRTExecutor::BuildSubGraph(const KernelGraphPtr &kernel_graph) {
|
|||
if (status != kSuccess) {
|
||||
return status;
|
||||
}
|
||||
tensorrt_graph_list_.push_back(TrtGraphContext{tensorrt_ops, inputs_, outputs_, nullptr});
|
||||
status = UpdateTrtSubGraphInputsDepend();
|
||||
if (status != kSuccess) {
|
||||
return status;
|
||||
}
|
||||
for (size_t i = 0; i < tensorrt_graph_list_.size(); i++) {
|
||||
auto &sub_graph = tensorrt_graph_list_[i];
|
||||
sub_graph.sub_graph = CreateTensorRTGraph(sub_graph.tensorrt_ops, kernel_graph, tensorrt_subgraph_index,
|
||||
sub_graph.inputs, sub_graph.outputs);
|
||||
if (!sub_graph.sub_graph) {
|
||||
MS_LOG(ERROR) << "Create tensorrt graph failed";
|
||||
return mindspore::kLiteError;
|
||||
}
|
||||
}
|
||||
return mindspore::kSuccess;
|
||||
}
|
||||
|
||||
Status TensorRTExecutor::UpdateTrtSubGraphInputsDepend() {
|
||||
std::set<TensorInfo> val_set;
|
||||
for (auto &item : inputs_) {
|
||||
val_set.emplace(item);
|
||||
}
|
||||
std::vector<std::set<TensorInfo>> sub_graphs_outputs;
|
||||
for (size_t i = 0; i < tensorrt_graph_list_.size(); i++) {
|
||||
auto &sub_graph = tensorrt_graph_list_[i];
|
||||
std::set<TensorInfo> sub_graph_tensor_infos;
|
||||
for (auto &op : sub_graph.tensorrt_ops) {
|
||||
for (auto &item : op->outputs()) {
|
||||
sub_graph_tensor_infos.emplace(item);
|
||||
}
|
||||
}
|
||||
sub_graphs_outputs.emplace_back(std::move(sub_graph_tensor_infos));
|
||||
|
||||
for (auto &input : sub_graph.inputs) {
|
||||
if (!val_set.count(input)) {
|
||||
size_t k = 0;
|
||||
for (; k < i; k++) {
|
||||
if (sub_graphs_outputs[k].count(input)) {
|
||||
tensorrt_graph_list_[k].outputs.push_back(input);
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (k >= i) {
|
||||
MS_LOG(ERROR) << "Cannot found input " << input.Name();
|
||||
return mindspore::kLiteError;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (auto &item : sub_graph.outputs) {
|
||||
val_set.emplace(item);
|
||||
}
|
||||
tensorrt_graph_ = CreateTensorRTGraph(tensorrt_ops, kernel_graph, tensorrt_subgraph_index, inputs_, outputs_);
|
||||
if (!tensorrt_graph_) {
|
||||
MS_LOG(ERROR) << "Create tensorrt graph failed";
|
||||
return mindspore::kLiteError;
|
||||
}
|
||||
return mindspore::kSuccess;
|
||||
}
|
||||
|
@ -583,9 +510,18 @@ std::shared_ptr<TensorRTSubGraph> TensorRTExecutor::CreateTensorRTGraph(const st
|
|||
const KernelGraphPtr &graph, int index,
|
||||
const std::vector<TensorInfo> &inputs,
|
||||
const std::vector<TensorInfo> &outputs) {
|
||||
auto tensorrt_graph =
|
||||
std::make_shared<TensorRTSubGraph>(ops, inputs, outputs, context_.get(), device_info_, runtime_, support_resize_,
|
||||
support_hw_resize_, min_dims_, opt_dims_, max_dims_);
|
||||
if (!trt_profile_configs_.input_infos.empty()) {
|
||||
std::vector<std::string> input_names;
|
||||
std::transform(inputs.begin(), inputs.end(), std::back_inserter(input_names),
|
||||
[](auto &item) { return item.Name(); });
|
||||
if (!ProfileParser::ReorderByInputNames(input_names, &trt_profile_configs_)) {
|
||||
MS_LOG_ERROR << "Reorder profile by input names failed, input names: " << input_names;
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
auto tensorrt_graph = std::make_shared<TensorRTSubGraph>(ops, inputs, outputs, context_.get(), device_info_, runtime_,
|
||||
support_resize_, support_hw_resize_, trt_profile_configs_);
|
||||
if (tensorrt_graph == nullptr) {
|
||||
MS_LOG(ERROR) << "new tensorrt_graph failed.";
|
||||
return nullptr;
|
||||
|
@ -635,10 +571,6 @@ bool TensorRTExecutor::CompileGraph(const FuncGraphPtr &graph, const std::map<st
|
|||
|
||||
bool TensorRTExecutor::RunGraph(const FuncGraphPtr &graph, const std::vector<tensor::Tensor> &inputs,
|
||||
std::vector<tensor::Tensor> *outputs, const std::map<string, string> &compile_options) {
|
||||
if (tensorrt_graph_list_.empty()) {
|
||||
MS_LOG(ERROR) << "TensorRTGraph is nullptr.";
|
||||
return false;
|
||||
}
|
||||
if (inputs.size() != inputs_.size()) {
|
||||
MS_LOG(ERROR) << "Graph inputs size " << inputs_.size() << " != execute outputs size " << inputs.size();
|
||||
return false;
|
||||
|
@ -647,70 +579,20 @@ bool TensorRTExecutor::RunGraph(const FuncGraphPtr &graph, const std::vector<ten
|
|||
MS_LOG(ERROR) << "Graph outputs size " << inputs_.size() << " != expected outputs size " << outputs->size();
|
||||
return false;
|
||||
}
|
||||
if (tensorrt_graph_list_.size() == 1) {
|
||||
auto &sub_graph = tensorrt_graph_list_[0];
|
||||
if (sub_graph.sub_graph == nullptr) {
|
||||
MS_LOG(ERROR) << "TensorRT subgraph is nullptr.";
|
||||
return false;
|
||||
}
|
||||
return sub_graph.sub_graph->Execute(inputs, outputs) == RET_OK;
|
||||
if (tensorrt_graph_ == nullptr) {
|
||||
MS_LOG(ERROR) << "TensorRT subgraph is nullptr.";
|
||||
return false;
|
||||
}
|
||||
std::map<TensorInfo, std::shared_ptr<tensor::Tensor>> tensor_val_map;
|
||||
for (size_t i = 0; i < inputs.size(); i++) {
|
||||
tensor_val_map[inputs_[i]] = std::make_shared<tensor::Tensor>(inputs[i]);
|
||||
}
|
||||
for (auto &sub_graph : tensorrt_graph_list_) {
|
||||
std::vector<tensor::Tensor> sub_inputs;
|
||||
std::vector<tensor::Tensor> sub_outputs;
|
||||
for (auto &item : sub_graph.inputs) {
|
||||
auto it = tensor_val_map.find(item);
|
||||
if (it == tensor_val_map.end()) {
|
||||
MS_LOG(ERROR) << "Cannot find input tensor " << item.Name() << " in tensor val map";
|
||||
return false;
|
||||
}
|
||||
sub_inputs.push_back(*it->second);
|
||||
}
|
||||
if (sub_graph.sub_graph == nullptr) {
|
||||
MS_LOG(ERROR) << "TensorRT subgraph is nullptr.";
|
||||
return false;
|
||||
}
|
||||
auto ret = sub_graph.sub_graph->Execute(sub_inputs, &sub_outputs);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Failed to execute graph.";
|
||||
return false;
|
||||
}
|
||||
if (sub_graph.outputs.size() != sub_outputs.size()) {
|
||||
MS_LOG(ERROR) << "Subgraph outputs size " << sub_graph.outputs.size() << " != result outputs size "
|
||||
<< sub_outputs.size();
|
||||
return false;
|
||||
}
|
||||
for (size_t i = 0; i < sub_graph.outputs.size(); i++) {
|
||||
tensor_val_map[sub_graph.outputs[i]] = std::make_shared<tensor::Tensor>(sub_outputs[i]);
|
||||
}
|
||||
}
|
||||
outputs->clear();
|
||||
for (auto &item : outputs_) {
|
||||
auto it = tensor_val_map.find(item);
|
||||
if (it == tensor_val_map.end()) {
|
||||
MS_LOG(ERROR) << "Cannot find input tensor " << item.Name() << " in tensor val map";
|
||||
return false;
|
||||
}
|
||||
outputs->push_back(*it->second);
|
||||
}
|
||||
return true;
|
||||
return tensorrt_graph_->Execute(inputs, outputs) == RET_OK;
|
||||
}
|
||||
|
||||
bool TensorRTExecutor::Resize(const FuncGraphPtr &, const std::vector<tensor::Tensor> &inputs,
|
||||
const std::vector<std::vector<int64_t>> &new_shapes) {
|
||||
if (tensorrt_graph_list_.size() == 1) {
|
||||
auto &sub_graph = tensorrt_graph_list_[0];
|
||||
if (sub_graph.sub_graph == nullptr) {
|
||||
MS_LOG(ERROR) << "TensorRT subgraph is nullptr.";
|
||||
return false;
|
||||
}
|
||||
return sub_graph.sub_graph->Resize(inputs, new_shapes) == RET_OK;
|
||||
if (tensorrt_graph_ == nullptr) {
|
||||
MS_LOG(ERROR) << "TensorRT subgraph is nullptr.";
|
||||
return false;
|
||||
}
|
||||
return false;
|
||||
return tensorrt_graph_->Resize(inputs, new_shapes) == RET_OK;
|
||||
}
|
||||
|
||||
std::vector<tensor::Tensor> TensorRTExecutor::GetInputInfos(const FuncGraphPtr &) {
|
||||
|
@ -733,9 +615,12 @@ std::vector<tensor::Tensor> TensorRTExecutor::GetOutputInfos(const FuncGraphPtr
|
|||
return tensors;
|
||||
}
|
||||
|
||||
static std::shared_ptr<device::GraphExecutor> TensorRTGraphExecutorCreator(const std::shared_ptr<Context> &ctx) {
|
||||
auto executor = std::make_shared<TensorRTExecutor>(ctx);
|
||||
executor->Init();
|
||||
static std::shared_ptr<device::GraphExecutor> TensorRTGraphExecutorCreator(const std::shared_ptr<Context> &ctx,
|
||||
const ConfigInfos &config_infos) {
|
||||
auto executor = std::make_shared<TensorRTExecutor>(ctx, config_infos);
|
||||
if (!executor->Init()) {
|
||||
return nullptr;
|
||||
}
|
||||
return executor;
|
||||
}
|
||||
|
||||
|
|
|
@ -43,13 +43,11 @@ struct TrtGraphContext {
|
|||
|
||||
class TensorRTExecutor : public LiteGraphExecutor {
|
||||
public:
|
||||
TensorRTExecutor(const std::shared_ptr<mindspore::Context> &context, const std::string &cache_model_path,
|
||||
size_t vocab_size, size_t device_cache_size, const std::map<std::string, std::string> &ms_cache);
|
||||
explicit TensorRTExecutor(const std::shared_ptr<mindspore::Context> &context);
|
||||
TensorRTExecutor(const std::shared_ptr<mindspore::Context> &context, const ConfigInfos &config_infos);
|
||||
|
||||
~TensorRTExecutor() override;
|
||||
|
||||
Status Init();
|
||||
bool Init();
|
||||
|
||||
bool CompileGraph(const FuncGraphPtr &graph, const std::map<string, string> &compile_options) override;
|
||||
bool RunGraph(const FuncGraphPtr &graph, const std::vector<tensor::Tensor> &inputs,
|
||||
|
@ -61,8 +59,9 @@ class TensorRTExecutor : public LiteGraphExecutor {
|
|||
std::vector<tensor::Tensor> GetOutputInfos(const FuncGraphPtr &) override;
|
||||
|
||||
private:
|
||||
int ParseOptimizationProfile();
|
||||
|
||||
Status BuildSubGraph(const KernelGraphPtr &graph);
|
||||
Status UpdateTrtSubGraphInputsDepend();
|
||||
|
||||
TensorRTOp *FindTensorRTOp(const CNodePtr &cnode, const BaseOperatorPtr &base_operator,
|
||||
const std::vector<TensorInfo> &input_tensors,
|
||||
|
@ -74,6 +73,7 @@ class TensorRTExecutor : public LiteGraphExecutor {
|
|||
const std::vector<TensorInfo> &outputs);
|
||||
|
||||
std::shared_ptr<mindspore::Context> context_{nullptr};
|
||||
ConfigInfos config_infos_;
|
||||
std::shared_ptr<GPUDeviceInfo> device_info_{nullptr};
|
||||
TensorRTRuntime *runtime_{nullptr};
|
||||
bool support_hw_resize_{true};
|
||||
|
@ -88,12 +88,9 @@ class TensorRTExecutor : public LiteGraphExecutor {
|
|||
|
||||
std::vector<kernel::Kernel> kernel_list_;
|
||||
|
||||
std::vector<TrtGraphContext> tensorrt_graph_list_;
|
||||
|
||||
std::vector<nvinfer1::Dims> min_dims_;
|
||||
std::vector<nvinfer1::Dims> opt_dims_;
|
||||
std::vector<nvinfer1::Dims> max_dims_;
|
||||
ProfileConfigs trt_profile_configs_;
|
||||
|
||||
std::shared_ptr<TensorRTSubGraph> tensorrt_graph_ = nullptr;
|
||||
std::vector<TensorInfo> inputs_;
|
||||
std::vector<TensorInfo> outputs_;
|
||||
};
|
||||
|
|
|
@ -14,13 +14,27 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
#include "extendrt/delegate/tensorrt/tensorrt_plugin_impl.h"
|
||||
#ifdef LITE_CUDA_DISTRIBUTION
|
||||
#include "extendrt/delegate/tensorrt/distribution/distribution_base.h"
|
||||
#include "plugin/device/gpu/hal/device/distribution/collective_wrapper.h"
|
||||
#endif
|
||||
|
||||
namespace mindspore::lite {
|
||||
int TensorRTPluginImpl::GetGPUGroupSize() const { return GetGroupSize(NCCL_WORLD_GROUP); }
|
||||
int TensorRTPluginImpl::GetGPUGroupSize() const {
|
||||
#ifdef LITE_CUDA_DISTRIBUTION
|
||||
return GetGroupSize(NCCL_WORLD_GROUP);
|
||||
#else
|
||||
return 1;
|
||||
#endif
|
||||
}
|
||||
|
||||
int TensorRTPluginImpl::GetRankID() const { return GetRankIDByGroup(NCCL_WORLD_GROUP); }
|
||||
int TensorRTPluginImpl::GetRankID() const {
|
||||
#ifdef LITE_CUDA_DISTRIBUTION
|
||||
return GetRankIDByGroup(NCCL_WORLD_GROUP);
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
} // namespace mindspore::lite
|
||||
|
||||
mindspore::lite::TensorRTExecutorPluginImplBase *CreateTensorRTPluginImpl() {
|
||||
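The fallback branches above keep callers free of build-flag checks. A usage sketch, assuming only that a TensorRTPluginImpl instance (plugin_impl here, a placeholder) is available at the call site:

  // Without LITE_CUDA_DISTRIBUTION this always reports group size 1 and rank 0.
  if (plugin_impl.GetGPUGroupSize() > 1) {
    MS_LOG(INFO) << "distributed inference, rank " << plugin_impl.GetRankID();
  } else {
    MS_LOG(INFO) << "single-device inference";
  }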
|
|
|
@ -24,6 +24,8 @@
|
|||
#include <numeric>
|
||||
#include <functional>
|
||||
#include <fstream>
|
||||
#include <limits>
|
||||
#include <unordered_map>
|
||||
#include "src/extendrt/delegate/delegate_utils.h"
|
||||
#include "src/common/utils.h"
|
||||
|
||||
|
@ -32,25 +34,23 @@
|
|||
#include "ops/strided_slice.h"
|
||||
#include "ops/expand_dims.h"
|
||||
#include "ops/fusion/topk_fusion.h"
|
||||
#include "ops/broadcast_to.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
TensorRTSubGraph::TensorRTSubGraph(std::vector<TensorRTOp *> ops, const std::vector<TensorInfo> &inputs,
|
||||
const std::vector<TensorInfo> &outputs, const mindspore::Context *ctx,
|
||||
std::shared_ptr<GPUDeviceInfo> device_info, TensorRTRuntime *runtime,
|
||||
bool support_resize, bool support_hw_resize,
|
||||
const std::vector<nvinfer1::Dims> &min_dims,
|
||||
const std::vector<nvinfer1::Dims> &opt_dims,
|
||||
const std::vector<nvinfer1::Dims> &max_dims)
|
||||
const ProfileConfigs &trt_profile_config)
|
||||
: inputs_(inputs),
|
||||
outputs_(outputs),
|
||||
all_ops_(std::move(ops)),
|
||||
device_info_(device_info),
|
||||
runtime_(runtime),
|
||||
min_dims_(min_dims),
|
||||
opt_dims_(opt_dims),
|
||||
max_dims_(max_dims) {
|
||||
trt_profile_config_(trt_profile_config) {
|
||||
trt_specific_weight_handled_inner_ = {
|
||||
ops::kNameTranspose, ops::kNameReshape, ops::kNameStridedSlice, ops::kNameExpandDims, ops::kNameTopKFusion,
|
||||
ops::kNameTranspose, ops::kNameReshape, ops::kNameStridedSlice,
|
||||
ops::kNameExpandDims, ops::kNameTopKFusion, ops::kNameBroadcastTo,
|
||||
};
|
||||
if (!support_resize) {
|
||||
input_batchsize_index_ = -1;
|
||||
|
@ -86,26 +86,55 @@ TensorRTSubGraph::~TensorRTSubGraph() {
|
|||
}
|
||||
}
|
||||
|
||||
bool TensorRTSubGraph::IsValidProfileDims() const {
  if (trt_profile_config_.profiles.empty()) {
    MS_LOG(INFO) << "Number of profiles is 0.";
    return false;
  }
  for (auto &profile : trt_profile_config_.profiles) {
    if (profile.inputs.size() != trt_profile_config_.input_infos.size()) {
      MS_LOG(WARNING) << "Profile input size " << profile.inputs.size() << " != input shape size "
                      << trt_profile_config_.input_infos.size();
      return false;
    }
    for (size_t i = 0; i < profile.inputs.size(); i++) {
      const auto &profile_input = profile.inputs[i];
      const auto &input_info = trt_profile_config_.input_infos[i];
      if (profile_input.min_dims.size() != input_info.input_shape.size()) {
        MS_LOG(WARNING) << "Profile input " << input_info.name << " min dims number " << profile_input.min_dims.size()
                        << " != input shape dim number " << input_info.input_shape.size();
        return false;
      }
      if (profile_input.max_dims.size() != input_info.input_shape.size()) {
        MS_LOG(WARNING) << "Profile input " << input_info.name << " max dims number " << profile_input.max_dims.size()
                        << " != input shape dim number " << input_info.input_shape.size();
        return false;
      }
      if (profile_input.opt_dims.size() != input_info.input_shape.size()) {
        MS_LOG(WARNING) << "Profile input " << input_info.name << " opt dims number " << profile_input.opt_dims.size()
                        << " != input shape dim number " << input_info.input_shape.size();
        return false;
      }
    }
  }
  return true;
}

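To make the invariant above concrete, here is a minimal sketch of a ProfileConfigs that would pass this check. Only names that appear in this diff are used (ProfileConfigs, ProfileItem, input_infos, input_shape, profiles, min_dims/opt_dims/max_dims); the exact field types are assumed from their usage here, so treat it as an illustration rather than the canonical API.

  // Hedged sketch: one dynamic NCHW input "x" with batch 1..8, optimal batch 4.
  ProfileConfigs configs;
  configs.input_infos.resize(1);
  configs.input_infos[0].name = "x";
  configs.input_infos[0].input_shape = {-1, 3, 224, 224};  // -1 marks the dynamic axis
  ProfileItem profile;
  profile.inputs.resize(1);
  profile.inputs[0].min_dims = {1, 3, 224, 224};
  profile.inputs[0].opt_dims = {4, 3, 224, 224};
  profile.inputs[0].max_dims = {8, 3, 224, 224};
  configs.profiles.push_back(profile);
  // With this layout IsValidProfileDims() returns true; dropping a dimension from any of
  // the three ranges would trip the per-input rank checks above.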
int TensorRTSubGraph::Init(cudaStream_t stream, cublasHandle_t cublas_handle, cublasLtHandle_t cublaslt_handle) {
|
||||
auto ret = GetGraphInOutOps(inputs_, outputs_, &in_ops_, &out_ops_, all_ops_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Get TensorRT subgraph input and output ops failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
profile_ = runtime_->GetBuilder()->createOptimizationProfile();
|
||||
if (profile_ == nullptr) {
|
||||
MS_LOG(ERROR) << "createOptimizationProfile failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
ctx_ = new (std::nothrow) TensorRTContext();
|
||||
if (ctx_ == nullptr) {
|
||||
MS_LOG(ERROR) << "New TensorRTContext failed.";
|
||||
return RET_OK;
|
||||
return RET_ERROR;
|
||||
}
|
||||
ctx_->SetRuntime(runtime_);
|
||||
if (!ctx_->Init()) {
|
||||
MS_LOG(ERROR) << "New TensorRTContext failed.";
|
||||
return RET_OK;
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (SetDeviceConfig(stream, cublas_handle, cublaslt_handle) != RET_OK) {
|
||||
MS_LOG(WARNING) << "set tensorrt config failed.";
|
||||
|
@ -115,6 +144,20 @@ int TensorRTSubGraph::Init(cudaStream_t stream, cublasHandle_t cublas_handle, cu
|
|||
MS_LOG(ERROR) << "create Serializer failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
using_input_ranges_ = IsValidProfileDims();
|
||||
if (using_input_ranges_) {
|
||||
for (size_t i = 0; i != trt_profile_config_.profiles.size(); ++i) {
|
||||
profiles_.push_back(runtime_->GetBuilder()->createOptimizationProfile());
|
||||
}
|
||||
} else {
|
||||
profiles_.push_back(runtime_->GetBuilder()->createOptimizationProfile());
|
||||
}
|
||||
for (size_t i = 0; i != profiles_.size(); ++i) {
|
||||
if (profiles_[i] == nullptr) {
|
||||
MS_LOG(ERROR) << "create optimization profile failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
engine_ = serializer_->GetSerializedEngine();
|
||||
if (engine_ != nullptr) {
|
||||
MS_LOG(INFO) << "using serialized engine " << serialize_file_path_;
|
||||
|
@ -130,9 +173,11 @@ int TensorRTSubGraph::Init(cudaStream_t stream, cublasHandle_t cublas_handle, cu
|
|||
|
||||
int TensorRTSubGraph::BuildEngine() {
|
||||
// print all network ops
|
||||
if (this->config_->addOptimizationProfile(profile_) == -1) {
|
||||
MS_LOG(ERROR) << "addOptimizationProfile failed.";
|
||||
return RET_ERROR;
|
||||
for (auto &profile : profiles_) {
|
||||
if (this->config_->addOptimizationProfile(profile) == -1) {
|
||||
MS_LOG(ERROR) << "addOptimizationProfile failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
MS_LOG(INFO) << "build engine for tensorrt network: " << ctx_->network()->getName();
|
||||
for (int i = 0; i < ctx_->network()->getNbLayers(); i++) {
|
||||
|
@ -184,7 +229,8 @@ int TensorRTSubGraph::SetDeviceConfig(cudaStream_t stream, cublasHandle_t cublas
|
|||
MS_LOG(INFO) << GetRankID() << " tensorrt subgraph stream: " << stream_;
|
||||
|
||||
// config setMaxWorkspaceSize to 2047 MB for max limit
|
||||
config_->setMaxWorkspaceSize(2047 * (1 << 20));
|
||||
constexpr size_t kWorkspaceSize = 2047 * (1 << 20);
|
||||
config_->setMaxWorkspaceSize(kWorkspaceSize);
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
@ -197,7 +243,10 @@ bool TensorRTSubGraph::IsInt8Mode() {
|
|||
return false;
|
||||
}
|
||||
|
||||
nvinfer1::ITensor *TensorRTSubGraph::SetTensorRTNetworkInput(const TensorInfo &in_tensor, size_t index) {
|
||||
nvinfer1::ITensor *TensorRTSubGraph::SetTensorRTNetworkInput(const TensorInfo &in_tensor, int index) {
|
||||
if (index < 0) {
|
||||
return nullptr;
|
||||
}
|
||||
for (int i = 0; i < ctx_->network()->getNbInputs(); i++) {
|
||||
if (in_tensor.Name().compare(ctx_->network()->getInput(i)->getName()) == 0) {
|
||||
MS_LOG(INFO) << "input tensor is already added in network: " << in_tensor.Name();
|
||||
|
@ -211,68 +260,48 @@ nvinfer1::ITensor *TensorRTSubGraph::SetTensorRTNetworkInput(const TensorInfo &i
|
|||
return nullptr;
|
||||
}
|
||||
nvinfer1::Dims input_dims;
|
||||
if (min_dims_.size() != 0 && min_dims_.size() == opt_dims_.size() && min_dims_.size() == max_dims_.size()) {
|
||||
if (using_input_ranges_) {
|
||||
input_dims = SetInputDimsProfile(in_tensor, index);
|
||||
} else {
|
||||
input_dims = ParseInputDimsProfile(in_tensor);
|
||||
input_dims = ParseInputDimsProfile(in_tensor, index);
|
||||
}
|
||||
MS_LOG(INFO) << "add network input: " << in_tensor.Name();
|
||||
return ctx_->network()->addInput(in_tensor.Name().c_str(), cuda_dtype, input_dims);
|
||||
}
|
||||
|
||||
nvinfer1::Dims TensorRTSubGraph::SetInputDimsProfile(const TensorInfo &in_tensor, size_t index) {
|
||||
nvinfer1::Dims input_dims;
|
||||
input_dims.nbDims = min_dims_[index].nbDims;
|
||||
for (int i = 0; i != input_dims.nbDims; ++i) {
|
||||
input_dims.d[i] = (max_dims_[index].d[i] != min_dims_[index].d[i]) ? -1 : min_dims_[index].d[i];
|
||||
nvinfer1::Dims TensorRTSubGraph::SetInputDimsProfile(const TensorInfo &in_tensor, int index) {
|
||||
auto input_info = trt_profile_config_.input_infos[index];
|
||||
auto input_dims = ConvertCudaDims(input_info.input_shape);
|
||||
DebugDims("input dims", input_dims);
|
||||
for (size_t i = 0; i < trt_profile_config_.profiles.size(); i++) {
|
||||
auto &profile = trt_profile_config_.profiles[i];
|
||||
auto min_dims = ConvertCudaDims(profile.inputs[index].min_dims);
|
||||
if (!profiles_[i]->setDimensions(input_info.name.c_str(), nvinfer1::OptProfileSelector::kMIN, min_dims)) {
|
||||
MS_LOG(ERROR) << "setDimensions of kMIN failed for " << input_info.name;
|
||||
return input_dims;
|
||||
}
|
||||
auto opt_dims = ConvertCudaDims(profile.inputs[index].opt_dims);
|
||||
if (!profiles_[i]->setDimensions(input_info.name.c_str(), nvinfer1::OptProfileSelector::kOPT, opt_dims)) {
|
||||
MS_LOG(ERROR) << "setDimensions of kOPT failed for " << input_info.name;
|
||||
return input_dims;
|
||||
}
|
||||
|
||||
auto max_dims = ConvertCudaDims(profile.inputs[index].max_dims);
|
||||
if (!profiles_[i]->setDimensions(input_info.name.c_str(), nvinfer1::OptProfileSelector::kMAX, max_dims)) {
|
||||
MS_LOG(ERROR) << "setDimensions of kMAX failed for " << input_info.name;
|
||||
return input_dims;
|
||||
}
|
||||
DebugDims("min dims", min_dims);
|
||||
DebugDims("opt dims", opt_dims);
|
||||
DebugDims("max dims", max_dims);
|
||||
}
|
||||
if (profile_ == nullptr) {
|
||||
MS_LOG(ERROR) << "profile is null.";
|
||||
return input_dims;
|
||||
}
|
||||
if (!profile_->setDimensions(in_tensor.Name().c_str(), nvinfer1::OptProfileSelector::kMIN, min_dims_[index])) {
|
||||
MS_LOG(ERROR) << "setDimensions of kMIN failed for " << in_tensor.Name();
|
||||
return input_dims;
|
||||
}
|
||||
if (!profile_->setDimensions(in_tensor.Name().c_str(), nvinfer1::OptProfileSelector::kOPT, opt_dims_[index])) {
|
||||
MS_LOG(ERROR) << "setDimensions of kOPT failed for " << in_tensor.Name();
|
||||
return input_dims;
|
||||
}
|
||||
if (!profile_->setDimensions(in_tensor.Name().c_str(), nvinfer1::OptProfileSelector::kMAX, max_dims_[index])) {
|
||||
MS_LOG(ERROR) << "setDimensions of kMAX failed for " << in_tensor.Name();
|
||||
return input_dims;
|
||||
}
|
||||
DebugDims(min_dims_[index]);
|
||||
DebugDims(opt_dims_[index]);
|
||||
DebugDims(max_dims_[index]);
|
||||
return input_dims;
|
||||
}
|
||||
|
||||
nvinfer1::Dims TensorRTSubGraph::ParseInputDimsProfile(const TensorInfo &in_tensor) {
|
||||
nvinfer1::Dims TensorRTSubGraph::ParseInputDimsProfile(const TensorInfo &in_tensor, int index) {
|
||||
nvinfer1::Dims input_dims = ConvertCudaDims(in_tensor.Shape());
|
||||
if (profile_ == nullptr) {
|
||||
MS_LOG(ERROR) << "profile is null.";
|
||||
return input_dims;
|
||||
}
|
||||
if (runtime_->GetBatchSize() == 0) {
|
||||
runtime_->SetBatchSize(input_dims.d[0]);
|
||||
MS_LOG(INFO) << "batch size init as " << runtime_->GetBatchSize();
|
||||
if (input_batchsize_index_ != -1) {
|
||||
input_dims.d[0] = -1; // dynamic batch size with wildcard N, default batchsize is first dims
|
||||
input_batchsize_index_ = 0;
|
||||
}
|
||||
} else {
|
||||
if (input_batchsize_index_ != -1) {
|
||||
for (int n = 0; n < input_dims.nbDims; n++) {
|
||||
if (input_dims.d[n] == runtime_->GetBatchSize()) {
|
||||
runtime_->SetBatchSize(std::max(input_dims.d[0], runtime_->GetBatchSize()));
|
||||
// first dims equals to batchsize
|
||||
input_dims.d[n] = -1;
|
||||
input_batchsize_index_ = n;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (input_batchsize_index_ != -1) {
|
||||
input_dims.d[0] = -1;
|
||||
}
|
||||
// only support NHWC HW dim resize
|
||||
if (input_hw_index_ != -1) {
|
||||
|
@ -281,42 +310,50 @@ nvinfer1::Dims TensorRTSubGraph::ParseInputDimsProfile(const TensorInfo &in_tens
|
|||
input_dims.d[input_hw_index_] = -1;
|
||||
input_dims.d[input_hw_index_ + 1] = -1;
|
||||
}
|
||||
auto shape = in_tensor.Shape();
|
||||
// We do not need to check the return of setDimension and addOptimizationProfile here as all dims are explicitly set
|
||||
nvinfer1::Dims input_dims_min = ConvertCudaDims(shape);
|
||||
nvinfer1::Dims input_dims_min = ConvertCudaDims(in_tensor.Shape());
|
||||
if (input_batchsize_index_ != -1) {
|
||||
input_dims_min.d[input_batchsize_index_] = 1;
|
||||
input_dims_min.d[0] = 1;
|
||||
if (input_hw_index_ != -1) {
|
||||
input_dims_min.d[input_hw_index_] = 1;
|
||||
input_dims_min.d[input_hw_index_ + 1] = 1;
|
||||
}
|
||||
}
|
||||
if (!profile_->setDimensions(in_tensor.Name().c_str(), nvinfer1::OptProfileSelector::kMIN, input_dims_min)) {
|
||||
if (!profiles_.front()->setDimensions(in_tensor.Name().c_str(), nvinfer1::OptProfileSelector::kMIN, input_dims_min)) {
|
||||
MS_LOG(ERROR) << "setDimensions of kMIN failed for " << in_tensor.Name();
|
||||
return input_dims;
|
||||
}
|
||||
nvinfer1::Dims input_dims_opt = ConvertCudaDims(shape);
|
||||
if (!profile_->setDimensions(in_tensor.Name().c_str(), nvinfer1::OptProfileSelector::kOPT, input_dims_opt)) {
|
||||
nvinfer1::Dims input_dims_opt = ConvertCudaDims(in_tensor.Shape());
|
||||
if (!profiles_.front()->setDimensions(in_tensor.Name().c_str(), nvinfer1::OptProfileSelector::kOPT, input_dims_opt)) {
|
||||
MS_LOG(ERROR) << "setDimensions of kOPT failed for " << in_tensor.Name();
|
||||
return input_dims;
|
||||
}
|
||||
nvinfer1::Dims input_dims_max = ConvertCudaDims(shape);
|
||||
nvinfer1::Dims input_dims_max = ConvertCudaDims(in_tensor.Shape());
|
||||
// input_dims_max should be the same with input network dims
|
||||
if (!profile_->setDimensions(in_tensor.Name().c_str(), nvinfer1::OptProfileSelector::kMAX, input_dims_max)) {
|
||||
if (!profiles_.front()->setDimensions(in_tensor.Name().c_str(), nvinfer1::OptProfileSelector::kMAX, input_dims_max)) {
|
||||
MS_LOG(ERROR) << "setDimensions of kMAX failed for " << in_tensor.Name();
|
||||
return input_dims;
|
||||
}
|
||||
DebugDims(input_dims);
|
||||
DebugDims(input_dims_min);
|
||||
DebugDims(input_dims_opt);
|
||||
DebugDims(input_dims_max);
|
||||
if (trt_profile_config_.profiles.empty()) {
|
||||
ProfileItem profile_item;
|
||||
profile_item.inputs.resize(inputs_.size());
|
||||
trt_profile_config_.profiles.push_back(profile_item);
|
||||
}
|
||||
auto &profile_item = trt_profile_config_.profiles.back();
|
||||
profile_item.inputs[index].min_dims = ConvertMSShape(input_dims_min);
|
||||
profile_item.inputs[index].opt_dims = ConvertMSShape(input_dims_opt);
|
||||
profile_item.inputs[index].max_dims = ConvertMSShape(input_dims_max);
|
||||
|
||||
DebugDims("input min dims", input_dims_min);
|
||||
DebugDims("input opt dims", input_dims_opt);
|
||||
DebugDims("input max dims", input_dims_max);
|
||||
return input_dims;
|
||||
}
|
||||
|
||||
int TensorRTSubGraph::ParseInputsProfile() {
|
||||
MS_LOG(INFO) << "using serialied engine.";
|
||||
for (auto in_tensor : inputs()) {
|
||||
auto dim = ParseInputDimsProfile(in_tensor);
|
||||
for (size_t i = 0; i < inputs_.size(); i++) {
|
||||
auto dim = ParseInputDimsProfile(inputs_[i], i);
|
||||
if (dim.nbDims <= 0) {
|
||||
MS_LOG(ERROR) << "input dims is invalid.";
|
||||
return RET_ERROR;
|
||||
|
@ -325,6 +362,15 @@ int TensorRTSubGraph::ParseInputsProfile() {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
int TensorRTSubGraph::GetInputIndexByName(const std::string &name) {
|
||||
for (size_t i = 0; i != inputs().size(); ++i) {
|
||||
if (inputs()[i].Name() == name) {
|
||||
return i;
|
||||
}
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
int TensorRTSubGraph::BuildTensorRTGraph() {
|
||||
MS_ASSERT(!all_ops_.empty());
|
||||
int ret;
|
||||
|
@ -338,7 +384,7 @@ int TensorRTSubGraph::BuildTensorRTGraph() {
|
|||
// Data From CPU
|
||||
auto in_tensor = cur_op->inputs()[i];
|
||||
if (IsSubGraphInputTensor(this->inputs(), in_tensor)) {
|
||||
nvinfer1::ITensor *trt_tensor = SetTensorRTNetworkInput(in_tensor, i);
|
||||
nvinfer1::ITensor *trt_tensor = SetTensorRTNetworkInput(in_tensor, GetInputIndexByName(in_tensor.Name()));
|
||||
if (trt_tensor == nullptr) {
|
||||
MS_LOG(ERROR) << "SetTensorRTNetworkInput failed for " << in_tensor.Name();
|
||||
return RET_ERROR;
|
||||
|
@ -442,7 +488,7 @@ int TensorRTSubGraph::Prepare() {
|
|||
return RET_ERROR;
|
||||
}
|
||||
int binding_num = this->engine_->getNbBindings();
|
||||
if (binding_num < 0) {
|
||||
if (binding_num <= 0) {
|
||||
MS_LOG(ERROR) << "TensorRTSubGraph binding num < 0.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
@ -452,28 +498,36 @@ int TensorRTSubGraph::Prepare() {
|
|||
return RET_ERROR;
|
||||
}
|
||||
|
||||
for (auto tensor : inputs_) {
|
||||
auto device_ptr = runtime_->GetAllocator()->MallocDeviceMem(tensor, tensor.DataSize());
|
||||
profile_index_ = MaxVolumnProfileIndex();
|
||||
if (this->trt_context_->setOptimizationProfile(profile_index_)) {
|
||||
MS_LOG(INFO) << "setOptimizationProfile: " << profile_index_;
|
||||
}
|
||||
const auto &profile = trt_profile_config_.profiles[profile_index_];
|
||||
for (size_t i = 0; i != inputs_.size(); ++i) {
|
||||
auto &tensor = inputs_[i];
|
||||
auto max_profile_dims = profile.inputs[i].max_dims;
|
||||
tensor.SetShape(max_profile_dims);
|
||||
int volumn = std::accumulate(max_profile_dims.begin(), max_profile_dims.end(), 1, std::multiplies<int>());
|
||||
auto type_size = lite::DataTypeSize(static_cast<enum TypeId>(tensor.DataType()));
|
||||
auto device_ptr = runtime_->GetAllocator()->MallocDeviceMem(tensor, volumn * type_size);
|
||||
if (device_ptr == nullptr) {
|
||||
MS_LOG(ERROR) << "malloc for inputs tensor device memory failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
runtime_->SetBatchSize(tensor.Shape()[0]);
|
||||
int index = this->engine_->getBindingIndex(tensor.Name().c_str());
|
||||
MS_LOG(INFO) << "device index " << index << " for tensor : " << tensor.Name() << " attr: " << device_ptr;
|
||||
auto tensor_name = tensor.Name();
|
||||
trt_in_tensor_name_.push_back(tensor_name);
|
||||
int index = GetProfileBindingIndex(tensor_name, profile_index_);
|
||||
MS_LOG(INFO) << "device index " << index << " for tensor : " << tensor_name << " attr: " << device_ptr;
|
||||
tensor_bindings_[index] = device_ptr;
|
||||
trt_in_tensor_name_.push_back(tensor.Name());
|
||||
nvinfer1::Dims input_dims = ConvertCudaDims(tensor.Shape());
|
||||
nvinfer1::Dims input_dims = ConvertCudaDims(profile.inputs[i].max_dims);
|
||||
for (int od = 0; od < input_dims.nbDims; od++) {
|
||||
MS_LOG(DEBUG) << "in tensor " << tensor.Name() << " dims at " << od << " is " << input_dims.d[od];
|
||||
}
|
||||
|
||||
if (!this->trt_context_->setBindingDimensions(index, input_dims)) {
|
||||
MS_LOG(ERROR) << "invalid input dims of " << tensor.Name();
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
|
||||
if (!this->trt_context_->allInputDimensionsSpecified()) {
|
||||
MS_LOG(ERROR) << "input dims need to be specified.";
|
||||
return RET_ERROR;
|
||||
|
@ -486,30 +540,91 @@ int TensorRTSubGraph::Prepare() {
|
|||
}
|
||||
}
|
||||
for (auto &tensor : outputs_) {
|
||||
int index = this->engine_->getBindingIndex(tensor.Name().c_str());
|
||||
auto out_dims = trt_context_->getBindingDimensions(index);
|
||||
int max_index = GetProfileBindingIndex(tensor.Name(), profile_index_);
|
||||
auto out_dims = trt_context_->getBindingDimensions(max_index);
|
||||
int elem_num = std::accumulate(out_dims.d, out_dims.d + out_dims.nbDims, 1, std::multiplies<int>());
|
||||
DebugDims(out_dims);
|
||||
auto new_shape = lite::ConvertMSShape(out_dims);
|
||||
MS_LOG(INFO) << "Set output shape of " << tensor.Name() << " to " << new_shape << " by tensorrt binding output";
|
||||
tensor.SetShape(new_shape);
|
||||
auto type_size = DataTypeSize(static_cast<enum TypeId>(tensor.DataType()));
|
||||
DebugDims("out dims", out_dims);
|
||||
MS_LOG(INFO) << "Set output shape by tensorrt binding output";
|
||||
tensor.SetShape(lite::ConvertMSShape(out_dims));
|
||||
auto type_size = lite::DataTypeSize(static_cast<enum TypeId>(tensor.DataType()));
|
||||
auto device_ptr = runtime_->GetAllocator()->MallocDeviceMem(tensor, elem_num * type_size);
|
||||
if (device_ptr == nullptr) {
|
||||
MS_LOG(ERROR) << "malloc for outputs tensor device memory failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
tensor_bindings_[index] = device_ptr;
|
||||
for (size_t j = 0; j != profiles_.size(); ++j) {
|
||||
int index = GetProfileBindingIndex(tensor.Name(), j);
|
||||
tensor_bindings_[index] = device_ptr;
|
||||
}
|
||||
trt_out_tensor_name_.push_back(tensor.Name());
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int TensorRTSubGraph::SelectProfile(const std::vector<ShapeVector> &new_shapes) const {
  std::vector<int> profile_index;
  for (size_t i = 0; i < profiles_.size(); ++i) {
    const auto &profile = trt_profile_config_.profiles[i];
    bool condition = true;
    for (size_t j = 0; j < trt_in_tensor_name_.size(); ++j) {
      auto new_shape = new_shapes[j];
      auto profile_input = profile.inputs[j];
      if (new_shape.size() != profile_input.max_dims.size()) {
        condition = false;
      } else {
        for (size_t od = 0; od < new_shape.size(); od++) {
          if (new_shape[od] < profile_input.min_dims[od] || new_shape[od] > profile_input.max_dims[od]) {
            condition = false;
            break;
          }
        }
      }
    }
    if (condition) {
      profile_index.push_back(i);
    }
  }
  return profile_index.empty() ? -1 : profile_index.front();
}

size_t TensorRTSubGraph::MaxVolumnProfileIndex() const {
  int max_volumn = std::numeric_limits<int>::min();
  size_t max_volumn_index = 0;
  for (size_t i = 0; i < trt_profile_config_.profiles.size(); ++i) {
    const auto &profile = trt_profile_config_.profiles[i];
    // depend on the first input tensor
    int64_t volumn = std::accumulate(profile.inputs[0].max_dims.begin(), profile.inputs[0].max_dims.end(), 1,
                                     std::multiplies<int64_t>());
    if (volumn > max_volumn) {
      max_volumn_index = i;
      max_volumn = volumn;
    }
  }
  return max_volumn_index;
}

int TensorRTSubGraph::GetProfileBindingIndex(const std::string &name, size_t profile_index) {
  std::string binding_name = name;
  if (profile_index != 0) {
    binding_name += " [profile " + std::to_string(profile_index) + "]";
  }
  return this->engine_->getBindingIndex(binding_name.c_str());
}

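A note on the binding-name suffix used above: when a TensorRT engine is built with several optimization profiles, the bindings for profile k > 0 are duplicated under names of the form "tensor [profile k]" (TensorRT 7/8 binding API), which is what GetProfileBindingIndex reconstructs. A usage sketch with a placeholder tensor name:

  // Inside TensorRTSubGraph, after engine_ has been built with multiple profiles:
  int idx_profile0 = GetProfileBindingIndex("x", 0);  // resolves the binding named "x"
  int idx_profile2 = GetProfileBindingIndex("x", 2);  // resolves "x [profile 2]"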
int TensorRTSubGraph::OnNewInputShapes(const std::vector<ShapeVector> &new_shapes) {
|
||||
if (inputs_.size() != new_shapes.size()) {
|
||||
MS_LOG(ERROR) << "Graph inputs size " << inputs_.size() << " != resize input size " << new_shapes.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
auto select_profile_index = SelectProfile(new_shapes);
|
||||
if (select_profile_index < 0) {
|
||||
MS_LOG(ERROR) << "Not support input shape " << new_shapes;
|
||||
return RET_ERROR;
|
||||
}
|
||||
profile_index_ = static_cast<size_t>(select_profile_index);
|
||||
if (this->trt_context_->setOptimizationProfile(profile_index_)) {
|
||||
MS_LOG(INFO) << "setOptimizationProfile: " << profile_index_;
|
||||
}
|
||||
int batch_size = -1;
|
||||
for (size_t i = 0; i < trt_in_tensor_name_.size(); i++) {
|
||||
if (inputs_[i].Shape() == new_shapes[i]) {
|
||||
|
@ -543,9 +658,8 @@ int TensorRTSubGraph::OnNewInputShapes(const std::vector<ShapeVector> &new_shape
|
|||
return RET_ERROR;
|
||||
}
|
||||
batch_size = new_batch_size;
|
||||
runtime_->SetBatchSize(batch_size);
|
||||
|
||||
int index = this->engine_->getBindingIndex(trt_in_tensor_name_[i].c_str());
|
||||
int index = GetProfileBindingIndex(trt_in_tensor_name_[i], profile_index_);
|
||||
// Set actual input size
|
||||
nvinfer1::Dims input_dims = ConvertCudaDims(inputs_[i].Shape());
|
||||
for (int od = 0; od < input_dims.nbDims; od++) {
|
||||
|
@ -563,8 +677,9 @@ int TensorRTSubGraph::OnNewInputShapes(const std::vector<ShapeVector> &new_shape
|
|||
}
|
||||
if (batch_size != -1) {
|
||||
for (size_t i = 0; i < trt_out_tensor_name_.size(); i++) {
|
||||
int index = this->engine_->getBindingIndex(trt_out_tensor_name_[i].c_str());
|
||||
auto index = GetProfileBindingIndex(trt_out_tensor_name_[i], profile_index_);
|
||||
auto out_dims = trt_context_->getBindingDimensions(index);
|
||||
DebugDims("out dims", out_dims);
|
||||
auto new_shape = lite::ConvertMSShape(out_dims);
|
||||
MS_LOG(INFO) << "Set output shape of " << trt_out_tensor_name_[i] << " to " << new_shape
|
||||
<< " by tensorrt binding output";
|
||||
|
@ -610,13 +725,13 @@ int TensorRTSubGraph::PreExecute(const std::vector<tensor::Tensor> &inputs,
|
|||
}
|
||||
runtime_->GetAllocator()->MarkMemValid(trt_tensor_name, true);
|
||||
}
|
||||
int index = this->engine_->getBindingIndex(trt_tensor_name.c_str());
|
||||
int index = GetProfileBindingIndex(trt_tensor_name, profile_index_);
|
||||
MS_LOG(INFO) << "device index " << index << " for tensor : " << trt_tensor_name << " attr: " << device_ptr;
|
||||
tensor_bindings_[index] = device_ptr;
|
||||
}
|
||||
for (size_t i = 0; i < trt_out_tensor_name_.size(); i++) {
|
||||
const auto &trt_out_tensor_name = trt_out_tensor_name_[i];
|
||||
int index = this->engine_->getBindingIndex(trt_out_tensor_name.c_str());
|
||||
int index = GetProfileBindingIndex(trt_out_tensor_name, profile_index_);
|
||||
void *device_ptr = nullptr;
|
||||
if (outputs.size() > i) {
|
||||
auto &output = outputs[i];
|
||||
|
@ -651,16 +766,10 @@ int TensorRTSubGraph::PostExecute(std::vector<tensor::Tensor> *outputs) {
|
|||
auto has_outputs = !outputs->empty();
|
||||
for (size_t i = 0; i < trt_out_tensor_name_.size(); i++) {
|
||||
const auto &trt_out_tensor_name = trt_out_tensor_name_[i];
|
||||
int index = this->engine_->getBindingIndex(trt_out_tensor_name.c_str());
|
||||
auto index = GetProfileBindingIndex(trt_out_tensor_name, profile_index_);
|
||||
// actual output tensor dims
|
||||
auto out_dims = this->trt_context_->getBindingDimensions(index);
|
||||
std::vector<int64_t> new_shape = lite::ConvertMSShape(out_dims);
|
||||
// batchsize resize need set new batch size
|
||||
if (input_batchsize_index_ != -1) {
|
||||
if (runtime_->GetBatchSize() != new_shape[output_batchsize_index_]) {
|
||||
new_shape[output_batchsize_index_] = runtime_->GetBatchSize();
|
||||
}
|
||||
}
|
||||
outputs_[i].SetShape(new_shape);
|
||||
for (int od = 0; od < out_dims.nbDims; od++) {
|
||||
MS_LOG(DEBUG) << "out tensor " << trt_out_tensor_name << " dims at " << od << " is " << new_shape[od];
|
||||
|
@ -712,20 +821,6 @@ bool TensorRTSubGraph::ValidInputResizeDims(const nvinfer1::Dims &construct_dims
|
|||
MS_LOG(ERROR) << "invalid resize input.";
|
||||
return false;
|
||||
}
|
||||
if (input_hw_index_ == -1) {
|
||||
// only NHWC format support HW resize, otherwise only support batchsize resize
|
||||
for (int d = 0; d < construct_dims.nbDims; d++) {
|
||||
if (d != input_batchsize_index_ && construct_dims.d[d] != resize_input_shape[d]) {
|
||||
MS_LOG(ERROR) << "only support dynamic batch size resize input.";
|
||||
return false;
|
||||
}
|
||||
}
|
||||
} else if ((input_hw_index_ == 1 && construct_dims.d[DIMENSION_3D] != resize_input_shape[DIMENSION_3D]) ||
|
||||
(input_hw_index_ == DIMENSION_2D && construct_dims.d[1] != resize_input_shape[1])) {
|
||||
// input may be nhwc || nchw
|
||||
MS_LOG(ERROR) << "don't support dynamic channel resize input.";
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -28,6 +28,7 @@
|
|||
#include "src/extendrt/delegate/tensorrt/op/tensorrt_op.h"
|
||||
#include "src/extendrt/delegate/parameter_cache/embedding_cache_manager.h"
|
||||
#include "include/api/context.h"
|
||||
#include "common/config_infos.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
using mindspore::lite::RET_ERROR;
|
||||
|
@ -42,8 +43,7 @@ class TensorRTSubGraph {
|
|||
TensorRTSubGraph(std::vector<TensorRTOp *> ops, const std::vector<TensorInfo> &inputs,
|
||||
const std::vector<TensorInfo> &outputs, const mindspore::Context *ctx,
|
||||
std::shared_ptr<GPUDeviceInfo> device_info, TensorRTRuntime *runtime, bool support_resize,
|
||||
bool support_hw_resize, const std::vector<nvinfer1::Dims> &min_dims,
|
||||
const std::vector<nvinfer1::Dims> &opt_dims, const std::vector<nvinfer1::Dims> &max_dims);
|
||||
bool support_hw_resize, const ProfileConfigs &trt_profile_config);
|
||||
~TensorRTSubGraph();
|
||||
|
||||
int Prepare();
|
||||
|
@ -63,6 +63,7 @@ class TensorRTSubGraph {
|
|||
std::vector<TensorInfo> &outputs() { return outputs_; }
|
||||
|
||||
private:
|
||||
int GetInputIndexByName(const std::string &name);
|
||||
int BuildEngine();
|
||||
|
||||
int SetDeviceConfig(cudaStream_t stream, cublasHandle_t cublas_handle, cublasLtHandle_t cublaslt_handle);
|
||||
|
@ -71,7 +72,7 @@ class TensorRTSubGraph {
|
|||
|
||||
bool SupportFP16();
|
||||
|
||||
nvinfer1::ITensor *SetTensorRTNetworkInput(const TensorInfo &in_tensor, size_t index);
|
||||
nvinfer1::ITensor *SetTensorRTNetworkInput(const TensorInfo &in_tensor, int index);
|
||||
|
||||
ITensorHelper FindTensorRTInputs(TensorRTOp *cur_op, const TensorInfo &in_tensor);
|
||||
|
||||
|
@ -85,17 +86,21 @@ class TensorRTSubGraph {
|
|||
|
||||
int HandleCacheTensor(TensorRTOp *cur_op, const TensorInfo &in_tensor);
|
||||
|
||||
nvinfer1::Dims ParseInputDimsProfile(const TensorInfo &in_tensor);
|
||||
nvinfer1::Dims SetInputDimsProfile(const TensorInfo &in_tensor, size_t index);
|
||||
nvinfer1::Dims ParseInputDimsProfile(const TensorInfo &in_tensor, int index);
|
||||
nvinfer1::Dims SetInputDimsProfile(const TensorInfo &in_tensor, int index);
|
||||
int ParseInputsProfile();
|
||||
|
||||
bool ValidInputResizeDims(const nvinfer1::Dims &construct_dims, const std::vector<int64_t> &resize_input_shape);
|
||||
|
||||
int PreExecute(const std::vector<tensor::Tensor> &inputs, const std::vector<tensor::Tensor> &outputs);
|
||||
int PostExecute(std::vector<tensor::Tensor> *outputs);
|
||||
|
||||
int OnNewInputShapes(const std::vector<ShapeVector> &inputs);
|
||||
|
||||
size_t MaxVolumnProfileIndex() const;
|
||||
int SelectProfile(const std::vector<ShapeVector> &new_shapes) const;
|
||||
int GetProfileBindingIndex(const std::string &name, size_t profile_index);
|
||||
bool ValidInputResizeDims(const nvinfer1::Dims &construct_dims, const std::vector<int64_t> &resize_input_shape);
|
||||
bool IsValidProfileDims() const;
|
||||
|
||||
std::string name_;
|
||||
std::vector<TensorInfo> inputs_;
|
||||
std::vector<TensorInfo> outputs_;
|
||||
|
@ -122,7 +127,6 @@ class TensorRTSubGraph {
|
|||
nvinfer1::IBuilderConfig *config_{nullptr};
|
||||
nvinfer1::ICudaEngine *engine_{nullptr};
|
||||
nvinfer1::IExecutionContext *trt_context_{nullptr};
|
||||
nvinfer1::IOptimizationProfile *profile_{nullptr};
|
||||
|
||||
TensorRTContext *ctx_;
|
||||
|
||||
|
@ -138,9 +142,10 @@ class TensorRTSubGraph {
|
|||
std::string serialize_file_path_;
|
||||
cudaStream_t stream_{nullptr};
|
||||
|
||||
std::vector<nvinfer1::Dims> min_dims_;
|
||||
std::vector<nvinfer1::Dims> opt_dims_;
|
||||
std::vector<nvinfer1::Dims> max_dims_;
|
||||
std::vector<nvinfer1::IOptimizationProfile *> profiles_{};
|
||||
bool using_input_ranges_{false};
|
||||
ProfileConfigs trt_profile_config_;
|
||||
size_t profile_index_{0};
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_TENSORRT_SUBGRAPH_H_
|
||||
|
|
|
@ -75,7 +75,6 @@ nvinfer1::Dims ConvertCudaDims(const TensorInfo &ms_tensor) {
|
|||
}
|
||||
return dims;
|
||||
}
|
||||
|
||||
bool SameDims(nvinfer1::Dims dims, const std::vector<int64_t> &shape) {
|
||||
if (dims.nbDims != static_cast<int>(shape.size())) {
|
||||
return false;
|
||||
|
@ -240,9 +239,9 @@ std::experimental::optional<ActivationParams> TryConvertActivationType(Activatio
|
|||
{ActivationType::RELU6, ActivationParams{nvinfer1::ActivationType::kCLIP, true, 0, true, 6}},
|
||||
{ActivationType::RELU1, ActivationParams{nvinfer1::ActivationType::kCLIP, true, 0, true, 1}},
|
||||
{ActivationType::HARD_TANH, ActivationParams{nvinfer1::ActivationType::kCLIP, true, -1, true, 1}},
|
||||
{ActivationType::SWISH, ActivationParams{nvinfer1::ActivationType::kSIGMOID, false, 0, false, 0}},
|
||||
// using plugin
|
||||
{ActivationType::GELU, ActivationParams{nvinfer1::ActivationType::kTHRESHOLDED_RELU, false, 0, false, 0}}};
|
||||
{ActivationType::GELU, ActivationParams{nvinfer1::ActivationType::kTHRESHOLDED_RELU, false, 0, false, 0}},
|
||||
{ActivationType::SWISH, ActivationParams{nvinfer1::ActivationType::kSIGMOID, false, 0, false, 0}}};
|
||||
return action_map.find(activation_type) != action_map.end()
|
||||
? std::experimental::optional<ActivationParams>(action_map[activation_type])
|
||||
: std::experimental::nullopt;
|
||||
|
@ -270,6 +269,10 @@ nvinfer1::ITensor *ConvertTensorWithExpandDims(TensorRTContext *ctx, const Tenso
|
|||
MS_LOG(ERROR) << "network is null for ConvertTensorWithExpandDims";
|
||||
return nullptr;
|
||||
}
|
||||
if (!ms_tensor.IsConst()) {
|
||||
MS_LOG(ERROR) << "ConvertTensorWithExpandDims from a MSTensor with nullptr data";
|
||||
return nullptr;
|
||||
}
|
||||
auto origin_shape = ms_tensor.Shape();
|
||||
std::vector<int64_t> convert_shape(expect_shape);
|
||||
AlignShapeRank(&origin_shape, convert_shape);
|
||||
|
@ -295,10 +298,6 @@ nvinfer1::ITensor *ConvertTensorWithExpandDims(TensorRTContext *ctx, const Tenso
|
|||
return nullptr;
|
||||
}
|
||||
nvinfer1::DataType data_type = ConvertDataType(ms_tensor.DataType());
|
||||
if (!ms_tensor.IsConst()) {
|
||||
MS_LOG(ERROR) << "ConvertTensorWithExpandDims from a MSTensor with nullptr data";
|
||||
return nullptr;
|
||||
}
|
||||
nvinfer1::Weights weights{data_type, ms_tensor.Data(), ms_tensor.ElementNum()};
|
||||
nvinfer1::IConstantLayer *constant_tensor = ctx->network()->addConstant(dims, weights);
|
||||
if (constant_tensor == nullptr) {
|
||||
|
@ -709,6 +708,87 @@ int ParseData2Vector(const TensorInfo &ms_tensor, std::vector<float> *dst) {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
nvinfer1::ITensor *ExpandDim(TensorRTContext *ctx, nvinfer1::ITensor *input_tensor, int axis) {
|
||||
// input has to be preprocessed to nchw
|
||||
auto input_dims = input_tensor->getDimensions();
|
||||
nvinfer1::IShuffleLayer *shuffle_layer = ctx->network()->addShuffle(*input_tensor);
|
||||
// if expand dim not at last dim and shape is dynamic, change to expanddim at last dim and transpose
|
||||
bool special_expand = false;
|
||||
for (int i = 0; i < input_dims.nbDims; i++) {
|
||||
special_expand = special_expand || input_dims.d[i] == -1;
|
||||
}
|
||||
special_expand = special_expand && (axis != -1 && axis != input_dims.nbDims);
|
||||
|
||||
if (special_expand) {
|
||||
std::vector<int64_t> new_shape;
|
||||
for (int i = 0; i < input_dims.nbDims; i++) {
|
||||
new_shape.push_back(input_dims.d[i] == -1 ? 0 : input_dims.d[i]);
|
||||
}
|
||||
new_shape.push_back(1);
|
||||
nvinfer1::Dims new_dims = ConvertCudaDims(new_shape);
|
||||
if (new_dims.nbDims == -1) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
shuffle_layer->setReshapeDimensions(new_dims);
|
||||
// transpose
|
||||
nvinfer1::Permutation perm{};
|
||||
for (int i = 0; i < new_dims.nbDims; i++) {
|
||||
if (i < axis) {
|
||||
perm.order[i] = i;
|
||||
} else if (i == axis) {
|
||||
perm.order[i] = new_dims.nbDims - 1;
|
||||
} else {
|
||||
perm.order[i] = i - 1;
|
||||
}
|
||||
}
|
||||
nvinfer1::IShuffleLayer *trans_layer = ctx->network()->addShuffle(*shuffle_layer->getOutput(0));
|
||||
if (trans_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "add transpose layer failed for special expand dims op ";
|
||||
return nullptr;
|
||||
}
|
||||
trans_layer->setFirstTranspose(perm);
|
||||
return trans_layer->getOutput(0);
|
||||
} else {
|
||||
std::vector<int64_t> new_shape;
|
||||
for (int i = 0; i < input_dims.nbDims; i++) {
|
||||
if (axis == i) {
|
||||
new_shape.push_back(1);
|
||||
}
|
||||
new_shape.push_back(input_dims.d[i] == -1 ? 0 : input_dims.d[i]);
|
||||
}
|
||||
if (axis == -1 || axis == input_dims.nbDims) {
|
||||
new_shape.push_back(1);
|
||||
}
|
||||
nvinfer1::Dims new_dims = ConvertCudaDims(new_shape);
|
||||
if (new_dims.nbDims == -1) {
|
||||
return nullptr;
|
||||
}
|
||||
shuffle_layer->setReshapeDimensions(new_dims);
|
||||
return shuffle_layer->getOutput(0);
|
||||
}
|
||||
}
|
||||
|
||||
nvinfer1::ITensor *Broadcast(TensorRTContext *ctx, nvinfer1::ITensor *input, nvinfer1::ITensor *shape) {
|
||||
int rank = shape->getDimensions().d[0];
|
||||
|
||||
nvinfer1::Dims starts{rank};
|
||||
std::fill(starts.d, starts.d + rank, 0);
|
||||
nvinfer1::Dims strides{rank};
|
||||
std::fill(strides.d, strides.d + rank, 1);
|
||||
|
||||
auto slice_layer = ctx->network()->addSlice(*input, starts, {}, strides);
|
||||
slice_layer->setMode(nvinfer1::SliceMode::kWRAP);
|
||||
const int INPUT2 = 2;
|
||||
slice_layer->setInput(INPUT2, *shape);
|
||||
|
||||
auto shuffler_output = slice_layer->getOutput(0);
|
||||
if (shuffler_output == nullptr) {
|
||||
MS_LOG(ERROR) << "add slice layer failed";
|
||||
}
|
||||
return shuffler_output;
|
||||
}
|
||||
|
||||
nvinfer1::ITensor *Reshape(TensorRTContext *ctx, nvinfer1::ITensor *input, const std::vector<int64_t> &shape) {
|
||||
return Reshape(ctx, input, ConvertCudaDims(shape));
|
||||
}
|
||||
|
@ -723,10 +803,23 @@ nvinfer1::ITensor *Reshape(TensorRTContext *ctx, nvinfer1::ITensor *input, const
|
|||
return reshape_layer->getOutput(0);
|
||||
}
|
||||
|
||||
void DebugDims(const nvinfer1::Dims &dims) {
  MS_LOG(DEBUG) << "nbdim : " << dims.nbDims;
void DebugDims(const std::string &key, const nvinfer1::Dims &dims) {
  MS_LOG(DEBUG) << key << ":" << dims.nbDims;
  for (int i = 0; i != dims.nbDims; ++i) {
    MS_LOG(DEBUG) << dims.d[i];
  }
}

template <>
nvinfer1::DataType GetNvinferDataType<float>() {
  return nvinfer1::DataType::kFLOAT;
}

template <>
nvinfer1::DataType GetNvinferDataType<int>() {
  return nvinfer1::DataType::kINT32;
}

template nvinfer1::DataType GetNvinferDataType<float>();
template nvinfer1::DataType GetNvinferDataType<int>();
}  // namespace mindspore::lite
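A small usage sketch for the two helpers above; ConvertCudaDims over a 64-bit shape vector is taken from elsewhere in this file, everything else is illustrative.

  // Log a shape under a readable tag and pick the matching TensorRT dtype.
  nvinfer1::Dims dims = ConvertCudaDims(std::vector<int64_t>{1, 3, 224, 224});
  DebugDims("network input", dims);                        // DEBUG: "network input:4", then 1, 3, 224, 224
  nvinfer1::DataType dtype = GetNvinferDataType<float>();  // nvinfer1::DataType::kFLOAT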
|
|
|
@ -138,7 +138,14 @@ nvinfer1::ITensor *ConvertConstantTensor1D(TensorRTContext *ctx, int *weights_ve
|
|||
|
||||
int ParseData2Vector(const TensorInfo &ms_tensor, std::vector<float> *dst);
|
||||
|
||||
void DebugDims(const nvinfer1::Dims &dims);
|
||||
void DebugDims(const std::string &key, const nvinfer1::Dims &dims);
|
||||
|
||||
nvinfer1::ITensor *ExpandDim(TensorRTContext *ctx, nvinfer1::ITensor *input_tensor, int axis);
|
||||
|
||||
nvinfer1::ITensor *Broadcast(TensorRTContext *ctx, nvinfer1::ITensor *input, nvinfer1::ITensor *shape);
|
||||
|
||||
template <typename T>
|
||||
nvinfer1::DataType GetNvinferDataType();
|
||||
|
||||
template <typename T1, typename T2>
|
||||
bool SameDims(const std::vector<T1> &shape1, const std::vector<T2> &shape2) {
|
||||
|
|
|
@ -76,11 +76,13 @@ std::vector<std::string> DefaultInferSession::GetOutputNames() { return std::vec
|
|||
std::vector<std::string> DefaultInferSession::GetInputNames() { return std::vector<std::string>(); }
|
||||
MutableTensorImplPtr DefaultInferSession::GetOutputByTensorName(const std::string &tensorName) { return nullptr; }
|
||||
MutableTensorImplPtr DefaultInferSession::GetInputByTensorName(const std::string &name) { return nullptr; }
|
||||
std::shared_ptr<InferSession> InferSession::CreateSession(const std::shared_ptr<Context> &context) {
|
||||
|
||||
std::shared_ptr<InferSession> InferSession::CreateSession(const std::shared_ptr<Context> &context,
|
||||
const ConfigInfos &config_info) {
|
||||
HandleContext(context);
|
||||
auto session_type = SelectSession(context);
|
||||
MS_LOG(DEBUG) << "Session type " << static_cast<int64_t>(session_type);
|
||||
return SessionRegistry::GetInstance().GetSession(session_type, context);
|
||||
return SessionRegistry::GetInstance().GetSession(session_type, context, config_info);
|
||||
}
|
||||
|
||||
void InferSession::HandleContext(const std::shared_ptr<Context> &context) {
|
||||
|
@ -148,7 +150,8 @@ SessionType InferSession::SelectSession(const std::shared_ptr<Context> &context)
|
|||
return kDefaultSession;
|
||||
}
|
||||
|
||||
static std::shared_ptr<InferSession> DefaultSessionCreator(const std::shared_ptr<Context> &ctx) {
|
||||
static std::shared_ptr<InferSession> DefaultSessionCreator(const std::shared_ptr<Context> &ctx,
|
||||
const ConfigInfos &config_infos) {
|
||||
auto session = std::make_shared<DefaultInferSession>(ctx);
|
||||
session->Init(ctx);
|
||||
return session;
|
||||
|
|
|
@ -29,12 +29,14 @@
|
|||
#include "extendrt/session/type.h"
|
||||
#include "common/mutable_tensor_impl.h"
|
||||
#include "extendrt/utils/kernel_graph_utils.h"
|
||||
#include "src/common/config_infos.h"
|
||||
|
||||
namespace mindspore {
|
||||
class InferSession : public std::enable_shared_from_this<InferSession> {
|
||||
public:
|
||||
virtual ~InferSession() = default;
|
||||
static std::shared_ptr<InferSession> CreateSession(const std::shared_ptr<Context> &context);
|
||||
static std::shared_ptr<InferSession> CreateSession(const std::shared_ptr<Context> &context,
|
||||
const ConfigInfos &config_info);
|
||||
static SessionType SelectSession(const std::shared_ptr<Context> &context);
|
||||
virtual Status Init(const std::shared_ptr<Context> &context) = 0;
|
||||
virtual Status CompileGraph(FuncGraphPtr graph, const void *data = nullptr, size_t size = 0) = 0;
|
||||
|
|
|
@ -181,7 +181,8 @@ MutableTensorImplPtr GraphSinkSession::GetInputByTensorName(const std::string &n
|
|||
}
|
||||
return nullptr;
|
||||
}
|
||||
static std::shared_ptr<InferSession> DelegateSessionCreator(const std::shared_ptr<Context> &ctx) {
|
||||
static std::shared_ptr<InferSession> DelegateSessionCreator(const std::shared_ptr<Context> &ctx,
|
||||
const ConfigInfos &config_infos) {
|
||||
auto &device_contexts = ctx->MutableDeviceInfo();
|
||||
if (device_contexts.empty()) {
|
||||
return nullptr;
|
||||
|
@ -189,7 +190,10 @@ static std::shared_ptr<InferSession> DelegateSessionCreator(const std::shared_pt
|
|||
auto device_type = device_contexts.at(0)->GetDeviceType();
|
||||
auto provider = device_contexts.at(0)->GetProvider();
|
||||
|
||||
auto delegate = DelegateRegistry::GetInstance().GetDelegate(device_type, provider, ctx);
|
||||
auto delegate = DelegateRegistry::GetInstance().GetDelegate(device_type, provider, ctx, config_infos);
|
||||
if (delegate == nullptr) {
|
||||
return nullptr;
|
||||
}
|
||||
auto session = std::make_shared<GraphSinkSession>(delegate);
|
||||
session->Init(ctx);
|
||||
return session;
|
||||
|
|
|
@ -26,18 +26,17 @@ SessionRegistry &SessionRegistry::GetInstance() {
|
|||
return instance;
|
||||
}
|
||||
|
||||
void SessionRegistry::RegSession(
  const mindspore::SessionType &session_type,
  const std::function<std::shared_ptr<InferSession>(const std::shared_ptr<Context> &)> &creator) {
void SessionRegistry::RegSession(const mindspore::SessionType &session_type, const InferSessionRegFunc &creator) {
  session_map_[session_type] = creator;
}

std::shared_ptr<InferSession> SessionRegistry::GetSession(const mindspore::SessionType &session_type,
                                                          const std::shared_ptr<Context> &ctx) {
                                                          const std::shared_ptr<Context> &ctx,
                                                          const ConfigInfos &config_info) {
  auto it = session_map_.find(session_type);
  if (it == session_map_.end()) {
    return nullptr;
  }
  return it->second(ctx);
  return it->second(ctx, config_info);
}
}  // namespace mindspore
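For illustration, registering a creator against the widened InferSessionRegFunc signature could look like the sketch below; kDefaultSession, SessionRegistrar, InferSession, and ConfigInfos come from this code base, while MyInferSession is a hypothetical InferSession subclass.

  // Hedged sketch: MyInferSession is hypothetical; config_infos is simply forwarded to it if needed.
  static SessionRegistrar g_my_session_registrar(
    kDefaultSession, [](const std::shared_ptr<Context> &ctx, const ConfigInfos &config_infos) {
      auto session = std::make_shared<MyInferSession>();
      session->Init(ctx);
      return std::static_pointer_cast<InferSession>(session);
    });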
|
|
|
@ -23,6 +23,9 @@
|
|||
#include "extendrt/infer_session.h"
|
||||
#include "include/api/context.h"
|
||||
namespace mindspore {
|
||||
using InferSessionRegFunc =
|
||||
std::function<std::shared_ptr<InferSession>(const std::shared_ptr<Context> &, const ConfigInfos &)>;
|
||||
|
||||
class SessionRegistry {
|
||||
public:
|
||||
SessionRegistry() = default;
|
||||
|
@ -30,21 +33,18 @@ class SessionRegistry {
|
|||
|
||||
static SessionRegistry &GetInstance();
|
||||
|
||||
void RegSession(const mindspore::SessionType &session_type,
|
||||
const std::function<std::shared_ptr<InferSession>(const std::shared_ptr<Context> &)> &creator);
|
||||
void RegSession(const mindspore::SessionType &session_type, const InferSessionRegFunc &creator);
|
||||
|
||||
std::shared_ptr<InferSession> GetSession(const mindspore::SessionType &session_type,
|
||||
const std::shared_ptr<Context> &);
|
||||
std::shared_ptr<InferSession> GetSession(const mindspore::SessionType &session_type, const std::shared_ptr<Context> &,
|
||||
const ConfigInfos &);
|
||||
|
||||
private:
|
||||
mindspore::HashMap<SessionType, std::function<std::shared_ptr<InferSession>(const std::shared_ptr<Context> &)>>
|
||||
session_map_;
|
||||
mindspore::HashMap<SessionType, InferSessionRegFunc> session_map_;
|
||||
};
|
||||
|
||||
class SessionRegistrar {
|
||||
public:
|
||||
SessionRegistrar(const mindspore::SessionType &session_type,
|
||||
const std::function<std::shared_ptr<InferSession>(const std::shared_ptr<Context> &)> &creator) {
|
||||
SessionRegistrar(const mindspore::SessionType &session_type, const InferSessionRegFunc &creator) {
|
||||
SessionRegistry::GetInstance().RegSession(session_type, creator);
|
||||
}
|
||||
~SessionRegistrar() = default;
|
||||
|
|
|
@ -294,7 +294,8 @@ std::vector<tensor::TensorPtr> LiteInferSession::ConvertToTensors(
|
|||
return tensors;
|
||||
}
|
||||
|
||||
static std::shared_ptr<InferSession> LiteInferSessionCreator(const std::shared_ptr<Context> &ctx) {
|
||||
static std::shared_ptr<InferSession> LiteInferSessionCreator(const std::shared_ptr<Context> &ctx,
|
||||
const ConfigInfos &config_infos) {
|
||||
auto session = std::make_shared<LiteInferSession>();
|
||||
session->Init(ctx);
|
||||
return session;
|
||||
|
|
|
@ -277,7 +277,8 @@ MutableTensorImplPtr SingleOpInferSession::GetInputByTensorName(const std::strin
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
static std::shared_ptr<InferSession> SingleOpSessionCreator(const std::shared_ptr<Context> &ctx) {
|
||||
static std::shared_ptr<InferSession> SingleOpSessionCreator(const std::shared_ptr<Context> &ctx,
|
||||
const ConfigInfos &config_infos) {
|
||||
auto session = std::make_shared<SingleOpInferSession>();
|
||||
session->Init(ctx);
|
||||
return session;
|
||||
|
|
|
@ -31,6 +31,44 @@
|
|||
namespace mindspore {
|
||||
const size_t max_depth = 128;
|
||||
GraphId KernelGraphUtils::graph_sum_ = 0;
|
||||
|
||||
std::vector<AnfNodePtr> KernelGraphUtils::GetKernelGraphOutputs(const KernelGraphPtr &func_graph) {
|
||||
if (!func_graph) {
|
||||
return {};
|
||||
}
|
||||
std::vector<AnfNodePtr> outputs = func_graph->outputs();
|
||||
while (true) {
|
||||
auto has_replace = false;
|
||||
for (auto it = outputs.begin(); it != outputs.end(); ++it) {
|
||||
auto output = *it;
|
||||
std::vector<AnfNodePtr> one_outputs;
|
||||
if (IsPrimitiveCNode(output, prim::kPrimDepend)) {
|
||||
auto depend = output->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(depend);
|
||||
output = depend->input(kRealInputIndexInDepend);
|
||||
}
|
||||
if (IsPrimitiveCNode(output, prim::kPrimMakeTuple)) {
|
||||
auto make_tuple = output->cast<CNodePtr>();
|
||||
MS_EXCEPTION_IF_NULL(make_tuple);
|
||||
auto &inputs = make_tuple->inputs();
|
||||
one_outputs = std::vector<AnfNodePtr>(inputs.begin() + 1, inputs.end());
|
||||
} else {
|
||||
one_outputs = {output};
|
||||
}
|
||||
if (one_outputs.size() != 1 || one_outputs[0] != output) {
|
||||
it = outputs.erase(it);
|
||||
outputs.insert(it, one_outputs.begin(), one_outputs.end());
|
||||
has_replace = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!has_replace) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
return outputs;
|
||||
}
|
||||
|
||||
KernelGraphPtr KernelGraphUtils::ConstructKernelGraph(const FuncGraphPtr &func_graph,
|
||||
std::vector<KernelGraphPtr> *all_out_graph,
|
||||
mindspore::device::DeviceType device_target) {
|
||||
|
@ -965,7 +1003,7 @@ void KernelGraphUtils::GetModelOutputsInfo(uint32_t graph_id, std::vector<tensor
|
|||
VectorRef vector_outputs;
|
||||
std::map<tensor::TensorPtr, session::KernelWithIndex> tensor_to_node;
|
||||
session::KernelMapTensor node_to_tensor;
|
||||
auto anf_outputs = kernel_graph->outputs();
|
||||
auto anf_outputs = KernelGraphUtils::GetKernelGraphOutputs(kernel_graph);
|
||||
for (auto &item : anf_outputs) {
|
||||
MS_EXCEPTION_IF_NULL(item);
|
||||
MS_LOG(INFO) << "Create node output[" << item->DebugString() << "]";
|
||||
|
|
|
@ -24,11 +24,12 @@
|
|||
#include <string>
|
||||
|
||||
#include "backend/common/session/kernel_graph.h"
|
||||
#include "include/api/visible.h"
|
||||
|
||||
namespace mindspore {
|
||||
using GraphId = uint32_t;
|
||||
using KernelGraph = mindspore::session::KernelGraph;
|
||||
class KernelGraphUtils : public std::enable_shared_from_this<KernelGraphUtils> {
|
||||
class MS_API KernelGraphUtils : public std::enable_shared_from_this<KernelGraphUtils> {
|
||||
public:
|
||||
KernelGraphUtils() = default;
|
||||
virtual ~KernelGraphUtils() = default;
|
||||
|
@ -37,6 +38,7 @@ class KernelGraphUtils : public std::enable_shared_from_this<KernelGraphUtils> {
|
|||
static KernelGraphUtils instance;
|
||||
return instance;
|
||||
}
|
||||
static std::vector<AnfNodePtr> GetKernelGraphOutputs(const KernelGraphPtr &func_graph);
|
||||
|
||||
KernelGraphPtr ConstructKernelGraph(const FuncGraphPtr &func_graph, std::vector<KernelGraphPtr> *all_out_graph,
|
||||
mindspore::device::DeviceType device_target);
|
||||
|
|
|
@ -22,6 +22,7 @@
|
|||
#include <memory>
|
||||
|
||||
#include "extendrt/utils/runtime_utils.h"
|
||||
#include "extendrt/utils/kernel_graph_utils.h"
|
||||
|
||||
#include "src/extendrt/infer_device_address.h"
|
||||
#include "include/common/utils/anfalgo.h"
|
||||
|
@ -88,7 +89,7 @@ void RuntimeUtils::CopyInputTensorsToKernelGraph(const std::vector<tensor::Tenso
|
|||
void RuntimeUtils::CopyOutputTensorsFromKernelGraph(std::vector<tensor::Tensor> *outputs, KernelGraphPtr kernel_graph) {
|
||||
MS_EXCEPTION_IF_NULL(kernel_graph);
|
||||
outputs->clear();
|
||||
auto graph_outputs = kernel_graph->outputs();
|
||||
auto graph_outputs = KernelGraphUtils::GetKernelGraphOutputs(kernel_graph);
|
||||
for (auto graph_output : graph_outputs) {
|
||||
auto real_output_with_index = common::AnfAlgo::VisitKernelWithReturnType(graph_output, 0);
|
||||
auto real_output = real_output_with_index.first;
|
||||
|
|
|
@ -94,7 +94,8 @@ class TensorDefaultImpl : public MutableTensorImpl {
|
|||
|
||||
void SetDeviceData(void *data) override { device_data_ = data; }
|
||||
void *GetDeviceData() override { return device_data_; }
|
||||
bool IsConst() const override { return false; }
|
||||
bool IsConst() const override { return is_const_; }
|
||||
void SetIsConst(bool is_const) { is_const_ = is_const; }
|
||||
|
||||
bool IsDevice() const override { return device_data_ != nullptr; }
|
||||
|
||||
|
@ -138,6 +139,8 @@ class TensorDefaultImpl : public MutableTensorImpl {
|
|||
mutable const void *data_ = nullptr;
|
||||
bool own_data_ = false;
|
||||
|
||||
bool is_const_ = false;
|
||||
|
||||
void ResizeData() const {
|
||||
if (data_ != nullptr && data_ != buffer_.Data()) {
|
||||
return;
|