forked from mindspore-Ecosystem/mindspore
!40888 [MSLITE] Support TensorRT dynamic split
Merge pull request !40888 from zhangyongxian/dev_zhangyongxian_dsplit
This commit is contained in:
commit
6ecba5385f
|
@ -23,19 +23,53 @@ namespace mindspore::lite {
|
|||
// Decides whether this split op can be lowered to TensorRT.
//
// Rejects the op when the input shape is not known or when the number of inputs is neither 1 nor
// INPUT_SIZE2; otherwise marks the op as not supporting dynamic shapes and accepts it.
//
// NOTE(review): GetDynamicSliceSize in this file builds slice sizes for dynamic inputs, while this
// function refuses unknown shapes and clears support_dynamic_. Confirm that both belong to the
// same revision of the file.
//
// Returns RET_OK when supported, RET_ERROR otherwise.
int SplitTensorRT::IsSupport(const mindspore::schema::Primitive *primitive,
                             const std::vector<mindspore::MSTensor> &in_tensors,
                             const std::vector<mindspore::MSTensor> &out_tensors) {
  const bool shape_known = IsShapeKnown();
  if (!shape_known) {
    MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
    return RET_ERROR;
  }

  const auto input_count = in_tensors.size();
  const bool input_count_ok = (input_count == 1) || (input_count == INPUT_SIZE2);
  if (!input_count_ok) {
    MS_LOG(ERROR) << "Unsupported input tensor size, size is " << input_count;
    return RET_ERROR;
  }

  // Dynamic shapes are switched off for both the generic and the H/W-only case.
  dynamic_shape_params_.support_hw_dynamic_ = false;
  dynamic_shape_params_.support_dynamic_ = false;
  return RET_OK;
}
|
||||
|
||||
// Builds the runtime "size" tensor for the i-th slice of a split whose input has dynamic dims.
//
// The result equals the runtime shape of `input` with the entry at `axis_` replaced by
// `size_splits_[i]`. It is produced in three steps:
//   1. shape(input)                      -> [d0, ..., d(n-1)]
//   2. concat with [size_splits_[i]]     -> [d0, ..., d(n-1), len]
//   3. gather along axis 0 with indices [0, ..., n-1] where indices[axis_] = n (position of len)
//
// @param ctx    TensorRT context that owns the network the new layers are added to.
// @param input  Tensor being split.
// @param i      Index into size_splits_ selecting which output slice is being sized.
// @return       The 1-D size tensor, or nullptr (after logging) if any argument is invalid or any
//               layer/tensor could not be created.
nvinfer1::ITensor *SplitTensorRT::GetDynamicSliceSize(TensorRTContext *ctx, nvinfer1::ITensor *input, size_t i) {
  if (ctx == nullptr || input == nullptr) {
    MS_LOG(ERROR) << "invalid context or input tensor for " << op_name_;
    return nullptr;
  }
  if (i >= size_splits_.size()) {
    MS_LOG(ERROR) << "split index " << i << " is out of range for " << op_name_;
    return nullptr;
  }
  // Validate the axis up front: it is used below as an index into a vector of length nb_dims.
  const int nb_dims = input->getDimensions().nbDims;
  if (axis_ < 0 || axis_ >= nb_dims) {
    MS_LOG(ERROR) << "split axis " << axis_ << " is out of range for input rank " << nb_dims;
    return nullptr;
  }

  // The layer pointer itself may be null, so check it before asking for its output.
  auto shape_layer = ctx->network()->addShape(*input);
  if (shape_layer == nullptr) {
    MS_LOG(ERROR) << "add shape layer of input failed!";
    return nullptr;
  }
  auto in_tensor_shape = shape_layer->getOutput(0);
  if (in_tensor_shape == nullptr) {
    MS_LOG(ERROR) << "add shape layer of input failed!";
    return nullptr;
  }
  auto len_tensor = ctx->ConvertTo1DTensor(static_cast<int>(size_splits_[i]));
  if (len_tensor == nullptr) {
    MS_LOG(ERROR) << "convert 1d tensor failed!";
    return nullptr;
  }

  nvinfer1::ITensor *concat_input_tensors[INPUT_SIZE2];
  concat_input_tensors[0] = in_tensor_shape;
  concat_input_tensors[1] = len_tensor;
  auto concat_layer = ctx->network()->addConcatenation(concat_input_tensors, INPUT_SIZE2);
  if (concat_layer == nullptr) {
    MS_LOG(ERROR) << "add concat layer failed!";
    return nullptr;
  }
  concat_layer->setAxis(0);
  auto shape_and_len = concat_layer->getOutput(0);
  if (shape_and_len == nullptr) {
    MS_LOG(ERROR) << "get concat layer result failed!";
    return nullptr;
  }

  // Identity indices, except that the split axis picks the appended length element (index n).
  std::vector<int> gather_slices(nb_dims);
  std::iota(gather_slices.begin(), gather_slices.end(), 0);
  gather_slices[axis_] = nb_dims;
  auto gather_slices_tensor = ctx->ConvertTo1DTensor(gather_slices);
  if (gather_slices_tensor == nullptr) {
    MS_LOG(ERROR) << "convert gather indices to 1d tensor failed!";
    return nullptr;
  }
  nvinfer1::IGatherLayer *gather_layer = ctx->network()->addGather(*shape_and_len, *gather_slices_tensor, 0);
  if (gather_layer == nullptr) {
    MS_LOG(ERROR) << "add gather layer failed!";
    return nullptr;
  }

  return gather_layer->getOutput(0);
}
|
||||
|
||||
int SplitTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
||||
ITensorHelper split_input;
|
||||
int ret = PreprocessInputs2SameDim(ctx, input(ctx, 0), &split_input);
|
||||
|
@ -77,16 +111,24 @@ int SplitTensorRT::AddInnerOp(TensorRTContext *ctx) {
|
|||
for (int i = 0; i != output_num_; ++i) {
|
||||
nvinfer1::Dims start_dims = lite::ConvertCudaDims(0, input_nbdims);
|
||||
start_dims.d[axis_] = axis_dim_index;
|
||||
axis_dim_index += size_splits_[i];
|
||||
|
||||
nvinfer1::Dims size_dims = split_input.trt_tensor_->getDimensions();
|
||||
nvinfer1::Dims size_dims;
|
||||
nvinfer1::ITensor *size_tensor = nullptr;
|
||||
if (!IsDynamicInput(ctx, 0)) {
|
||||
size_dims = split_input.trt_tensor_->getDimensions();
|
||||
size_dims.d[axis_] = size_splits_[i];
|
||||
} else {
|
||||
size_tensor = GetDynamicSliceSize(ctx, split_input.trt_tensor_, i);
|
||||
}
|
||||
axis_dim_index += size_splits_[i];
|
||||
|
||||
slice_layer = ctx->network()->addSlice(*split_input.trt_tensor_, start_dims, size_dims, one_dims);
|
||||
if (slice_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "add Slice op failed for TensorRT: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (size_tensor != nullptr) {
|
||||
slice_layer->setInput(INPUT_SIZE2, *size_tensor);
|
||||
}
|
||||
|
||||
nvinfer1::ITensor *out_tensor = slice_layer->getOutput(0);
|
||||
if (type_ == schema::PrimitiveType_Unstack) {
|
||||
|
|
|
@ -36,10 +36,11 @@ class SplitTensorRT : public TensorRTOp {
|
|||
const std::vector<mindspore::MSTensor> &out_tensors) override;
|
||||
|
||||
private:
|
||||
nvinfer1::ITensor *GetDynamicSliceSize(TensorRTContext *ctx, nvinfer1::ITensor *input, size_t i);
|
||||
int ParseParams(const ITensorHelper &helper);
|
||||
int64_t axis_;
|
||||
int64_t output_num_;
|
||||
std::vector<int64_t> size_splits_;
|
||||
int output_num_;
|
||||
std::vector<int> size_splits_;
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_SPLIT_TENSORRT_H_
|
||||
|
|
|
@ -80,6 +80,11 @@ bool TensorRTOp::IsShapeKnown() {
|
|||
return true;
|
||||
}
|
||||
|
||||
// Reports whether the k-th input tensor of this op has at least one dynamic dimension.
//
// A dimension counts as dynamic when TensorRT reports it as -1.
//
// @param ctx  Context used to resolve the op's k-th input.
// @param k    Index of the input to inspect.
// @return     true if any dimension is -1; false otherwise, including when the input has no
//             TensorRT tensor attached (an error is logged in that case).
bool TensorRTOp::IsDynamicInput(TensorRTContext *ctx, size_t k) {
  nvinfer1::ITensor *trt_tensor = input(ctx, k).trt_tensor_;
  if (trt_tensor == nullptr) {
    // Guard against an unresolved input instead of dereferencing a null tensor.
    MS_LOG(ERROR) << "input " << k << " of " << op_name_ << " has no TensorRT tensor";
    return false;
  }
  const nvinfer1::Dims dims = trt_tensor->getDimensions();
  return std::any_of(dims.d, dims.d + dims.nbDims, [](int d) { return d == -1; });
}
|
||||
|
||||
int TensorRTOp::Prepare(void **network_tensor_bindings, nvinfer1::ICudaEngine *engine) {
|
||||
if (op_binding_tensor_.size() != 0) {
|
||||
MS_LOG(ERROR) << "need special op Prepare for " << op_name_;
|
||||
|
|
|
@ -111,6 +111,7 @@ class TensorRTOp {
|
|||
bool GetSupportInputBool();
|
||||
|
||||
void SetSupportInputBool(bool support_input_bool);
|
||||
bool IsDynamicInput(TensorRTContext *ctx, size_t k);
|
||||
|
||||
private:
|
||||
int SetTransposeDynamicRange();
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
*/
|
||||
|
||||
#include "src/extendrt/delegate/tensorrt/tensorrt_context.h"
|
||||
#include "src/extendrt/delegate/tensorrt/tensorrt_utils.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
TensorRTContext::~TensorRTContext() {
|
||||
|
@ -22,6 +23,9 @@ TensorRTContext::~TensorRTContext() {
|
|||
network_->destroy();
|
||||
network_ = nullptr;
|
||||
}
|
||||
for (auto ptr : owner_memorys_) {
|
||||
free(ptr);
|
||||
}
|
||||
}
|
||||
|
||||
bool TensorRTContext::Init() {
|
||||
|
@ -72,4 +76,31 @@ ITensorHelper TensorRTContext::MsName2Tensor(const std::string &ms_name) {
|
|||
MS_LOG(WARNING) << "Get Tensorrt tensor by ms_tensor: " << ms_name << " fail!";
|
||||
return {};
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
nvinfer1::ITensor *TensorRTContext::ConvertTo1DTensor(T value) {
|
||||
return ConvertTo1DTensor(std::vector<T>{value});
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
nvinfer1::ITensor *TensorRTContext::ConvertTo1DTensor(const std::vector<T> &values) {
|
||||
void *ptr = malloc(values.size() * sizeof(T));
|
||||
const T *begin = &values[0];
|
||||
memcpy(ptr, reinterpret_cast<const void *>(begin), values.size() * sizeof(T));
|
||||
owner_memorys_.push_back(ptr);
|
||||
|
||||
nvinfer1::Weights weights{GetNvinferDataType<T>(), ptr, values.size()};
|
||||
nvinfer1::Dims dims{1, {values.size()}};
|
||||
nvinfer1::IConstantLayer *constant_tensor = network()->addConstant(dims, weights);
|
||||
if (constant_tensor == nullptr) {
|
||||
MS_LOG(ERROR) << "create constant_tensor failed.";
|
||||
return nullptr;
|
||||
}
|
||||
return constant_tensor->getOutput(0);
|
||||
}
|
||||
|
||||
// Explicit instantiations of both ConvertTo1DTensor overloads for int and float element types.
// The template arguments are written out so each line names exactly one overload.
template nvinfer1::ITensor *TensorRTContext::ConvertTo1DTensor<int>(int value);
template nvinfer1::ITensor *TensorRTContext::ConvertTo1DTensor<float>(float value);
template nvinfer1::ITensor *TensorRTContext::ConvertTo1DTensor<int>(const std::vector<int> &values);
template nvinfer1::ITensor *TensorRTContext::ConvertTo1DTensor<float>(const std::vector<float> &values);
|
||||
} // namespace mindspore::lite
|
||||
|
|
|
@ -20,6 +20,7 @@
|
|||
#include <NvInfer.h>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include "src/extendrt/delegate/tensorrt/tensorrt_runtime.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
|
@ -41,12 +42,17 @@ class TensorRTContext {
|
|||
void RegisterTensorWithSameName(ITensorHelper tensor, const std::string &basename);
|
||||
bool HasTensor(const std::string &name) const;
|
||||
ITensorHelper MsName2Tensor(const std::string &ms_name);
|
||||
template <typename T>
|
||||
nvinfer1::ITensor *ConvertTo1DTensor(T value);
|
||||
template <typename T>
|
||||
nvinfer1::ITensor *ConvertTo1DTensor(const std::vector<T> &values);
|
||||
|
||||
private:
|
||||
int counter_{0};
|
||||
nvinfer1::INetworkDefinition *network_{nullptr};
|
||||
std::unordered_map<std::string, ITensorHelper> ms_name2trt_tensor_;
|
||||
TensorRTRuntime *runtime_{nullptr};
|
||||
std::vector<void *> owner_memorys_;
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_TENSORRT_CONTEXT_H_
|
||||
|
|
|
@ -176,7 +176,6 @@ nvinfer1::ITensor *ConvertConstantTensor(TensorRTContext *ctx, const mindspore::
|
|||
}
|
||||
ctx->RegisterLayer(constant_tensor, ms_tensor.Name() + "_" + op_name);
|
||||
auto tensor_ptr = constant_tensor->getOutput(0);
|
||||
// ctx->RegisterTensor(tensor_ptr, ms_tensor.Name());
|
||||
return tensor_ptr;
|
||||
}
|
||||
|
||||
|
@ -741,4 +740,17 @@ void DebugDims(const std::string &key, const nvinfer1::Dims &dims) {
|
|||
MS_LOG(DEBUG) << dims.d[i];
|
||||
}
|
||||
}
|
||||
|
||||
template <>
|
||||
nvinfer1::DataType GetNvinferDataType<float>() {
|
||||
return nvinfer1::DataType::kFLOAT;
|
||||
}
|
||||
|
||||
template <>
|
||||
nvinfer1::DataType GetNvinferDataType<int>() {
|
||||
return nvinfer1::DataType::kINT32;
|
||||
}
|
||||
|
||||
template nvinfer1::DataType GetNvinferDataType<float>();
|
||||
template nvinfer1::DataType GetNvinferDataType<int>();
|
||||
} // namespace mindspore::lite
|
||||
|
|
|
@ -138,6 +138,9 @@ int ParseData2Vector(const mindspore::MSTensor &ms_tensor, std::vector<float> *d
|
|||
|
||||
void DebugDims(const std::string &key, const nvinfer1::Dims &dims);
|
||||
|
||||
template <typename T>
|
||||
nvinfer1::DataType GetNvinferDataType();
|
||||
|
||||
template <typename T1, typename T2>
|
||||
bool SameDims(const std::vector<T1> &shape1, const std::vector<T2> &shape2) {
|
||||
if (shape1.size() != shape2.size()) {
|
||||
|
|
Loading…
Reference in New Issue