!22945 [MSLITE] optimize Execute time for TensorRT delegate by removing unnecessary transpose ops

Merge pull request !22945 from Liu_Xuu/trt_0903_transpose
i-robot 2021-09-08 07:58:01 +00:00 committed by Gitee
commit 277cf0d892
26 changed files with 463 additions and 250 deletions
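The change replaces the fixed NHWC->NCHW / NCHW->NHWC shuffle pair that used to wrap every NCHW-only layer with a per-tensor layout record: each op now publishes its output as an ITensorHelper (the TensorRT tensor plus its format), and the consuming op inserts a transpose only when the incoming layout differs from the one it needs; the subgraph output is converted back to NHWC once, in MarkOutputs. A minimal, self-contained sketch of that idea (the names below are simplified stand-ins, not the delegate API):

#include <cstdio>

enum class Format { NHWC, NCHW };

// analogous to the ITensorHelper{trt_tensor_, format_} added in tensorrt_op.h
struct TensorHelper {
  int tensor_id;   // stands in for nvinfer1::ITensor*
  Format format;
};

// stands in for adding an nvinfer1::IShuffleLayer that permutes the layout
TensorHelper Transpose(const TensorHelper &in, Format to) {
  std::printf("insert transpose for tensor %d\n", in.tensor_id);
  return TensorHelper{in.tensor_id + 100, to};
}

// a layer that must run on NCHW data (e.g. convolution or pooling)
TensorHelper NchwOnlyLayer(const TensorHelper &in) {
  TensorHelper input = in;
  if (input.format != Format::NCHW) {
    input = Transpose(input, Format::NCHW);  // transpose only when really needed
  }
  // ... add the real TensorRT layer here ...
  // record that the output stays NCHW; the consumer decides what to do next
  return TensorHelper{input.tensor_id + 1, Format::NCHW};
}

int main() {
  TensorHelper graph_input{0, Format::NHWC};
  TensorHelper a = NchwOnlyLayer(graph_input);  // one NHWC->NCHW transpose
  TensorHelper b = NchwOnlyLayer(a);            // no transpose: already NCHW
  if (b.format != Format::NHWC) {
    b = Transpose(b, Format::NHWC);  // only the subgraph output goes back to NHWC
  }
  return 0;
}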

View File

@ -15,7 +15,6 @@
*/
#include "src/delegate/tensorrt/op/activation_tensorrt.h"
#include "src/delegate/tensorrt/tensorrt_utils.h"
namespace mindspore::lite {
int ActivationTensorRT::IsSupport(const schema::Primitive *primitive,
@ -58,8 +57,8 @@ int ActivationTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
}
float alpha = activation_op->alpha();
nvinfer1::IActivationLayer *activation_layer =
ActivationTensorRT::AddActivation(network, activation_op->activation_type(), alpha, tensorrt_in_tensors_[0]);
nvinfer1::IActivationLayer *activation_layer = ActivationTensorRT::AddActivation(
network, activation_op->activation_type(), alpha, tensorrt_in_tensors_[0].trt_tensor_);
if (activation_layer == nullptr) {
MS_LOG(ERROR) << "add activation op failed for TensorRT.";
return RET_ERROR;
@ -67,7 +66,7 @@ int ActivationTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
activation_layer->setName(op_name_.c_str());
activation_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str());
this->AddInnerOutTensors(activation_layer->getOutput(0));
this->AddInnerOutTensors(ITensorHelper{activation_layer->getOutput(0), tensorrt_in_tensors_[0].format_});
return RET_OK;
}

View File

@ -24,7 +24,7 @@ int ConcateTensorRT::IsSupport(const schema::Primitive *primitive, const std::ve
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
return RET_ERROR;
}
if (in_tensors.size() < 1) {
if (in_tensors.size() != INPUT_SIZE2) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
return RET_ERROR;
}
@ -50,11 +50,46 @@ int ConcateTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
return RET_ERROR;
}
nvinfer1::ITensor *trt_input_tensors[tensorrt_in_tensors_.size()];
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims !=
tensorrt_in_tensors_[1].trt_tensor_->getDimensions().nbDims) {
MS_LOG(ERROR) << "dims of inputs is invalid for " << op_name_;
return RET_ERROR;
}
// make sure two inputs have same format
Format out_format = tensorrt_in_tensors_[0].format_;
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D) {
if (tensorrt_in_tensors_[0].format_ == tensorrt_in_tensors_[1].format_) {
for (size_t i = 0; i < tensorrt_in_tensors_.size(); i++) {
trt_input_tensors[i] = tensorrt_in_tensors_[i].trt_tensor_;
}
} else {
// when inputs format are different, change to NHWC
out_format = Format::NHWC;
int transpose_tensor_index = tensorrt_in_tensors_[0].format_ == Format::NCHW ? 0 : 1;
trt_input_tensors[1 - transpose_tensor_index] = tensorrt_in_tensors_[1 - transpose_tensor_index].trt_tensor_;
nvinfer1::IShuffleLayer *transpose_layer =
NCHW2NHWC(network, *tensorrt_in_tensors_[transpose_tensor_index].trt_tensor_);
if (transpose_layer == nullptr) {
MS_LOG(ERROR) << "op action convert failed";
return RET_ERROR;
}
trt_input_tensors[transpose_tensor_index] = transpose_layer->getOutput(0);
}
} else {
for (size_t i = 0; i < tensorrt_in_tensors_.size(); i++) {
trt_input_tensors[i] = tensorrt_in_tensors_[i].trt_tensor_;
}
}
int axis = RET_INVALID_OP_ATTR;
axis = concate_op->axis();
nvinfer1::ITensor *trt_input_tensors[tensorrt_in_tensors_.size()];
std::copy(tensorrt_in_tensors_.begin(), tensorrt_in_tensors_.end(), trt_input_tensors);
if (out_format == Format::NCHW) {
// when inputs all NCHW, change axis
axis = ConvertAxisFromNHWC2NCHW(axis);
MS_LOG(INFO) << "concate axis change to " << axis << " when using NCHW format.";
}
nvinfer1::IConcatenationLayer *concate_layer =
network->addConcatenation(trt_input_tensors, static_cast<int>(tensorrt_in_tensors_.size()));
@ -68,8 +103,7 @@ int ConcateTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
}
concate_layer->setName(op_name_.c_str());
concate_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str());
this->AddInnerOutTensors(concate_layer->getOutput(0));
this->AddInnerOutTensors(ITensorHelper{concate_layer->getOutput(0), out_format});
return RET_OK;
}
} // namespace mindspore::lite
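For Concat the two 4-D inputs may arrive in different layouts. The hunk above keeps them untouched when the layouts already match (only remapping the concat axis if both are NCHW) and transposes the NCHW side to NHWC only when they differ. A hedged, self-contained sketch of that decision (simplified stand-ins, not the delegate API):

enum class Fmt { NHWC, NCHW };
struct In { int id; Fmt fmt; };

// stand-in for inserting an NCHW2NHWC shuffle layer
In ToNhwc(In t) { return In{t.id + 100, Fmt::NHWC}; }

Fmt PrepareConcatInputs(In *a, In *b, int *axis) {
  if (a->fmt == b->fmt) {
    // same layout on both sides: no transpose; if that layout is NCHW the
    // NHWC-relative concat axis is remapped (ConvertAxisFromNHWC2NCHW later in
    // this diff handles the general case; channel axis NHWC 3 -> NCHW 1)
    if (a->fmt == Fmt::NCHW && *axis == 3) *axis = 1;
    return a->fmt;
  }
  // mixed layouts: bring only the NCHW side to NHWC so both inputs match
  if (a->fmt == Fmt::NCHW) *a = ToNhwc(*a); else *b = ToNhwc(*b);
  return Fmt::NHWC;
}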

View File

@ -16,7 +16,6 @@
#include "src/delegate/tensorrt/op/convolution_tensorrt.h"
#include "src/delegate/tensorrt/op/activation_tensorrt.h"
#include "src/delegate/tensorrt/tensorrt_utils.h"
namespace mindspore::lite {
constexpr int BIAS_INDEX = 2;
@ -28,7 +27,7 @@ int ConvolutionTensorRT::IsSupport(const schema::Primitive *primitive,
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
return RET_ERROR;
}
if (in_tensors.size() != 2 && in_tensors.size() != 3) {
if (in_tensors.size() != INPUT_SIZE2 && in_tensors.size() != INPUT_SIZE3) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
return RET_ERROR;
}
@ -36,6 +35,10 @@ int ConvolutionTensorRT::IsSupport(const schema::Primitive *primitive,
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
return RET_ERROR;
}
if (in_tensors[0].format() != Format::NHWC && in_tensors[0].format() != Format::NCHW) {
MS_LOG(ERROR) << "Unsupported input tensor format of " << in_tensors[0].format();
return RET_ERROR;
}
return RET_OK;
}
@ -49,13 +52,19 @@ int ConvolutionTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
MS_LOG(ERROR) << "op action convert failed";
return RET_ERROR;
}
// transpose: NHWC->NCHW
nvinfer1::IShuffleLayer *transpose_layer_in = NHWC2NCHW(network, *tensorrt_in_tensors_[0]);
if (transpose_layer_in == nullptr) {
MS_LOG(ERROR) << "transpose: NHWC->NCHW failed";
return RET_ERROR;
nvinfer1::ITensor *conv_input = tensorrt_in_tensors_[0].trt_tensor_;
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D &&
tensorrt_in_tensors_[0].format_ == Format::NHWC) {
// transpose: NHWC->NCHW
nvinfer1::IShuffleLayer *transpose_layer_in = NHWC2NCHW(network, *tensorrt_in_tensors_[0].trt_tensor_);
if (transpose_layer_in == nullptr) {
MS_LOG(ERROR) << "transpose: NHWC->NCHW failed";
return RET_ERROR;
}
transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str());
conv_input = transpose_layer_in->getOutput(0);
}
transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str());
// transpose weight
const mindspore::MSTensor &weight_tensor = in_tensors_[1];
@ -86,7 +95,7 @@ int ConvolutionTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
}
nvinfer1::IConvolutionLayer *conv_layer =
network->addConvolutionNd(*transpose_layer_in->getOutput(0), nbOutputMaps, kernelSize, kernelWeights, biasWeights);
network->addConvolutionNd(*conv_input, nbOutputMaps, kernelSize, kernelWeights, biasWeights);
if (conv_layer == nullptr) {
MS_LOG(ERROR) << "ConvolutionLayer failed";
@ -111,15 +120,8 @@ int ConvolutionTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
activation_layer->setName((op_name_ + "_activation").c_str());
}
// transpose: NCHW->NHWC
nvinfer1::IShuffleLayer *transpose_layer_out = NCHW2NHWC(network, *activation_layer->getOutput(0));
if (transpose_layer_out == nullptr) {
MS_LOG(ERROR) << "op action convert failed";
return RET_ERROR;
}
transpose_layer_out->setName((op_name_ + "_transpose2NHWC").c_str());
transpose_layer_out->getOutput(0)->setName(out_tensors_[0].Name().c_str());
this->AddInnerOutTensors(transpose_layer_out->getOutput(0));
activation_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str());
this->AddInnerOutTensors(ITensorHelper{activation_layer->getOutput(0), Format::NCHW});
return RET_OK;
}

View File

@ -16,7 +16,6 @@
#include "src/delegate/tensorrt/op/deconvolution_tensorrt.h"
#include "src/delegate/tensorrt/op/activation_tensorrt.h"
#include "src/delegate/tensorrt/tensorrt_utils.h"
#include "nnacl/pack.h"
namespace mindspore::lite {
@ -35,6 +34,10 @@ int DeconvolutionTensorRT::IsSupport(const schema::Primitive *primitive,
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
return RET_ERROR;
}
if (in_tensors[0].format() != Format::NHWC && in_tensors[0].format() != Format::NCHW) {
MS_LOG(ERROR) << "Unsupported input tensor format of " << in_tensors[0].format();
return RET_ERROR;
}
return RET_OK;
}
int DeconvolutionTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
@ -47,13 +50,18 @@ int DeconvolutionTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
MS_LOG(ERROR) << "op action convert failed";
return RET_ERROR;
}
// transpose: NHWC->NCHW
nvinfer1::IShuffleLayer *transpose_layer_in = NHWC2NCHW(network, *tensorrt_in_tensors_[0]);
if (transpose_layer_in == nullptr) {
MS_LOG(ERROR) << "transpose: NHWC->NCHW failed";
return RET_ERROR;
nvinfer1::ITensor *deconv_input = tensorrt_in_tensors_[0].trt_tensor_;
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D &&
tensorrt_in_tensors_[0].format_ == Format::NHWC) {
// transpose: NHWC->NCHW
nvinfer1::IShuffleLayer *transpose_layer_in = NHWC2NCHW(network, *tensorrt_in_tensors_[0].trt_tensor_);
if (transpose_layer_in == nullptr) {
MS_LOG(ERROR) << "transpose: NHWC->NCHW failed";
return RET_ERROR;
}
transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str());
deconv_input = transpose_layer_in->getOutput(0);
}
transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str());
// transpose weight
const mindspore::MSTensor &weight_tensor = in_tensors_[1];
@ -83,8 +91,8 @@ int DeconvolutionTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
biasWeights.values = nullptr;
}
nvinfer1::IDeconvolutionLayer *deconv_layer = network->addDeconvolutionNd(
*transpose_layer_in->getOutput(0), nbOutputMaps, kernelSize, kernelWeights, biasWeights);
nvinfer1::IDeconvolutionLayer *deconv_layer =
network->addDeconvolutionNd(*deconv_input, nbOutputMaps, kernelSize, kernelWeights, biasWeights);
if (deconv_layer == nullptr) {
MS_LOG(ERROR) << "DeconvolutionLayer failed";
@ -109,15 +117,8 @@ int DeconvolutionTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
activation_layer->setName((op_name_ + "_activation").c_str());
}
// transpose: NCHW->NHWC
nvinfer1::IShuffleLayer *transpose_layer_out = NCHW2NHWC(network, *activation_layer->getOutput(0));
if (transpose_layer_out == nullptr) {
MS_LOG(ERROR) << "op action convert failed";
return RET_ERROR;
}
transpose_layer_out->setName((op_name_ + "_transpose2NHWC").c_str());
transpose_layer_out->getOutput(0)->setName(out_tensors_[0].Name().c_str());
this->AddInnerOutTensors(transpose_layer_out->getOutput(0));
activation_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str());
this->AddInnerOutTensors(ITensorHelper{activation_layer->getOutput(0), Format::NCHW});
return RET_OK;
}

View File

@ -80,26 +80,38 @@ int ElementWiseTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
MS_LOG(ERROR) << "network or input tensor size is invalid";
return RET_ERROR;
}
first_in_tensor_index_ = strcmp(tensorrt_in_tensors_[0]->getName(), in_tensors_[0].Name().c_str()) == 0 ? 0 : 1;
// add elementwise
first_in_tensor_index_ =
strcmp(tensorrt_in_tensors_[0].trt_tensor_->getName(), in_tensors_[0].Name().c_str()) == 0 ? 0 : 1;
if (this->tensorrt_in_tensors_.size() != INPUT_SIZE2) {
// create ITensor from MS constant tensor of index 1 - first_in_tensor_index_
nvinfer1::ITensor *constant_input = nullptr;
if (this->in_tensors_[1 - first_in_tensor_index_].Shape().size() == 0) {
constant_input = lite::ConvertScalarToITensor(network, this->in_tensors_[first_in_tensor_index_].Shape().size(),
in_tensors_[1 - first_in_tensor_index_].Data().get());
} else {
constant_input = lite::ConvertConstantTensor(network, in_tensors_[1 - first_in_tensor_index_]);
int ret = AddConstTensor(network);
if (ret != RET_OK) {
MS_LOG(ERROR) << "AddConstTensor failed for " << op_name_;
return ret;
}
if (constant_input == nullptr) {
MS_LOG(ERROR) << "create Itensor from constant tensor failed: " << op_name_;
return RET_ERROR;
}
this->AddInnerInTensors(constant_input);
}
nvinfer1::IElementWiseLayer *cal_layer = network->addElementWise(
*tensorrt_in_tensors_[first_in_tensor_index_], *tensorrt_in_tensors_[1 - first_in_tensor_index_], element_wise_op_);
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D &&
tensorrt_in_tensors_[0].format_ != tensorrt_in_tensors_[1].format_) {
// when inputs format are different, change to NHWC
int transpose_input_tensor = tensorrt_in_tensors_[0].format_ == Format::NCHW ? 0 : 1;
nvinfer1::IShuffleLayer *transpose_layer =
NCHW2NHWC(network, *tensorrt_in_tensors_[transpose_input_tensor].trt_tensor_);
if (transpose_layer == nullptr) {
MS_LOG(ERROR) << "op action convert failed";
return RET_ERROR;
}
transpose_layer->setName((op_name_ + "_input_transpose2NHWC").c_str());
tensorrt_in_tensors_[transpose_input_tensor].trt_tensor_ = transpose_layer->getOutput(0);
tensorrt_in_tensors_[transpose_input_tensor].format_ = Format::NHWC;
} else if (tensorrt_in_tensors_[0].format_ != tensorrt_in_tensors_[1].format_) {
MS_LOG(ERROR) << "elementwise op inputs are in different format: " << op_name_;
return RET_ERROR;
}
nvinfer1::IElementWiseLayer *cal_layer =
network->addElementWise(*tensorrt_in_tensors_[first_in_tensor_index_].trt_tensor_,
*tensorrt_in_tensors_[1 - first_in_tensor_index_].trt_tensor_, element_wise_op_);
if (cal_layer == nullptr) {
MS_LOG(ERROR) << "addElementWise failed for TensorRT.";
@ -129,9 +141,8 @@ int ElementWiseTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
MS_LOG(WARNING) << "deal with scale and shift for pow op";
}
}
op_out_tensor->setName(out_tensors_[0].Name().c_str());
this->AddInnerOutTensors(op_out_tensor);
this->AddInnerOutTensors(ITensorHelper{op_out_tensor, tensorrt_in_tensors_[1].format_});
return RET_OK;
}
@ -184,4 +195,26 @@ nvinfer1::ITensor *ElementWiseTensorRT::AddActivation(nvinfer1::INetworkDefiniti
}
return activation_out_tensor;
}
int ElementWiseTensorRT::AddConstTensor(nvinfer1::INetworkDefinition *network) {
// create ITensor from MS constant tensor of index 1 - first_in_tensor_index_
nvinfer1::ITensor *constant_input = nullptr;
if (this->in_tensors_[1 - first_in_tensor_index_].Shape().size() == 0) {
constant_input = lite::ConvertScalarToITensor(network, this->in_tensors_[first_in_tensor_index_].Shape().size(),
in_tensors_[1 - first_in_tensor_index_].Data().get());
if (constant_input == nullptr) {
MS_LOG(ERROR) << "create Itensor from constant tensor failed: " << op_name_;
return RET_ERROR;
}
this->AddInnerInTensors(ITensorHelper{constant_input, tensorrt_in_tensors_[0].format_});
} else {
constant_input = lite::ConvertConstantTensor(network, in_tensors_[1 - first_in_tensor_index_]);
if (constant_input == nullptr) {
MS_LOG(ERROR) << "create Itensor from constant tensor failed: " << op_name_;
return RET_ERROR;
}
this->AddInnerInTensors(ITensorHelper{constant_input, Format::NHWC});
}
return RET_OK;
}
} // namespace mindspore::lite

View File

@ -37,6 +37,8 @@ class ElementWiseTensorRT : public TensorRTOp {
private:
nvinfer1::ITensor *AddActivation(nvinfer1::INetworkDefinition *network, nvinfer1::ITensor *in_tensor);
int AddConstTensor(nvinfer1::INetworkDefinition *network);
nvinfer1::ElementWiseOperation element_wise_op_;
// index of first input MSTensor in the trt input tensor vector

View File

@ -59,15 +59,30 @@ int GatherTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
MS_LOG(ERROR) << "add a new tensor failed for TensorRT GatherTensorRTOp.";
return RET_ERROR;
}
nvinfer1::IGatherLayer *gather_layer =
network->addGather(*tensorrt_in_tensors_[0], *add_tensor /* indices */, axis_ /* axis */);
nvinfer1::ITensor *gather_input = tensorrt_in_tensors_[0].trt_tensor_;
Format out_format = tensorrt_in_tensors_[0].format_;
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D &&
tensorrt_in_tensors_[0].format_ == Format::NCHW) {
// transpose: NCHW->NHWC
nvinfer1::IShuffleLayer *transpose_layer_in = NCHW2NHWC(network, *tensorrt_in_tensors_[0].trt_tensor_);
if (transpose_layer_in == nullptr) {
MS_LOG(ERROR) << "op action convert failed";
return RET_ERROR;
}
transpose_layer_in->setName((op_name_ + "_transpose2NHWC").c_str());
gather_input = transpose_layer_in->getOutput(0);
out_format = Format::NHWC;
}
nvinfer1::IGatherLayer *gather_layer = network->addGather(*gather_input, *add_tensor /* indices */, axis_ /* axis */);
if (gather_layer == nullptr) {
MS_LOG(ERROR) << "addGather failed for TensorRT.";
return RET_ERROR;
}
gather_layer->setName(op_name_.c_str());
gather_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str());
this->AddInnerOutTensors(gather_layer->getOutput(0));
this->AddInnerOutTensors(ITensorHelper{gather_layer->getOutput(0), out_format});
return RET_OK;
}
} // namespace mindspore::lite

View File

@ -43,7 +43,22 @@ int MatMulTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
transpose_b_ = primitive->transpose_b() ? nvinfer1::MatrixOperation::kTRANSPOSE : nvinfer1::MatrixOperation::kNONE;
auto weight = ConvertTensorWithExpandDims(network, in_tensors_[1], in_tensors_[0].Shape().size());
auto matmul_layer = network->addMatrixMultiply(*tensorrt_in_tensors_[0], transpose_a_, *weight, transpose_b_);
nvinfer1::ITensor *matmul_input = tensorrt_in_tensors_[0].trt_tensor_;
Format out_format = tensorrt_in_tensors_[0].format_;
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D &&
tensorrt_in_tensors_[0].format_ == Format::NCHW) {
// transpose: NCHW->NHWC
nvinfer1::IShuffleLayer *transpose_layer_in = NCHW2NHWC(network, *tensorrt_in_tensors_[0].trt_tensor_);
if (transpose_layer_in == nullptr) {
MS_LOG(ERROR) << "op action convert failed";
return RET_ERROR;
}
transpose_layer_in->setName((op_name_ + "_transpose2NHWC").c_str());
matmul_input = transpose_layer_in->getOutput(0);
out_format = Format::NHWC;
}
auto matmul_layer = network->addMatrixMultiply(*matmul_input, transpose_a_, *weight, transpose_b_);
matmul_layer->setName(op_name_.c_str());
nvinfer1::ITensor *out_tensor = matmul_layer->getOutput(0);
@ -56,7 +71,7 @@ int MatMulTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
}
out_tensor->setName(out_tensors_[0].Name().c_str());
this->AddInnerOutTensors(out_tensor);
this->AddInnerOutTensors(ITensorHelper{out_tensor, out_format});
return RET_OK;
}
} // namespace mindspore::lite

View File

@ -49,6 +49,10 @@ int PadTensorRT::IsSupport(const mindspore::schema::Primitive *primitive,
MS_LOG(ERROR) << "Unsupported padding mode: " << pad_primitive << ", for op: " << op_name_;
return RET_ERROR;
}
if (in_tensors[0].format() != Format::NHWC && in_tensors[0].format() != Format::NCHW) {
MS_LOG(ERROR) << "Unsupported input tensor format of " << in_tensors[0].format();
return RET_ERROR;
}
constant_value_ = pad_primitive->constant_value();
return RET_OK;
}
@ -56,18 +60,24 @@ int PadTensorRT::IsSupport(const mindspore::schema::Primitive *primitive,
int PadTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
mindspore::MSTensor &pad_tensor = in_tensors_[1];
int element_cnt = std::accumulate(pad_tensor.Shape().begin(), pad_tensor.Shape().end(), 1, std::multiplies<int>());
if (element_cnt != tensorrt_in_tensors_[0]->getDimensions().nbDims * 2) {
if (element_cnt != tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims * 2) {
MS_LOG(ERROR) << "pad tensor cnt is invalid. cnt: " << element_cnt
<< ", input tensor dims cnt: " << tensorrt_in_tensors_[0]->getDimensions().nbDims;
<< ", input tensor dims cnt: " << tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims;
return RET_ERROR;
}
// transpose: NHWC->NCHW
nvinfer1::IShuffleLayer *transpose_layer_in = NHWC2NCHW(network, *tensorrt_in_tensors_[0]);
if (transpose_layer_in == nullptr) {
MS_LOG(ERROR) << "transpose: NHWC->NCHW failed";
return RET_ERROR;
nvinfer1::ITensor *pad_input = tensorrt_in_tensors_[0].trt_tensor_;
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D &&
tensorrt_in_tensors_[0].format_ == Format::NHWC) {
// transpose: NHWC->NCHW
nvinfer1::IShuffleLayer *transpose_layer_in = NHWC2NCHW(network, *tensorrt_in_tensors_[0].trt_tensor_);
if (transpose_layer_in == nullptr) {
MS_LOG(ERROR) << "transpose: NHWC->NCHW failed";
return RET_ERROR;
}
transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str());
pad_input = transpose_layer_in->getOutput(0);
}
transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str());
// trt 6 only support 2D padding
const int *padding_data = reinterpret_cast<const int *>(in_tensors_[1].Data().get());
@ -84,7 +94,7 @@ int PadTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
MS_LOG(INFO) << "prePadding: " << *(padding_data + 2) << ", " << *(padding_data + 4);
MS_LOG(INFO) << "postPadding: " << *(padding_data + 3) << ", " << *(padding_data + 5);
padding_layer = network->addPadding(*transpose_layer_in->getOutput(0), prePadding, postPadding);
padding_layer = network->addPadding(*pad_input, prePadding, postPadding);
} else {
MS_LOG(ERROR) << "need check for pad_tensor dims: " << op_name_
<< ", pad_tensor ElementNum: " << pad_tensor.ElementNum();
@ -95,17 +105,8 @@ int PadTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
return RET_ERROR;
}
padding_layer->setName(op_name_.c_str());
// transpose: NCHW->NHWC
nvinfer1::IShuffleLayer *transpose_layer_out = NCHW2NHWC(network, *padding_layer->getOutput(0));
if (transpose_layer_out == nullptr) {
MS_LOG(ERROR) << "op action convert failed";
return RET_ERROR;
}
transpose_layer_out->setName((op_name_ + "_transpose2NHWC").c_str());
transpose_layer_out->getOutput(0)->setName(out_tensors_[0].Name().c_str());
this->AddInnerOutTensors(transpose_layer_out->getOutput(0));
padding_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str());
this->AddInnerOutTensors(ITensorHelper{padding_layer->getOutput(0), Format::NCHW});
return RET_OK;
}
} // namespace mindspore::lite

View File

@ -34,6 +34,10 @@ int PoolTensorRT::IsSupport(const mindspore::schema::Primitive *primitive,
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
return RET_ERROR;
}
if (in_tensors[0].format() != Format::NHWC && in_tensors[0].format() != Format::NCHW) {
MS_LOG(ERROR) << "Unsupported input tensor format of " << in_tensors[0].format();
return RET_ERROR;
}
return RET_OK;
}
@ -47,13 +51,18 @@ int PoolTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
MS_LOG(ERROR) << "invalid input tensor size: " << tensorrt_in_tensors_.size();
return RET_ERROR;
}
// transpose: NHWC->NCHW
nvinfer1::IShuffleLayer *transpose_layer_in = NHWC2NCHW(network, *tensorrt_in_tensors_[0]);
if (transpose_layer_in == nullptr) {
MS_LOG(ERROR) << "transpose: NHWC->NCHW failed";
return RET_ERROR;
nvinfer1::ITensor *pool_input = tensorrt_in_tensors_[0].trt_tensor_;
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D &&
tensorrt_in_tensors_[0].format_ == Format::NHWC) {
// transpose: NHWC->NCHW
nvinfer1::IShuffleLayer *transpose_layer_in = NHWC2NCHW(network, *tensorrt_in_tensors_[0].trt_tensor_);
if (transpose_layer_in == nullptr) {
MS_LOG(ERROR) << "transpose: NHWC->NCHW failed";
return RET_ERROR;
}
transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str());
pool_input = transpose_layer_in->getOutput(0);
}
transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str());
// pooling layer
nvinfer1::PoolingType pooling_type = nvinfer1::PoolingType::kAVERAGE;
@ -64,8 +73,7 @@ int PoolTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
}
std::vector<int64_t> kernel_size_val = std::vector<int64_t>(kernel_size->begin(), kernel_size->end());
nvinfer1::Dims windowSize = lite::ConvertCudaDims(kernel_size_val);
nvinfer1::IPoolingLayer *pooling_layer =
network->addPoolingNd(*transpose_layer_in->getOutput(0), pooling_type, windowSize);
nvinfer1::IPoolingLayer *pooling_layer = network->addPoolingNd(*pool_input, pooling_type, windowSize);
if (pooling_layer == nullptr) {
MS_LOG(ERROR) << "addPoolingNd failed for TensorRT.";
return RET_ERROR;
@ -86,15 +94,8 @@ int PoolTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
}
activation_layer->setName((op_name_ + "_activation").c_str());
}
// transpose: NCHW->NHWC
nvinfer1::IShuffleLayer *transpose_layer_out = NCHW2NHWC(network, *activation_layer->getOutput(0));
if (transpose_layer_out == nullptr) {
MS_LOG(ERROR) << "op action convert failed";
return RET_ERROR;
}
transpose_layer_out->setName((op_name_ + "_transpose2NHWC").c_str());
transpose_layer_out->getOutput(0)->setName(out_tensors_[0].Name().c_str());
this->AddInnerOutTensors(transpose_layer_out->getOutput(0));
activation_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str());
this->AddInnerOutTensors(ITensorHelper{activation_layer->getOutput(0), Format::NCHW});
return RET_OK;
}

View File

@ -28,7 +28,7 @@ int ReduceTensorRT::IsSupport(const schema::Primitive *primitive, const std::vec
MS_LOG(ERROR) << "convert failed";
return RET_ERROR;
}
if (in_tensors.size() != 2) {
if (in_tensors.size() != INPUT_SIZE2) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
}
if (out_tensors.size() != 1) {
@ -55,23 +55,17 @@ int ReduceTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
return RET_ERROR;
}
bool keep_dims = reduce_op->keep_dims();
// axis
uint32_t reduceAxes = 0;
mindspore::MSTensor axis_tensor = this->in_tensors_[1];
if (axis_tensor.Data() == nullptr) {
MS_LOG(ERROR) << "invalid axis_tensor";
return RET_ERROR;
nvinfer1::ITensor *reduce_input = tensorrt_in_tensors_[0].trt_tensor_;
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D &&
tensorrt_in_tensors_[0].format_ == Format::NCHW) {
out_format_ = Format::NHWC;
} else {
out_format_ = tensorrt_in_tensors_[0].format_;
}
if (axis_tensor.DataType() != DataType::kNumberTypeInt32) {
MS_LOG(WARNING) << "not int data type";
}
int *axis_data = reinterpret_cast<int *>(axis_tensor.MutableData());
for (int i = 0; i < axis_tensor.ElementNum(); i++) {
reduceAxes |= (16 - (1u << *axis_data));
axis_data++;
}
MS_LOG(INFO) << "reduceAxes: " << reduceAxes;
nvinfer1::IReduceLayer *layer = network->addReduce(*tensorrt_in_tensors_[0], reduce_op_, reduceAxes, keep_dims);
uint32_t reduceAxis = GetAxis();
nvinfer1::IReduceLayer *layer = network->addReduce(*reduce_input, reduce_op_, reduceAxis, keep_dims);
if (layer == nullptr) {
MS_LOG(ERROR) << "addReduce failed for TensorRT.";
return RET_ERROR;
@ -84,7 +78,29 @@ int ReduceTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
return RET_ERROR;
}
out_tensor->setName(out_tensors_[0].Name().c_str());
this->AddInnerOutTensors(out_tensor);
this->AddInnerOutTensors(ITensorHelper{out_tensor, out_format_});
return RET_OK;
}
uint32_t ReduceTensorRT::GetAxis() {
// axis
uint32_t reduceAxis = 0;
mindspore::MSTensor axis_tensor = this->in_tensors_[1];
if (axis_tensor.Data() == nullptr) {
MS_LOG(ERROR) << "invalid axis_tensor";
return reduceAxis;
}
if (axis_tensor.DataType() != DataType::kNumberTypeInt32) {
MS_LOG(WARNING) << "not int data type";
}
int *axis_data = reinterpret_cast<int *>(axis_tensor.MutableData());
bool need_transpose_axis =
(out_format_ == Format::NCHW) && (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D);
for (int i = 0; i < axis_tensor.ElementNum(); i++) {
int format_axis_data = need_transpose_axis ? ConvertAxisFromNHWC2NCHW(*axis_data) : *axis_data;
reduceAxis |= (16 - (1u << format_axis_data));
axis_data++;
}
MS_LOG(INFO) << "reduceAxis: " << reduceAxis;
return reduceAxis;
}
} // namespace mindspore::lite

View File

@ -36,6 +36,7 @@ class ReduceTensorRT : public TensorRTOp {
const std::vector<mindspore::MSTensor> &out_tensors) override;
private:
uint32_t GetAxis();
std::map<schema::ReduceMode, nvinfer1::ReduceOperation> reduce_ops_ = {
{schema::ReduceMode::ReduceMode_ReduceMean, nvinfer1::ReduceOperation::kAVG},
{schema::ReduceMode::ReduceMode_ReduceMax, nvinfer1::ReduceOperation::kMAX},
@ -44,6 +45,7 @@ class ReduceTensorRT : public TensorRTOp {
{schema::ReduceMode::ReduceMode_ReduceSum, nvinfer1::ReduceOperation::kSUM},
};
nvinfer1::ReduceOperation reduce_op_;
Format out_format_;
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_OP_REDUCE_TENSORRT_H_

View File

@ -17,6 +17,7 @@
#include <numeric>
#include <functional>
#include "src/delegate/tensorrt/op/scale_tensorrt.h"
#include "src/delegate/tensorrt/op/activation_tensorrt.h"
#include "src/delegate/tensorrt/tensorrt_utils.h"
namespace mindspore::lite {
@ -53,14 +54,26 @@ int ScaleTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
}
schema::ActivationType activation_type = scale_op->activation_type();
nvinfer1::ITensor *scale_in_tensor = tensorrt_in_tensors_[0];
// unsqueeze input Itensor to 4 dims
nvinfer1::ITensor *scale_in_tensor = tensorrt_in_tensors_[0].trt_tensor_;
Format out_format = in_tensors_[0].format();
if (in_tensors_[0].Shape().size() < INPUT_SIZE4) {
// unsqueeze input Itensor to 4 dims
scale_in_tensor = AddUnsqueezeOp(network);
if (scale_in_tensor == nullptr) {
MS_LOG(ERROR) << "AddUnsqueezeOp failed";
return RET_ERROR;
}
} else if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == 4 &&
tensorrt_in_tensors_[0].format_ == Format::NCHW) {
// transpose: NCHW->NHWC
nvinfer1::IShuffleLayer *transpose_layer_in = NCHW2NHWC(network, *tensorrt_in_tensors_[0].trt_tensor_);
if (transpose_layer_in == nullptr) {
MS_LOG(ERROR) << "op action convert failed";
return RET_ERROR;
}
transpose_layer_in->setName((op_name_ + "_transpose2NHWC").c_str());
scale_in_tensor = transpose_layer_in->getOutput(0);
out_format = Format::NHWC;
}
// mode of scale
size_t axis = scale_op->axis();
@ -100,18 +113,27 @@ int ScaleTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
return RET_ERROR;
}
cal_layer->setName(op_name_.c_str());
nvinfer1::ITensor *op_out_tensor = cal_layer->getOutput(0);
if (op_out_tensor == nullptr) {
MS_LOG(ERROR) << "addScaleNd output tensor is invalid for: " << op_name_;
return RET_ERROR;
}
// add activation
nvinfer1::ITensor *activation_tensor = cal_layer->getOutput(0);
if (activation_type != schema::ActivationType::ActivationType_NO_ACTIVATION) {
MS_LOG(WARNING) << "need activation for: " << op_name_;
auto activation_layer = ActivationTensorRT::AddActivation(network, activation_type, 0, cal_layer->getOutput(0));
if (activation_layer == nullptr) {
MS_LOG(ERROR) << "addActivation for scale failed";
return RET_ERROR;
}
activation_layer->setName((op_name_ + "_activation").c_str());
activation_tensor = activation_layer->getOutput(0);
}
// squeeze to origin dim
nvinfer1::ITensor *op_out_tensor = activation_tensor;
if (activation_tensor->getDimensions().nbDims > static_cast<int>(out_tensors_[0].Shape().size())) {
op_out_tensor = AddSqueezeOp(activation_tensor, network);
}
op_out_tensor->setName(out_tensors_[0].Name().c_str());
this->AddInnerOutTensors(op_out_tensor);
this->AddInnerOutTensors(ITensorHelper{op_out_tensor, out_format});
return RET_OK;
}
@ -136,7 +158,7 @@ nvinfer1::ScaleMode ScaleTensorRT::GetScaleMode(size_t axis) {
}
nvinfer1::ITensor *ScaleTensorRT::AddUnsqueezeOp(nvinfer1::INetworkDefinition *network) {
nvinfer1::IShuffleLayer *unsqueeze_layer = network->addShuffle(*this->tensorrt_in_tensors_[0]);
nvinfer1::IShuffleLayer *unsqueeze_layer = network->addShuffle(*this->tensorrt_in_tensors_[0].trt_tensor_);
if (unsqueeze_layer == nullptr) {
MS_LOG(ERROR) << "addShuffle failed for: " << op_name_;
return nullptr;
@ -150,4 +172,17 @@ nvinfer1::ITensor *ScaleTensorRT::AddUnsqueezeOp(nvinfer1::INetworkDefinition *n
unsqueeze_layer->setReshapeDimensions(unsqueeze_dims);
return unsqueeze_layer->getOutput(0);
}
nvinfer1::ITensor *ScaleTensorRT::AddSqueezeOp(nvinfer1::ITensor *in_tensor, nvinfer1::INetworkDefinition *network) {
nvinfer1::IShuffleLayer *squeeze_layer = network->addShuffle(*in_tensor);
if (squeeze_layer == nullptr) {
MS_LOG(ERROR) << "addShuffle failed for: " << op_name_;
return nullptr;
}
squeeze_layer->setName((op_name_ + "_squeeze").c_str());
nvinfer1::Dims squeeze_dims = lite::ConvertCudaDims(out_tensors_[0].Shape());
MS_LOG(INFO) << "squeeze_dims cnt for scale: " << squeeze_dims.nbDims;
squeeze_layer->setReshapeDimensions(squeeze_dims);
return squeeze_layer->getOutput(0);
}
} // namespace mindspore::lite

View File

@ -38,6 +38,8 @@ class ScaleTensorRT : public TensorRTOp {
private:
nvinfer1::ITensor *AddUnsqueezeOp(nvinfer1::INetworkDefinition *network);
nvinfer1::ITensor *AddSqueezeOp(nvinfer1::ITensor *in_tensor, nvinfer1::INetworkDefinition *network);
nvinfer1::ScaleMode GetScaleMode(size_t axis);
};
} // namespace mindspore::lite

View File

@ -38,7 +38,7 @@ int ShapeTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
MS_LOG(ERROR) << "network is invalid";
return RET_ERROR;
}
nvinfer1::IShapeLayer *shape_layer = network->addShape(*tensorrt_in_tensors_[0]);
nvinfer1::IShapeLayer *shape_layer = network->addShape(*tensorrt_in_tensors_[0].trt_tensor_);
if (shape_layer == nullptr) {
MS_LOG(DEBUG) << "add shape op failed for TensorRT.";
@ -46,7 +46,7 @@ int ShapeTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
}
shape_layer->setName(op_name_.c_str());
shape_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str());
this->AddInnerOutTensors(shape_layer->getOutput(0));
this->AddInnerOutTensors(ITensorHelper{shape_layer->getOutput(0), tensorrt_in_tensors_[0].format_});
return RET_OK;
}
} // namespace mindspore::lite

View File

@ -72,58 +72,56 @@ int ShuffleTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
MS_LOG(ERROR) << "network is invalid";
return RET_ERROR;
}
nvinfer1::IShuffleLayer *shuffle_layer = network->addShuffle(*tensorrt_in_tensors_[0]);
nvinfer1::ITensor *shuffler_input = tensorrt_in_tensors_[0].trt_tensor_;
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == 4 &&
tensorrt_in_tensors_[0].format_ == Format::NCHW && !tensorrt_in_tensors_[0].trt_tensor_->isNetworkInput()) {
// network input tensor format can be NCHW
nvinfer1::IShuffleLayer *transpose_layer = NCHW2NHWC(network, *tensorrt_in_tensors_[0].trt_tensor_);
if (transpose_layer == nullptr) {
MS_LOG(ERROR) << "create transpose layer failed for " << op_name_;
}
transpose_layer->setName((op_name_ + "_transpose_in").c_str());
shuffler_input = transpose_layer->getOutput(0);
}
nvinfer1::IShuffleLayer *shuffle_layer = network->addShuffle(*shuffler_input);
if (shuffle_layer == nullptr) {
MS_LOG(ERROR) << "add Shuffle op failed for TensorRT.";
return RET_ERROR;
}
shuffle_layer->setName(op_name_.c_str());
int ret = RET_OK;
switch (type_) {
case schema::PrimitiveType_Unsqueeze: {
int ret = AddUnsqueezeOp(shuffle_layer);
if (ret != RET_OK) {
MS_LOG(ERROR) << "AddUnSqueezeOp failed.";
return ret;
}
ret = AddUnsqueezeOp(shuffle_layer);
break;
}
case schema::PrimitiveType_Squeeze: {
int ret = AddSqueezeOp(shuffle_layer);
if (ret != RET_OK) {
MS_LOG(ERROR) << "AddSqueezeOp failed.";
return ret;
}
ret = AddSqueezeOp(shuffle_layer);
break;
}
case schema::PrimitiveType_Transpose: {
int ret = AddTransposeOp(shuffle_layer);
if (ret != RET_OK) {
MS_LOG(ERROR) << "AddTransposeOpss failed.";
return ret;
}
ret = AddTransposeOp(shuffle_layer);
break;
}
case schema::PrimitiveType_Reshape: {
int ret = AddReshapeOp(shuffle_layer);
if (ret != RET_OK) {
MS_LOG(ERROR) << "AddReshapeOp failed.";
return ret;
}
ret = AddReshapeOp(shuffle_layer);
break;
}
case schema::PrimitiveType_Flatten: {
int ret = AddFlattenOp(shuffle_layer);
if (ret != RET_OK) {
MS_LOG(ERROR) << "AddFlattenOp failed.";
return ret;
}
ret = AddFlattenOp(shuffle_layer);
break;
}
default:
MS_LOG(ERROR) << "Unsupported op type.";
MS_LOG(ERROR) << "Unsupported op type for " << op_name_;
return RET_ERROR;
}
if (ret != RET_OK) {
MS_LOG(ERROR) << "AddOp failed for " << op_name_;
return ret;
}
nvinfer1::ITensor *out_tensor = shuffle_layer->getOutput(0);
if (out_tensor == nullptr) {
@ -131,7 +129,7 @@ int ShuffleTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
return RET_ERROR;
}
out_tensor->setName(out_tensors_[0].Name().c_str());
this->AddInnerOutTensors(out_tensor);
this->AddInnerOutTensors(ITensorHelper{out_tensor, Format::NHWC});
return RET_OK;
}
@ -177,7 +175,7 @@ int ShuffleTensorRT::AddUnsqueezeOp(nvinfer1::IShuffleLayer *shuffle_layer) {
MS_LOG(WARNING) << "AddUnsqueezeOp size of in tensort needs check: " << in_tensors_.size();
}
// axis
auto unsqueeze_shape = tensorrt_in_tensors_[0]->getDimensions();
auto unsqueeze_shape = tensorrt_in_tensors_[0].trt_tensor_->getDimensions();
std::vector<int64_t> new_shape(unsqueeze_shape.d, unsqueeze_shape.d + unsqueeze_shape.nbDims);
auto axis = unsqueeze_op->axis();
@ -229,7 +227,7 @@ int ShuffleTensorRT::AddReshapeOp(nvinfer1::IShuffleLayer *shuffle_layer) {
MS_LOG(ERROR) << "invalid shape tensor for reshape " << op_name_;
return RET_ERROR;
}
shuffle_layer->setInput(1, *tensorrt_in_tensors_[1]);
shuffle_layer->setInput(1, *tensorrt_in_tensors_[1].trt_tensor_);
}
return RET_OK;
}

View File

@ -53,7 +53,22 @@ int SliceTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
nvinfer1::Dims size_dims = lite::ConvertCudaDims(out_tensors_[0].Shape());
nvinfer1::Dims stride_dims = lite::ConvertCudaDims(stride.Data().get(), stride.ElementNum());
nvinfer1::ISliceLayer *slice_layer = network->addSlice(*tensorrt_in_tensors_[0], start_dims, size_dims, stride_dims);
nvinfer1::ITensor *slice_input = tensorrt_in_tensors_[0].trt_tensor_;
Format out_format = tensorrt_in_tensors_[0].format_;
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == 4 &&
tensorrt_in_tensors_[0].format_ == Format::NCHW) {
// transpose: NCHW->NHWC
nvinfer1::IShuffleLayer *transpose_layer_in = NCHW2NHWC(network, *tensorrt_in_tensors_[0].trt_tensor_);
if (transpose_layer_in == nullptr) {
MS_LOG(ERROR) << "op action convert failed";
return RET_ERROR;
}
transpose_layer_in->setName((op_name_ + "_transpose2NHWC").c_str());
slice_input = transpose_layer_in->getOutput(0);
out_format = Format::NHWC;
}
nvinfer1::ISliceLayer *slice_layer = network->addSlice(*slice_input, start_dims, size_dims, stride_dims);
if (slice_layer == nullptr) {
MS_LOG(ERROR) << "add Slice op failed for TensorRT: " << op_name_;
return RET_ERROR;
@ -65,7 +80,7 @@ int SliceTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
return RET_ERROR;
}
out_tensor->setName(out_tensors_[0].Name().c_str());
this->AddInnerOutTensors(out_tensor);
this->AddInnerOutTensors(ITensorHelper{out_tensor, out_format});
return RET_OK;
}
} // namespace mindspore::lite

View File

@ -23,19 +23,10 @@ int SoftMaxTensorRT::IsSupport(const schema::Primitive *primitive, const std::ve
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
return RET_ERROR;
}
if (primitive->value_type() == schema::PrimitiveType::PrimitiveType_LogSoftmax) {
with_log_ = true;
auto softmax_op = primitive->value_as_LogSoftmax();
if (softmax_op == nullptr) {
MS_LOG(ERROR) << "LogSoftmax convert failed";
return RET_ERROR;
}
} else {
auto softmax_op = primitive->value_as_Softmax();
if (softmax_op == nullptr) {
MS_LOG(ERROR) << "convert failed";
return RET_ERROR;
}
softmax_op_ = primitive->value_as_Softmax();
if (softmax_op_ == nullptr) {
MS_LOG(ERROR) << "convert failed";
return RET_ERROR;
}
if (in_tensors.size() != 1) {
@ -48,7 +39,6 @@ int SoftMaxTensorRT::IsSupport(const schema::Primitive *primitive, const std::ve
}
return RET_OK;
}
int SoftMaxTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
if (network == nullptr) {
MS_LOG(ERROR) << "network is invalid";
@ -66,58 +56,36 @@ int SoftMaxTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
MS_LOG(ERROR) << "softmax output tensor create failed for TensorRT.";
return RET_ERROR;
}
if (with_log_) {
nvinfer1::IUnaryLayer *log_layer = network->addUnary(*out_tensor, nvinfer1::UnaryOperation::kLOG);
if (log_layer == nullptr) {
MS_LOG(ERROR) << "add log op failed for TensorRT.";
return RET_ERROR;
}
log_layer->setName((op_name_ + "_log").c_str());
out_tensor = log_layer->getOutput(0);
if (out_tensor == nullptr) {
MS_LOG(ERROR) << "softmax log output tensor create failed for TensorRT.";
return RET_ERROR;
}
}
out_tensor->setName(out_tensors_[0].Name().c_str());
this->AddInnerOutTensors(out_tensor);
this->AddInnerOutTensors(ITensorHelper{out_tensor, tensorrt_in_tensors_[0].format_});
return RET_OK;
}
nvinfer1::ISoftMaxLayer *SoftMaxTensorRT::AddSoftMaxOp(nvinfer1::INetworkDefinition *network) {
nvinfer1::ISoftMaxLayer *current_layer_ = network->addSoftMax(*this->GetInnerInTensors()[0]);
nvinfer1::ISoftMaxLayer *current_layer_ = network->addSoftMax(*tensorrt_in_tensors_[0].trt_tensor_);
if (current_layer_ == nullptr) {
MS_LOG(ERROR) << "add softmax op failed for TensorRT.";
return nullptr;
}
std::vector<int64_t> axis_val;
if (with_log_) {
auto softmax_op = this->GetPrimitive()->value_as_LogSoftmax();
if (softmax_op == nullptr) {
MS_LOG(ERROR) << "LogSoftmax convert failed";
return nullptr;
}
int64_t axis = softmax_op->axis();
axis_val.push_back(axis);
} else {
auto softmax_op = this->GetPrimitive()->value_as_Softmax();
if (softmax_op == nullptr) {
MS_LOG(ERROR) << "Softmax convert failed";
return nullptr;
}
auto axis = softmax_op->axis();
axis_val = std::vector<int64_t>(axis->begin(), axis->end());
}
auto axis = softmax_op_->axis();
axis_val = std::vector<int64_t>(axis->begin(), axis->end());
if (axis_val.size() != 1) {
MS_LOG(WARNING) << "axis needs check";
}
if (axis_val[0] >= this->tensorrt_in_tensors_[0]->getDimensions().nbDims) {
if (axis_val[0] >= this->tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims) {
MS_LOG(ERROR) << "axis is larger than input tensor dims.";
return nullptr;
}
current_layer_->setAxes(axis_val[0]);
int64_t axis_format_value = axis_val[0];
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == 4 &&
tensorrt_in_tensors_[0].format_ == Format::NCHW) {
// transpose axis to NCHW
axis_format_value = ConvertAxisFromNHWC2NCHW(axis_val[0]);
}
current_layer_->setAxes(axis_format_value);
return current_layer_;
}
} // namespace mindspore::lite

View File

@ -34,8 +34,9 @@ class SoftMaxTensorRT : public TensorRTOp {
const std::vector<mindspore::MSTensor> &out_tensors) override;
private:
bool with_log_ = false;
nvinfer1::ISoftMaxLayer *AddSoftMaxOp(nvinfer1::INetworkDefinition *network);
const schema::Softmax *softmax_op_;
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_OP_SOFTMAX_TENSORRT_H_

View File

@ -19,13 +19,13 @@
namespace mindspore::lite {
const schema::Primitive *TensorRTOp::GetPrimitive() { return this->op_primitive_; }
void TensorRTOp::AddInnerInTensors(nvinfer1::ITensor *tensor) { this->tensorrt_in_tensors_.push_back(tensor); }
void TensorRTOp::AddInnerInTensors(ITensorHelper tensor) { this->tensorrt_in_tensors_.push_back(tensor); }
void TensorRTOp::AddInnerOutTensors(nvinfer1::ITensor *tensor) { this->tensorrt_out_tensors_.push_back(tensor); }
void TensorRTOp::AddInnerOutTensors(ITensorHelper tensor) { this->tensorrt_out_tensors_.push_back(tensor); }
std::vector<nvinfer1::ITensor *> &TensorRTOp::GetInnerOutTensor() { return this->tensorrt_out_tensors_; }
std::vector<ITensorHelper> &TensorRTOp::GetInnerOutTensor() { return this->tensorrt_out_tensors_; }
std::vector<nvinfer1::ITensor *> &TensorRTOp::GetInnerInTensors() { return this->tensorrt_in_tensors_; }
std::vector<ITensorHelper> &TensorRTOp::GetInnerInTensors() { return this->tensorrt_in_tensors_; }
std::string TensorRTOp::GetOpName() { return this->op_name_; }

View File

@ -23,12 +23,18 @@
#include "include/api/kernel.h"
#include "src/common/log_adapter.h"
#include "include/errorcode.h"
#include "src/delegate/tensorrt/tensorrt_utils.h"
namespace mindspore::lite {
constexpr int INPUT_SIZE2 = 2;
constexpr int INPUT_SIZE3 = 3;
constexpr int INPUT_SIZE4 = 4;
struct ITensorHelper {
nvinfer1::ITensor *trt_tensor_{nullptr};
mindspore::Format format_;
};
class TensorRTOp {
public:
explicit TensorRTOp(const schema::Primitive *primitive, std::vector<mindspore::MSTensor> in_tensors,
@ -51,13 +57,13 @@ class TensorRTOp {
const schema::Primitive *GetPrimitive();
void AddInnerInTensors(nvinfer1::ITensor *tensor);
void AddInnerInTensors(ITensorHelper tensor);
void AddInnerOutTensors(nvinfer1::ITensor *tensor);
void AddInnerOutTensors(ITensorHelper tensor);
std::vector<nvinfer1::ITensor *> &GetInnerOutTensor();
std::vector<ITensorHelper> &GetInnerOutTensor();
std::vector<nvinfer1::ITensor *> &GetInnerInTensors();
std::vector<ITensorHelper> &GetInnerInTensors();
std::string GetOpName();
@ -86,9 +92,9 @@ class TensorRTOp {
std::vector<mindspore::MSTensor> out_tensors_;
std::vector<nvinfer1::ITensor *> tensorrt_in_tensors_;
std::vector<ITensorHelper> tensorrt_in_tensors_;
std::vector<nvinfer1::ITensor *> tensorrt_out_tensors_;
std::vector<ITensorHelper> tensorrt_out_tensors_;
std::vector<TensorRTOp *> in_ops_;

View File

@ -44,7 +44,7 @@ int UnaryTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
MS_LOG(ERROR) << "network or input tensor is invalid";
return RET_ERROR;
}
nvinfer1::IUnaryLayer *cal_layer = network->addUnary(*tensorrt_in_tensors_[0], unary_op_);
nvinfer1::IUnaryLayer *cal_layer = network->addUnary(*tensorrt_in_tensors_[0].trt_tensor_, unary_op_);
if (cal_layer == nullptr) {
MS_LOG(ERROR) << "addUnary failed for: " << op_name_;
return RET_ERROR;
@ -53,7 +53,7 @@ int UnaryTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
nvinfer1::ITensor *op_out_tensor = cal_layer->getOutput(0);
op_out_tensor->setName(out_tensors_[0].Name().c_str());
this->AddInnerOutTensors(op_out_tensor);
this->AddInnerOutTensors(ITensorHelper{op_out_tensor, tensorrt_in_tensors_[0].format_});
return RET_OK;
}
} // namespace mindspore::lite

View File

@ -187,6 +187,7 @@ nvinfer1::ITensor *TensorRTSubGraph::SetTensorRTNetworkInput(const mindspore::MS
// only support NHWC HW dim resize
if (input_hw_index_ != -1) {
MS_LOG(INFO) << "input tensor format: " << in_tensor.format();
input_hw_index_ = in_tensor.format() == Format::NHWC ? 1 : /* NCHW*/ 2;
input_dims.d[input_hw_index_] = -1;
input_dims.d[input_hw_index_ + 1] = -1;
@ -208,19 +209,20 @@ int TensorRTSubGraph::BuildTensorRTGraph() {
MS_LOG(ERROR) << "SetTensorRTNetworkInput failed for " << in_tensor.Name();
return RET_ERROR;
}
cur_op->AddInnerInTensors(trt_tensor);
cur_op->AddInnerInTensors(ITensorHelper{trt_tensor, in_tensor.format()});
continue;
}
auto trt_tensor = FindTensorRTInputs(cur_op, in_tensor);
// weight tensor
if (trt_tensor == nullptr) {
ITensorHelper trt_tensor = FindTensorRTInputs(cur_op, in_tensor);
if (trt_tensor.trt_tensor_ == nullptr) {
// weight tensor
if (trt_specific_weight_nodes_.find(cur_op->type()) == trt_specific_weight_nodes_.end()) {
if (in_tensor == nullptr) {
MS_LOG(ERROR) << "Weight Tensor is nullptr.";
return RET_ERROR;
}
trt_tensor = lite::ConvertConstantTensor(this->network_, in_tensor);
trt_tensor.trt_tensor_ = lite::ConvertConstantTensor(this->network_, in_tensor);
trt_tensor.format_ = Format::NHWC;
MS_LOG(INFO) << "auto convert constant tensor for: " << cur_op->GetOpName();
cur_op->AddInnerInTensors(trt_tensor);
}
@ -236,16 +238,44 @@ int TensorRTSubGraph::BuildTensorRTGraph() {
}
}
ret = MarkOutputs();
if (ret != RET_OK) {
MS_LOG(ERROR) << "MarkOutputs failed in TensorRT network";
return ret;
}
ret = BuildEngine();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Create engine failed in TensorRT network";
return ret;
}
return RET_OK;
}
int TensorRTSubGraph::MarkOutputs() {
// Mark NetWork Output Tensor.
for (auto out_tensor : outputs_) {
for (auto out_op : this->out_ops_) {
for (size_t index = 0; index < out_op->outputs().size(); index++) {
if (out_op->outputs()[index] == out_tensor) {
out_op->GetInnerOutTensor()[index]->setName(out_tensor.Name().c_str());
nvinfer1::ITensor *out_trt_tensor = out_op->GetInnerOutTensor()[index].trt_tensor_;
if (out_op->GetInnerOutTensor()[index].trt_tensor_->getDimensions().nbDims == 4 &&
out_op->GetInnerOutTensor()[index].format_ == Format::NCHW) {
// transpose subgraph output from nchw to nhwc
nvinfer1::IShuffleLayer *transpose_layer_out =
NCHW2NHWC(network_, *out_op->GetInnerOutTensor()[index].trt_tensor_);
if (transpose_layer_out == nullptr) {
MS_LOG(ERROR) << "op action convert failed";
return RET_ERROR;
}
transpose_layer_out->setName((out_tensor.Name() + "_transpose2NHWC").c_str());
}
out_trt_tensor->setName(out_tensor.Name().c_str());
MS_LOG(INFO) << "markOutput for: " << out_tensor.Name();
this->network_->markOutput(*out_op->GetInnerOutTensor()[index]);
for (int n = 0; n < out_op->GetInnerOutTensor()[index]->getDimensions().nbDims; n++) {
if (out_op->GetInnerOutTensor()[index]->getDimensions().d[n] == -1) {
this->network_->markOutput(*out_trt_tensor);
for (int n = 0; n < out_trt_tensor->getDimensions().nbDims; n++) {
if (out_trt_tensor->getDimensions().d[n] == -1) {
output_batchsize_index_ = n;
break;
}
@ -254,12 +284,6 @@ int TensorRTSubGraph::BuildTensorRTGraph() {
}
}
}
ret = BuildEngine();
if (ret != RET_OK) {
MS_LOG(ERROR) << "Create engine failed in TensorRT network";
return ret;
}
return RET_OK;
}
@ -292,7 +316,7 @@ int TensorRTSubGraph::Prepare() {
trt_in_tensor_name_.push_back(tensor.Name());
nvinfer1::Dims input_dims = ConvertCudaDims(tensor.Shape());
for (int od = 0; od < input_dims.nbDims; od++) {
MS_LOG(INFO) << "in tensor " << tensor.Name() << " dims at " << od << " is " << input_dims.d[od];
MS_LOG(DEBUG) << "in tensor " << tensor.Name() << " dims at " << od << " is " << input_dims.d[od];
}
if (!this->trt_context_->setBindingDimensions(index, input_dims)) {
@ -363,7 +387,7 @@ int TensorRTSubGraph::ReSize() {
// Set actual input size
nvinfer1::Dims input_dims = ConvertCudaDims(inputs_[i].Shape());
for (int od = 0; od < input_dims.nbDims; od++) {
MS_LOG(INFO) << "in tensor " << trt_in_tensor_name_[i] << " dims at " << od << " is " << input_dims.d[od];
MS_LOG(DEBUG) << "in tensor " << trt_in_tensor_name_[i] << " dims at " << od << " is " << input_dims.d[od];
}
if (!this->trt_context_->setBindingDimensions(index, input_dims)) {
@ -420,7 +444,7 @@ int TensorRTSubGraph::Execute() {
new_shape[output_batchsize_index_] = runtime_->GetBatchSize();
}
for (int od = 0; od < out_dims.nbDims; od++) {
MS_LOG(INFO) << "out tensor " << trt_out_tensor_name_[i] << " dims at " << od << " is " << new_shape[od];
MS_LOG(DEBUG) << "out tensor " << trt_out_tensor_name_[i] << " dims at " << od << " is " << new_shape[od];
}
outputs_[i].SetShape(new_shape);
@ -438,7 +462,7 @@ int TensorRTSubGraph::Execute() {
return RET_OK;
}
nvinfer1::ITensor *TensorRTSubGraph::FindTensorRTInputs(TensorRTOp *cur_op, const mindspore::MSTensor &in_tensor) {
ITensorHelper TensorRTSubGraph::FindTensorRTInputs(TensorRTOp *cur_op, const mindspore::MSTensor &in_tensor) {
for (auto input_op : cur_op->in_ops()) {
for (size_t i = 0; i < input_op->outputs().size(); i++) {
auto out_tensor = input_op->outputs().at(i);
@ -447,6 +471,6 @@ nvinfer1::ITensor *TensorRTSubGraph::FindTensorRTInputs(TensorRTOp *cur_op, cons
}
}
}
return nullptr;
return ITensorHelper{};
}
} // namespace mindspore::lite

View File

@ -67,7 +67,9 @@ class TensorRTSubGraph : public kernel::Kernel {
nvinfer1::ITensor *SetTensorRTNetworkInput(const mindspore::MSTensor &in_tensor);
static nvinfer1::ITensor *FindTensorRTInputs(TensorRTOp *cur_op, const mindspore::MSTensor &in_tensor);
ITensorHelper FindTensorRTInputs(TensorRTOp *cur_op, const mindspore::MSTensor &in_tensor);
int MarkOutputs();
std::vector<TensorRTOp *> all_ops_{};
// subgraph input nodes.

View File

@ -254,4 +254,37 @@ void SetCudaDevice(std::shared_ptr<GPUDeviceInfo> device_info_) {
}
MS_LOG(INFO) << "cuda is running on device: " << device;
}
Format GetOutputFormat(Format input_format, nvinfer1::Permutation perm) {
if (input_format == Format::NHWC) {
if (perm.order[0] == 0 && perm.order[1] == 3 && perm.order[2] == 2 && perm.order[3] == 1) {
return Format::NCHW;
}
} else if (input_format == Format::NCHW) {
if (perm.order[0] == 0 && perm.order[1] == 2 && perm.order[2] == 3 && perm.order[3] == 1) {
return Format::NHWC;
}
}
MS_LOG(WARNING) << "transpose out format needs to check for " << input_format;
return input_format;
}
int ConvertAxisFromNHWC2NCHW(int nhwc_axis) {
// N0H1W2C3->N0C1H2W3
if (nhwc_axis > kNHWC_C) {
return nhwc_axis;
}
switch (nhwc_axis) {
case kNHWC_N:
return kNCHW_N;
case kNHWC_H:
return kNCHW_H;
case kNHWC_W:
return kNCHW_W;
case kNHWC_C:
return kNCHW_C;
default:
MS_LOG(ERROR) << "invalid input axis for nhwc: " << nhwc_axis;
}
return nhwc_axis;
}
} // namespace mindspore::lite
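A worked example of the axis remapping added above, assuming a 4-D tensor; the mapping follows the kNHWC_*/kNCHW_* macros declared in tensorrt_utils.h below (N0 H1 W2 C3 -> N0 C1 H2 W3). This is an illustration, not delegate code:

#include <cassert>

int AxisNhwcToNchw(int nhwc_axis) {
  // same table ConvertAxisFromNHWC2NCHW encodes with a switch
  static const int kMap[4] = {0 /*N->N*/, 2 /*H->H*/, 3 /*W->W*/, 1 /*C->C*/};
  return (nhwc_axis >= 0 && nhwc_axis <= 3) ? kMap[nhwc_axis] : nhwc_axis;
}

int main() {
  // e.g. an axis of 3 (channels in NHWC) becomes 1 in NCHW, which is the
  // adjustment SoftMaxTensorRT and ConcateTensorRT apply when their input is
  // kept in NCHW.
  assert(AxisNhwcToNchw(0) == 0);  // N
  assert(AxisNhwcToNchw(1) == 2);  // H
  assert(AxisNhwcToNchw(2) == 3);  // W
  assert(AxisNhwcToNchw(3) == 1);  // C
  return 0;
}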

View File

@ -23,6 +23,10 @@
#include "schema/ops_generated.h"
#include "nnacl/pack.h"
#define kNCHW_N 0
#define kNCHW_C 1
#define kNCHW_H 2
#define kNCHW_W 3
namespace mindspore::lite {
struct ActivationParams {
nvinfer1::ActivationType activation_type;
@ -61,5 +65,9 @@ nvinfer1::Weights TransposeWeight(const mindspore::MSTensor &ms_tensor, float **
nvinfer1::Weights ConvertWeight(const mindspore::MSTensor &ms_tensor);
void SetCudaDevice(std::shared_ptr<GPUDeviceInfo> device_info_);
Format GetOutputFormat(Format input_format, nvinfer1::Permutation perm);
int ConvertAxisFromNHWC2NCHW(int nhwc_axis);
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_UTILS_H_