diff --git a/mindspore/lite/src/delegate/tensorrt/op/activation_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/activation_tensorrt.cc index a26c09c2e4a..52c3f386fcf 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/activation_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/activation_tensorrt.cc @@ -15,7 +15,6 @@ */ #include "src/delegate/tensorrt/op/activation_tensorrt.h" -#include "src/delegate/tensorrt/tensorrt_utils.h" namespace mindspore::lite { int ActivationTensorRT::IsSupport(const schema::Primitive *primitive, @@ -58,8 +57,8 @@ int ActivationTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { } float alpha = activation_op->alpha(); - nvinfer1::IActivationLayer *activation_layer = - ActivationTensorRT::AddActivation(network, activation_op->activation_type(), alpha, tensorrt_in_tensors_[0]); + nvinfer1::IActivationLayer *activation_layer = ActivationTensorRT::AddActivation( + network, activation_op->activation_type(), alpha, tensorrt_in_tensors_[0].trt_tensor_); if (activation_layer == nullptr) { MS_LOG(ERROR) << "add activation op failed for TensorRT."; return RET_ERROR; @@ -67,7 +66,7 @@ int ActivationTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { activation_layer->setName(op_name_.c_str()); activation_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str()); - this->AddInnerOutTensors(activation_layer->getOutput(0)); + this->AddInnerOutTensors(ITensorHelper{activation_layer->getOutput(0), tensorrt_in_tensors_[0].format_}); return RET_OK; } diff --git a/mindspore/lite/src/delegate/tensorrt/op/concate_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/concate_tensorrt.cc index b156b125dd4..71787f1e4f6 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/concate_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/concate_tensorrt.cc @@ -24,7 +24,7 @@ int ConcateTensorRT::IsSupport(const schema::Primitive *primitive, const std::ve MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_; return RET_ERROR; } - if (in_tensors.size() < 1) { + if (in_tensors.size() != INPUT_SIZE2) { MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size(); return RET_ERROR; } @@ -50,11 +50,46 @@ int ConcateTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { return RET_ERROR; } + nvinfer1::ITensor *trt_input_tensors[tensorrt_in_tensors_.size()]; + if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims != + tensorrt_in_tensors_[1].trt_tensor_->getDimensions().nbDims) { + MS_LOG(ERROR) << "dims of inputs is invalid for " << op_name_; + return RET_ERROR; + } + + // make sure two inputs have same format + Format out_format = tensorrt_in_tensors_[0].format_; + if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D) { + if (tensorrt_in_tensors_[0].format_ == tensorrt_in_tensors_[1].format_) { + for (size_t i = 0; i < tensorrt_in_tensors_.size(); i++) { + trt_input_tensors[i] = tensorrt_in_tensors_[i].trt_tensor_; + } + } else { + // when inputs format are different, change to NHWC + out_format = Format::NHWC; + int transpose_tensor_index = tensorrt_in_tensors_[0].format_ == Format::NCHW ? 
0 : 1; + trt_input_tensors[1 - transpose_tensor_index] = tensorrt_in_tensors_[1 - transpose_tensor_index].trt_tensor_; + nvinfer1::IShuffleLayer *transpose_layer = + NCHW2NHWC(network, *tensorrt_in_tensors_[transpose_tensor_index].trt_tensor_); + if (transpose_layer == nullptr) { + MS_LOG(ERROR) << "op action convert failed"; + return RET_ERROR; + } + trt_input_tensors[transpose_tensor_index] = transpose_layer->getOutput(0); + } + } else { + for (size_t i = 0; i < tensorrt_in_tensors_.size(); i++) { + trt_input_tensors[i] = tensorrt_in_tensors_[i].trt_tensor_; + } + } + int axis = RET_INVALID_OP_ATTR; axis = concate_op->axis(); - - nvinfer1::ITensor *trt_input_tensors[tensorrt_in_tensors_.size()]; - std::copy(tensorrt_in_tensors_.begin(), tensorrt_in_tensors_.end(), trt_input_tensors); + if (out_format == Format::NCHW) { + // when inputs all NCHW, change axis + axis = ConvertAxisFromNHWC2NCHW(axis); + MS_LOG(INFO) << "concate axis change to " << axis << " when using NCHW format."; + } nvinfer1::IConcatenationLayer *concate_layer = network->addConcatenation(trt_input_tensors, static_cast(tensorrt_in_tensors_.size())); @@ -68,8 +103,7 @@ int ConcateTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { } concate_layer->setName(op_name_.c_str()); concate_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str()); - this->AddInnerOutTensors(concate_layer->getOutput(0)); - + this->AddInnerOutTensors(ITensorHelper{concate_layer->getOutput(0), out_format}); return RET_OK; } } // namespace mindspore::lite diff --git a/mindspore/lite/src/delegate/tensorrt/op/convolution_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/convolution_tensorrt.cc index 649158a5365..c2f92efb052 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/convolution_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/convolution_tensorrt.cc @@ -16,7 +16,6 @@ #include "src/delegate/tensorrt/op/convolution_tensorrt.h" #include "src/delegate/tensorrt/op/activation_tensorrt.h" -#include "src/delegate/tensorrt/tensorrt_utils.h" namespace mindspore::lite { constexpr int BIAS_INDEX = 2; @@ -28,7 +27,7 @@ int ConvolutionTensorRT::IsSupport(const schema::Primitive *primitive, MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_; return RET_ERROR; } - if (in_tensors.size() != 2 && in_tensors.size() != 3) { + if (in_tensors.size() != INPUT_SIZE2 && in_tensors.size() != INPUT_SIZE3) { MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size(); return RET_ERROR; } @@ -36,6 +35,10 @@ int ConvolutionTensorRT::IsSupport(const schema::Primitive *primitive, MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size(); return RET_ERROR; } + if (in_tensors[0].format() != Format::NHWC && in_tensors[0].format() != Format::NCHW) { + MS_LOG(ERROR) << "Unsupported input tensor format of " << in_tensors[0].format(); + return RET_ERROR; + } return RET_OK; } @@ -49,13 +52,19 @@ int ConvolutionTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { MS_LOG(ERROR) << "op action convert failed"; return RET_ERROR; } - // transpose: NHWC->NCHW - nvinfer1::IShuffleLayer *transpose_layer_in = NHWC2NCHW(network, *tensorrt_in_tensors_[0]); - if (transpose_layer_in == nullptr) { - MS_LOG(ERROR) << "transpose: NHWC->NCHW failed"; - return RET_ERROR; + + nvinfer1::ITensor *conv_input = tensorrt_in_tensors_[0].trt_tensor_; + if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D && + tensorrt_in_tensors_[0].format_ == Format::NHWC) { + // transpose: NHWC->NCHW 
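// Illustrative sketch, not part of this patch: NHWC2NCHW/NCHW2NHWC are helpers declared in
// tensorrt_utils.h whose bodies are not shown in this diff. Assuming <NvInfer.h>, a 4D input,
// and the standard TensorRT shuffle API, such a helper presumably looks like the following
// (SketchNHWC2NCHW is a hypothetical name used only for this note):
static nvinfer1::IShuffleLayer *SketchNHWC2NCHW(nvinfer1::INetworkDefinition *network, nvinfer1::ITensor &input) {
  nvinfer1::IShuffleLayer *shuffle = network->addShuffle(input);
  if (shuffle == nullptr) {
    return nullptr;
  }
  // move the channel axis in front of H and W: NHWC order (0,1,2,3) -> NCHW order (0,3,1,2)
  nvinfer1::Permutation perm{{0, 3, 1, 2}};
  shuffle->setFirstTranspose(perm);
  return shuffle;
}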
+ nvinfer1::IShuffleLayer *transpose_layer_in = NHWC2NCHW(network, *tensorrt_in_tensors_[0].trt_tensor_); + if (transpose_layer_in == nullptr) { + MS_LOG(ERROR) << "transpose: NHWC->NCHW failed"; + return RET_ERROR; + } + transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str()); + conv_input = transpose_layer_in->getOutput(0); } - transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str()); // transpose weight const mindspore::MSTensor &weight_tensor = in_tensors_[1]; @@ -86,7 +95,7 @@ int ConvolutionTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { } nvinfer1::IConvolutionLayer *conv_layer = - network->addConvolutionNd(*transpose_layer_in->getOutput(0), nbOutputMaps, kernelSize, kernelWeights, biasWeights); + network->addConvolutionNd(*conv_input, nbOutputMaps, kernelSize, kernelWeights, biasWeights); if (conv_layer == nullptr) { MS_LOG(ERROR) << "ConvolutionLayer failed"; @@ -111,15 +120,8 @@ int ConvolutionTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { activation_layer->setName((op_name_ + "_activation").c_str()); } - // transpose: NCHW->NHWC - nvinfer1::IShuffleLayer *transpose_layer_out = NCHW2NHWC(network, *activation_layer->getOutput(0)); - if (transpose_layer_out == nullptr) { - MS_LOG(ERROR) << "op action convert failed"; - return RET_ERROR; - } - transpose_layer_out->setName((op_name_ + "_transpose2NHWC").c_str()); - transpose_layer_out->getOutput(0)->setName(out_tensors_[0].Name().c_str()); - this->AddInnerOutTensors(transpose_layer_out->getOutput(0)); + activation_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str()); + this->AddInnerOutTensors(ITensorHelper{activation_layer->getOutput(0), Format::NCHW}); return RET_OK; } diff --git a/mindspore/lite/src/delegate/tensorrt/op/deconvolution_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/deconvolution_tensorrt.cc index af366c017ea..67690fe6e77 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/deconvolution_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/deconvolution_tensorrt.cc @@ -16,7 +16,6 @@ #include "src/delegate/tensorrt/op/deconvolution_tensorrt.h" #include "src/delegate/tensorrt/op/activation_tensorrt.h" -#include "src/delegate/tensorrt/tensorrt_utils.h" #include "nnacl/pack.h" namespace mindspore::lite { @@ -35,6 +34,10 @@ int DeconvolutionTensorRT::IsSupport(const schema::Primitive *primitive, MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size(); return RET_ERROR; } + if (in_tensors[0].format() != Format::NHWC && in_tensors[0].format() != Format::NCHW) { + MS_LOG(ERROR) << "Unsupported input tensor format of " << in_tensors[0].format(); + return RET_ERROR; + } return RET_OK; } int DeconvolutionTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { @@ -47,13 +50,18 @@ int DeconvolutionTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { MS_LOG(ERROR) << "op action convert failed"; return RET_ERROR; } - // transpose: NHWC->NCHW - nvinfer1::IShuffleLayer *transpose_layer_in = NHWC2NCHW(network, *tensorrt_in_tensors_[0]); - if (transpose_layer_in == nullptr) { - MS_LOG(ERROR) << "transpose: NHWC->NCHW failed"; - return RET_ERROR; + nvinfer1::ITensor *deconv_input = tensorrt_in_tensors_[0].trt_tensor_; + if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D && + tensorrt_in_tensors_[0].format_ == Format::NHWC) { + // transpose: NHWC->NCHW + nvinfer1::IShuffleLayer *transpose_layer_in = NHWC2NCHW(network, *tensorrt_in_tensors_[0].trt_tensor_); + if (transpose_layer_in == nullptr) { + 
MS_LOG(ERROR) << "transpose: NHWC->NCHW failed"; + return RET_ERROR; + } + transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str()); + deconv_input = transpose_layer_in->getOutput(0); } - transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str()); // transpose weight const mindspore::MSTensor &weight_tensor = in_tensors_[1]; @@ -83,8 +91,8 @@ int DeconvolutionTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { biasWeights.values = nullptr; } - nvinfer1::IDeconvolutionLayer *deconv_layer = network->addDeconvolutionNd( - *transpose_layer_in->getOutput(0), nbOutputMaps, kernelSize, kernelWeights, biasWeights); + nvinfer1::IDeconvolutionLayer *deconv_layer = + network->addDeconvolutionNd(*deconv_input, nbOutputMaps, kernelSize, kernelWeights, biasWeights); if (deconv_layer == nullptr) { MS_LOG(ERROR) << "DeconvolutionLayer failed"; @@ -109,15 +117,8 @@ int DeconvolutionTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { activation_layer->setName((op_name_ + "_activation").c_str()); } - // transpose: NCHW->NHWC - nvinfer1::IShuffleLayer *transpose_layer_out = NCHW2NHWC(network, *activation_layer->getOutput(0)); - if (transpose_layer_out == nullptr) { - MS_LOG(ERROR) << "op action convert failed"; - return RET_ERROR; - } - transpose_layer_out->setName((op_name_ + "_transpose2NHWC").c_str()); - transpose_layer_out->getOutput(0)->setName(out_tensors_[0].Name().c_str()); - this->AddInnerOutTensors(transpose_layer_out->getOutput(0)); + activation_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str()); + this->AddInnerOutTensors(ITensorHelper{activation_layer->getOutput(0), Format::NCHW}); return RET_OK; } diff --git a/mindspore/lite/src/delegate/tensorrt/op/elementwise_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/elementwise_tensorrt.cc index 896d2bf3f62..5da75f83a27 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/elementwise_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/elementwise_tensorrt.cc @@ -80,26 +80,38 @@ int ElementWiseTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { MS_LOG(ERROR) << "network or input tensor size is invalid"; return RET_ERROR; } - first_in_tensor_index_ = strcmp(tensorrt_in_tensors_[0]->getName(), in_tensors_[0].Name().c_str()) == 0 ? 0 : 1; - // add elementwise + first_in_tensor_index_ = + strcmp(tensorrt_in_tensors_[0].trt_tensor_->getName(), in_tensors_[0].Name().c_str()) == 0 ? 
0 : 1; + if (this->tensorrt_in_tensors_.size() != INPUT_SIZE2) { - // create ITensor from MS constant tensor of index 1 - first_in_tensor_index_ - nvinfer1::ITensor *constant_input = nullptr; - if (this->in_tensors_[1 - first_in_tensor_index_].Shape().size() == 0) { - constant_input = lite::ConvertScalarToITensor(network, this->in_tensors_[first_in_tensor_index_].Shape().size(), - in_tensors_[1 - first_in_tensor_index_].Data().get()); - } else { - constant_input = lite::ConvertConstantTensor(network, in_tensors_[1 - first_in_tensor_index_]); + int ret = AddConstTensor(network); + if (ret != RET_OK) { + MS_LOG(ERROR) << "AddConstTensor failed for " << op_name_; + return ret; } - if (constant_input == nullptr) { - MS_LOG(ERROR) << "create Itensor from constant tensor failed: " << op_name_; - return RET_ERROR; - } - this->AddInnerInTensors(constant_input); } - nvinfer1::IElementWiseLayer *cal_layer = network->addElementWise( - *tensorrt_in_tensors_[first_in_tensor_index_], *tensorrt_in_tensors_[1 - first_in_tensor_index_], element_wise_op_); + if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D && + tensorrt_in_tensors_[0].format_ != tensorrt_in_tensors_[1].format_) { + // when inputs format are different, change to NHWC + int transpose_input_tensor = tensorrt_in_tensors_[0].format_ == Format::NCHW ? 0 : 1; + nvinfer1::IShuffleLayer *transpose_layer = + NCHW2NHWC(network, *tensorrt_in_tensors_[transpose_input_tensor].trt_tensor_); + if (transpose_layer == nullptr) { + MS_LOG(ERROR) << "op action convert failed"; + return RET_ERROR; + } + transpose_layer->setName((op_name_ + "_input_transpose2NHWC").c_str()); + tensorrt_in_tensors_[transpose_input_tensor].trt_tensor_ = transpose_layer->getOutput(0); + tensorrt_in_tensors_[transpose_input_tensor].format_ = Format::NHWC; + } else if (tensorrt_in_tensors_[0].format_ != tensorrt_in_tensors_[1].format_) { + MS_LOG(ERROR) << "elementwise op inputs are in different format: " << op_name_; + return RET_ERROR; + } + + nvinfer1::IElementWiseLayer *cal_layer = + network->addElementWise(*tensorrt_in_tensors_[first_in_tensor_index_].trt_tensor_, + *tensorrt_in_tensors_[1 - first_in_tensor_index_].trt_tensor_, element_wise_op_); if (cal_layer == nullptr) { MS_LOG(ERROR) << "addElementWise failed for TensorRT."; @@ -129,9 +141,8 @@ int ElementWiseTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { MS_LOG(WARNING) << "deal with scale and shift for pow op"; } } - op_out_tensor->setName(out_tensors_[0].Name().c_str()); - this->AddInnerOutTensors(op_out_tensor); + this->AddInnerOutTensors(ITensorHelper{op_out_tensor, tensorrt_in_tensors_[1].format_}); return RET_OK; } @@ -184,4 +195,26 @@ nvinfer1::ITensor *ElementWiseTensorRT::AddActivation(nvinfer1::INetworkDefiniti } return activation_out_tensor; } +int ElementWiseTensorRT::AddConstTensor(nvinfer1::INetworkDefinition *network) { + // create ITensor from MS constant tensor of index 1 - first_in_tensor_index_ + nvinfer1::ITensor *constant_input = nullptr; + if (this->in_tensors_[1 - first_in_tensor_index_].Shape().size() == 0) { + constant_input = lite::ConvertScalarToITensor(network, this->in_tensors_[first_in_tensor_index_].Shape().size(), + in_tensors_[1 - first_in_tensor_index_].Data().get()); + if (constant_input == nullptr) { + MS_LOG(ERROR) << "create Itensor from constant tensor failed: " << op_name_; + return RET_ERROR; + } + this->AddInnerInTensors(ITensorHelper{constant_input, tensorrt_in_tensors_[0].format_}); + } else { + constant_input = 
lite::ConvertConstantTensor(network, in_tensors_[1 - first_in_tensor_index_]); + if (constant_input == nullptr) { + MS_LOG(ERROR) << "create Itensor from constant tensor failed: " << op_name_; + return RET_ERROR; + } + this->AddInnerInTensors(ITensorHelper{constant_input, Format::NHWC}); + } + return RET_OK; +} + } // namespace mindspore::lite diff --git a/mindspore/lite/src/delegate/tensorrt/op/elementwise_tensorrt.h b/mindspore/lite/src/delegate/tensorrt/op/elementwise_tensorrt.h index c927ab074dd..e08a03d6ebc 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/elementwise_tensorrt.h +++ b/mindspore/lite/src/delegate/tensorrt/op/elementwise_tensorrt.h @@ -37,6 +37,8 @@ class ElementWiseTensorRT : public TensorRTOp { private: nvinfer1::ITensor *AddActivation(nvinfer1::INetworkDefinition *network, nvinfer1::ITensor *in_tensor); + int AddConstTensor(nvinfer1::INetworkDefinition *network); + nvinfer1::ElementWiseOperation element_wise_op_; // index of first input MSTensor in the trt input tensor vector diff --git a/mindspore/lite/src/delegate/tensorrt/op/gather_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/gather_tensorrt.cc index 188b7c9b874..d1cee562fa2 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/gather_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/gather_tensorrt.cc @@ -59,15 +59,30 @@ int GatherTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { MS_LOG(ERROR) << "add a new tensor failed for TensorRT GatherTensorRTOp."; return RET_ERROR; } - nvinfer1::IGatherLayer *gather_layer = - network->addGather(*tensorrt_in_tensors_[0], *add_tensor /* indices */, axis_ /* axis */); + + nvinfer1::ITensor *gather_input = tensorrt_in_tensors_[0].trt_tensor_; + Format out_format = tensorrt_in_tensors_[0].format_; + if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D && + tensorrt_in_tensors_[0].format_ == Format::NCHW) { + // transpose: NCHW->NHWC + nvinfer1::IShuffleLayer *transpose_layer_in = NCHW2NHWC(network, *tensorrt_in_tensors_[0].trt_tensor_); + if (transpose_layer_in == nullptr) { + MS_LOG(ERROR) << "op action convert failed"; + return RET_ERROR; + } + transpose_layer_in->setName((op_name_ + "_transpose2NHWC").c_str()); + gather_input = transpose_layer_in->getOutput(0); + out_format = Format::NHWC; + } + + nvinfer1::IGatherLayer *gather_layer = network->addGather(*gather_input, *add_tensor /* indices */, axis_ /* axis */); if (gather_layer == nullptr) { MS_LOG(ERROR) << "addGather failed for TensorRT."; return RET_ERROR; } gather_layer->setName(op_name_.c_str()); gather_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str()); - this->AddInnerOutTensors(gather_layer->getOutput(0)); + this->AddInnerOutTensors(ITensorHelper{gather_layer->getOutput(0), out_format}); return RET_OK; } } // namespace mindspore::lite diff --git a/mindspore/lite/src/delegate/tensorrt/op/matmul_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/matmul_tensorrt.cc index 1ee26217aa4..eddbf670091 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/matmul_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/matmul_tensorrt.cc @@ -43,7 +43,22 @@ int MatMulTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { transpose_b_ = primitive->transpose_b() ? 
nvinfer1::MatrixOperation::kTRANSPOSE : nvinfer1::MatrixOperation::kNONE; auto weight = ConvertTensorWithExpandDims(network, in_tensors_[1], in_tensors_[0].Shape().size()); - auto matmul_layer = network->addMatrixMultiply(*tensorrt_in_tensors_[0], transpose_a_, *weight, transpose_b_); + nvinfer1::ITensor *matmul_input = tensorrt_in_tensors_[0].trt_tensor_; + Format out_format = tensorrt_in_tensors_[0].format_; + if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D && + tensorrt_in_tensors_[0].format_ == Format::NCHW) { + // transpose: NCHW->NHWC + nvinfer1::IShuffleLayer *transpose_layer_in = NCHW2NHWC(network, *tensorrt_in_tensors_[0].trt_tensor_); + if (transpose_layer_in == nullptr) { + MS_LOG(ERROR) << "op action convert failed"; + return RET_ERROR; + } + transpose_layer_in->setName((op_name_ + "_transpose2NHWC").c_str()); + matmul_input = transpose_layer_in->getOutput(0); + out_format = Format::NHWC; + } + + auto matmul_layer = network->addMatrixMultiply(*matmul_input, transpose_a_, *weight, transpose_b_); matmul_layer->setName(op_name_.c_str()); nvinfer1::ITensor *out_tensor = matmul_layer->getOutput(0); @@ -56,7 +71,7 @@ int MatMulTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { } out_tensor->setName(out_tensors_[0].Name().c_str()); - this->AddInnerOutTensors(out_tensor); + this->AddInnerOutTensors(ITensorHelper{out_tensor, out_format}); return RET_OK; } } // namespace mindspore::lite diff --git a/mindspore/lite/src/delegate/tensorrt/op/pad_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/pad_tensorrt.cc index ad93cd1c1d9..0d742255490 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/pad_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/pad_tensorrt.cc @@ -49,6 +49,10 @@ int PadTensorRT::IsSupport(const mindspore::schema::Primitive *primitive, MS_LOG(ERROR) << "Unsupported padding mode: " << pad_primitive << ", for op: " << op_name_; return RET_ERROR; } + if (in_tensors[0].format() != Format::NHWC && in_tensors[0].format() != Format::NCHW) { + MS_LOG(ERROR) << "Unsupported input tensor format of " << in_tensors[0].format(); + return RET_ERROR; + } constant_value_ = pad_primitive->constant_value(); return RET_OK; } @@ -56,18 +60,24 @@ int PadTensorRT::IsSupport(const mindspore::schema::Primitive *primitive, int PadTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { mindspore::MSTensor &pad_tensor = in_tensors_[1]; int element_cnt = std::accumulate(pad_tensor.Shape().begin(), pad_tensor.Shape().end(), 1, std::multiplies()); - if (element_cnt != tensorrt_in_tensors_[0]->getDimensions().nbDims * 2) { + if (element_cnt != tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims * 2) { MS_LOG(ERROR) << "pad tensor cnt is invalid. 
cnt: " << element_cnt - << ", input tensor dims cnt: " << tensorrt_in_tensors_[0]->getDimensions().nbDims; + << ", input tensor dims cnt: " << tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims; return RET_ERROR; } - // transpose: NHWC->NCHW - nvinfer1::IShuffleLayer *transpose_layer_in = NHWC2NCHW(network, *tensorrt_in_tensors_[0]); - if (transpose_layer_in == nullptr) { - MS_LOG(ERROR) << "transpose: NHWC->NCHW failed"; - return RET_ERROR; + + nvinfer1::ITensor *pad_input = tensorrt_in_tensors_[0].trt_tensor_; + if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D && + tensorrt_in_tensors_[0].format_ == Format::NHWC) { + // transpose: NHWC->NCHW + nvinfer1::IShuffleLayer *transpose_layer_in = NHWC2NCHW(network, *tensorrt_in_tensors_[0].trt_tensor_); + if (transpose_layer_in == nullptr) { + MS_LOG(ERROR) << "transpose: NHWC->NCHW failed"; + return RET_ERROR; + } + transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str()); + pad_input = transpose_layer_in->getOutput(0); } - transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str()); // trt 6 only support 2D padding const int *padding_data = reinterpret_cast(in_tensors_[1].Data().get()); @@ -84,7 +94,7 @@ int PadTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { MS_LOG(INFO) << "prePadding: " << *(padding_data + 2) << ", " << *(padding_data + 4); MS_LOG(INFO) << "postPadding: " << *(padding_data + 3) << ", " << *(padding_data + 5); - padding_layer = network->addPadding(*transpose_layer_in->getOutput(0), prePadding, postPadding); + padding_layer = network->addPadding(*pad_input, prePadding, postPadding); } else { MS_LOG(ERROR) << "need check for pad_tensor dims: " << op_name_ << ", pad_tensor ElementNum: " << pad_tensor.ElementNum(); @@ -95,17 +105,8 @@ int PadTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { return RET_ERROR; } padding_layer->setName(op_name_.c_str()); - - // transpose: NCHW->NHWC - nvinfer1::IShuffleLayer *transpose_layer_out = NCHW2NHWC(network, *padding_layer->getOutput(0)); - if (transpose_layer_out == nullptr) { - MS_LOG(ERROR) << "op action convert failed"; - return RET_ERROR; - } - transpose_layer_out->setName((op_name_ + "_transpose2NHWC").c_str()); - transpose_layer_out->getOutput(0)->setName(out_tensors_[0].Name().c_str()); - - this->AddInnerOutTensors(transpose_layer_out->getOutput(0)); + padding_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str()); + this->AddInnerOutTensors(ITensorHelper{padding_layer->getOutput(0), Format::NCHW}); return RET_OK; } } // namespace mindspore::lite diff --git a/mindspore/lite/src/delegate/tensorrt/op/pool_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/pool_tensorrt.cc index 3ade0a4834b..570ec1de889 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/pool_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/pool_tensorrt.cc @@ -34,6 +34,10 @@ int PoolTensorRT::IsSupport(const mindspore::schema::Primitive *primitive, MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size(); return RET_ERROR; } + if (in_tensors[0].format() != Format::NHWC && in_tensors[0].format() != Format::NCHW) { + MS_LOG(ERROR) << "Unsupported input tensor format of " << in_tensors[0].format(); + return RET_ERROR; + } return RET_OK; } @@ -47,13 +51,18 @@ int PoolTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { MS_LOG(ERROR) << "invalid input tensor size: " << tensorrt_in_tensors_.size(); return RET_ERROR; } - // transpose: NHWC->NCHW - nvinfer1::IShuffleLayer *transpose_layer_in 
= NHWC2NCHW(network, *tensorrt_in_tensors_[0]); - if (transpose_layer_in == nullptr) { - MS_LOG(ERROR) << "transpose: NHWC->NCHW failed"; - return RET_ERROR; + nvinfer1::ITensor *pool_input = tensorrt_in_tensors_[0].trt_tensor_; + if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D && + tensorrt_in_tensors_[0].format_ == Format::NHWC) { + // transpose: NHWC->NCHW + nvinfer1::IShuffleLayer *transpose_layer_in = NHWC2NCHW(network, *tensorrt_in_tensors_[0].trt_tensor_); + if (transpose_layer_in == nullptr) { + MS_LOG(ERROR) << "transpose: NHWC->NCHW failed"; + return RET_ERROR; + } + transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str()); + pool_input = transpose_layer_in->getOutput(0); } - transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str()); // pooling layer nvinfer1::PoolingType pooling_type = nvinfer1::PoolingType::kAVERAGE; @@ -64,8 +73,7 @@ int PoolTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { } std::vector kernel_size_val = std::vector(kernel_size->begin(), kernel_size->end()); nvinfer1::Dims windowSize = lite::ConvertCudaDims(kernel_size_val); - nvinfer1::IPoolingLayer *pooling_layer = - network->addPoolingNd(*transpose_layer_in->getOutput(0), pooling_type, windowSize); + nvinfer1::IPoolingLayer *pooling_layer = network->addPoolingNd(*pool_input, pooling_type, windowSize); if (pooling_layer == nullptr) { MS_LOG(ERROR) << "addPoolingNd failed for TensorRT."; return RET_ERROR; @@ -86,15 +94,8 @@ int PoolTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { } activation_layer->setName((op_name_ + "_activation").c_str()); } - // transpose: NCHW->NHWC - nvinfer1::IShuffleLayer *transpose_layer_out = NCHW2NHWC(network, *activation_layer->getOutput(0)); - if (transpose_layer_out == nullptr) { - MS_LOG(ERROR) << "op action convert failed"; - return RET_ERROR; - } - transpose_layer_out->setName((op_name_ + "_transpose2NHWC").c_str()); - transpose_layer_out->getOutput(0)->setName(out_tensors_[0].Name().c_str()); - this->AddInnerOutTensors(transpose_layer_out->getOutput(0)); + activation_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str()); + this->AddInnerOutTensors(ITensorHelper{activation_layer->getOutput(0), Format::NCHW}); return RET_OK; } diff --git a/mindspore/lite/src/delegate/tensorrt/op/reduce_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/reduce_tensorrt.cc index 3cf38700868..43ea9f657c0 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/reduce_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/reduce_tensorrt.cc @@ -28,7 +28,7 @@ int ReduceTensorRT::IsSupport(const schema::Primitive *primitive, const std::vec MS_LOG(ERROR) << "convert failed"; return RET_ERROR; } - if (in_tensors.size() != 2) { + if (in_tensors.size() != INPUT_SIZE2) { MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size(); } if (out_tensors.size() != 1) { @@ -55,23 +55,17 @@ int ReduceTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { return RET_ERROR; } bool keep_dims = reduce_op->keep_dims(); - // axis - uint32_t reduceAxes = 0; - mindspore::MSTensor axis_tensor = this->in_tensors_[1]; - if (axis_tensor.Data() == nullptr) { - MS_LOG(ERROR) << "invalid axis_tensor"; - return RET_ERROR; + nvinfer1::ITensor *reduce_input = tensorrt_in_tensors_[0].trt_tensor_; + if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D && + tensorrt_in_tensors_[0].format_ == Format::NCHW) { + out_format_ = Format::NHWC; + } else { + out_format_ = 
tensorrt_in_tensors_[0].format_; } - if (axis_tensor.DataType() != DataType::kNumberTypeInt32) { - MS_LOG(WARNING) << "not int data type"; - } - int *axis_data = reinterpret_cast(axis_tensor.MutableData()); - for (int i = 0; i < axis_tensor.ElementNum(); i++) { - reduceAxes |= (16 - (1u << *axis_data)); - axis_data++; - } - MS_LOG(INFO) << "reduceAxes: " << reduceAxes; - nvinfer1::IReduceLayer *layer = network->addReduce(*tensorrt_in_tensors_[0], reduce_op_, reduceAxes, keep_dims); + + uint32_t reduceAxis = GetAxis(); + + nvinfer1::IReduceLayer *layer = network->addReduce(*reduce_input, reduce_op_, reduceAxis, keep_dims); if (layer == nullptr) { MS_LOG(ERROR) << "addReduce failed for TensorRT."; return RET_ERROR; @@ -84,7 +78,29 @@ int ReduceTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { return RET_ERROR; } out_tensor->setName(out_tensors_[0].Name().c_str()); - this->AddInnerOutTensors(out_tensor); + this->AddInnerOutTensors(ITensorHelper{out_tensor, out_format_}); return RET_OK; } +uint32_t ReduceTensorRT::GetAxis() { + // axis + uint32_t reduceAxis = 0; + mindspore::MSTensor axis_tensor = this->in_tensors_[1]; + if (axis_tensor.Data() == nullptr) { + MS_LOG(ERROR) << "invalid axis_tensor"; + return reduceAxis; + } + if (axis_tensor.DataType() != DataType::kNumberTypeInt32) { + MS_LOG(WARNING) << "not int data type"; + } + int *axis_data = reinterpret_cast(axis_tensor.MutableData()); + bool need_transpose_axis = + (out_format_ == Format::NCHW) && (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D); + for (int i = 0; i < axis_tensor.ElementNum(); i++) { + int format_axis_data = need_transpose_axis ? ConvertAxisFromNHWC2NCHW(*axis_data) : *axis_data; + reduceAxis |= (16 - (1u << format_axis_data)); + axis_data++; + } + MS_LOG(INFO) << "reduceAxis: " << reduceAxis; + return reduceAxis; +} } // namespace mindspore::lite diff --git a/mindspore/lite/src/delegate/tensorrt/op/reduce_tensorrt.h b/mindspore/lite/src/delegate/tensorrt/op/reduce_tensorrt.h index b325e4b60e4..55c5cdf0307 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/reduce_tensorrt.h +++ b/mindspore/lite/src/delegate/tensorrt/op/reduce_tensorrt.h @@ -36,6 +36,7 @@ class ReduceTensorRT : public TensorRTOp { const std::vector &out_tensors) override; private: + uint32_t GetAxis(); std::map reduce_ops_ = { {schema::ReduceMode::ReduceMode_ReduceMean, nvinfer1::ReduceOperation::kAVG}, {schema::ReduceMode::ReduceMode_ReduceMax, nvinfer1::ReduceOperation::kMAX}, @@ -44,6 +45,7 @@ class ReduceTensorRT : public TensorRTOp { {schema::ReduceMode::ReduceMode_ReduceSum, nvinfer1::ReduceOperation::kSUM}, }; nvinfer1::ReduceOperation reduce_op_; + Format out_format_; }; } // namespace mindspore::lite #endif // MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_OP_REDUCE_TENSORRT_H_ diff --git a/mindspore/lite/src/delegate/tensorrt/op/scale_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/scale_tensorrt.cc index 764f13938c1..dbcd7aa3574 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/scale_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/scale_tensorrt.cc @@ -17,6 +17,7 @@ #include #include #include "src/delegate/tensorrt/op/scale_tensorrt.h" +#include "src/delegate/tensorrt/op/activation_tensorrt.h" #include "src/delegate/tensorrt/tensorrt_utils.h" namespace mindspore::lite { @@ -53,14 +54,26 @@ int ScaleTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { } schema::ActivationType activation_type = scale_op->activation_type(); - nvinfer1::ITensor *scale_in_tensor = tensorrt_in_tensors_[0]; 
- // unsqueeze input Itensor to 4 dims + nvinfer1::ITensor *scale_in_tensor = tensorrt_in_tensors_[0].trt_tensor_; + Format out_format = in_tensors_[0].format(); if (in_tensors_[0].Shape().size() < INPUT_SIZE4) { + // unsqueeze input Itensor to 4 dims scale_in_tensor = AddUnsqueezeOp(network); if (scale_in_tensor == nullptr) { MS_LOG(ERROR) << "AddUnsqueezeOp failed"; return RET_ERROR; } + } else if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == 4 && + tensorrt_in_tensors_[0].format_ == Format::NCHW) { + // transpose: NCHW->NHWC + nvinfer1::IShuffleLayer *transpose_layer_in = NCHW2NHWC(network, *tensorrt_in_tensors_[0].trt_tensor_); + if (transpose_layer_in == nullptr) { + MS_LOG(ERROR) << "op action convert failed"; + return RET_ERROR; + } + transpose_layer_in->setName((op_name_ + "_transpose2NHWC").c_str()); + scale_in_tensor = transpose_layer_in->getOutput(0); + out_format = Format::NHWC; } // mode of scale size_t axis = scale_op->axis(); @@ -100,18 +113,27 @@ int ScaleTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { return RET_ERROR; } cal_layer->setName(op_name_.c_str()); - nvinfer1::ITensor *op_out_tensor = cal_layer->getOutput(0); - if (op_out_tensor == nullptr) { - MS_LOG(ERROR) << "addScaleNd output tensor is invalid for: " << op_name_; - return RET_ERROR; - } // add activation + nvinfer1::ITensor *activation_tensor = cal_layer->getOutput(0); if (activation_type != schema::ActivationType::ActivationType_NO_ACTIVATION) { - MS_LOG(WARNING) << "need activation for: " << op_name_; + auto activation_layer = ActivationTensorRT::AddActivation(network, activation_type, 0, cal_layer->getOutput(0)); + if (activation_layer == nullptr) { + MS_LOG(ERROR) << "addActivation for scale failed"; + return RET_ERROR; + } + activation_layer->setName((op_name_ + "_activation").c_str()); + activation_tensor = activation_layer->getOutput(0); } + + // squeeze to origin dim + nvinfer1::ITensor *op_out_tensor = activation_tensor; + if (activation_tensor->getDimensions().nbDims > static_cast(out_tensors_[0].Shape().size())) { + op_out_tensor = AddSqueezeOp(activation_tensor, network); + } + op_out_tensor->setName(out_tensors_[0].Name().c_str()); - this->AddInnerOutTensors(op_out_tensor); + this->AddInnerOutTensors(ITensorHelper{op_out_tensor, out_format}); return RET_OK; } @@ -136,7 +158,7 @@ nvinfer1::ScaleMode ScaleTensorRT::GetScaleMode(size_t axis) { } nvinfer1::ITensor *ScaleTensorRT::AddUnsqueezeOp(nvinfer1::INetworkDefinition *network) { - nvinfer1::IShuffleLayer *unsqueeze_layer = network->addShuffle(*this->tensorrt_in_tensors_[0]); + nvinfer1::IShuffleLayer *unsqueeze_layer = network->addShuffle(*this->tensorrt_in_tensors_[0].trt_tensor_); if (unsqueeze_layer == nullptr) { MS_LOG(ERROR) << "addShuffle failed for: " << op_name_; return nullptr; @@ -150,4 +172,17 @@ nvinfer1::ITensor *ScaleTensorRT::AddUnsqueezeOp(nvinfer1::INetworkDefinition *n unsqueeze_layer->setReshapeDimensions(unsqueeze_dims); return unsqueeze_layer->getOutput(0); } + +nvinfer1::ITensor *ScaleTensorRT::AddSqueezeOp(nvinfer1::ITensor *in_tensor, nvinfer1::INetworkDefinition *network) { + nvinfer1::IShuffleLayer *squeeze_layer = network->addShuffle(*in_tensor); + if (squeeze_layer == nullptr) { + MS_LOG(ERROR) << "addShuffle failed for: " << op_name_; + return nullptr; + } + squeeze_layer->setName((op_name_ + "_squeeze").c_str()); + nvinfer1::Dims squeeze_dims = lite::ConvertCudaDims(out_tensors_[0].Shape()); + MS_LOG(INFO) << "squeeze_dims cnt for scale: " << squeeze_dims.nbDims; + 
squeeze_layer->setReshapeDimensions(squeeze_dims); + return squeeze_layer->getOutput(0); +} } // namespace mindspore::lite diff --git a/mindspore/lite/src/delegate/tensorrt/op/scale_tensorrt.h b/mindspore/lite/src/delegate/tensorrt/op/scale_tensorrt.h index d4e68c5fdd7..3a8fce721f6 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/scale_tensorrt.h +++ b/mindspore/lite/src/delegate/tensorrt/op/scale_tensorrt.h @@ -38,6 +38,8 @@ class ScaleTensorRT : public TensorRTOp { private: nvinfer1::ITensor *AddUnsqueezeOp(nvinfer1::INetworkDefinition *network); + nvinfer1::ITensor *AddSqueezeOp(nvinfer1::ITensor *in_tensor, nvinfer1::INetworkDefinition *network); + nvinfer1::ScaleMode GetScaleMode(size_t axis); }; } // namespace mindspore::lite diff --git a/mindspore/lite/src/delegate/tensorrt/op/shape_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/shape_tensorrt.cc index 9e006341215..ad50e6204e1 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/shape_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/shape_tensorrt.cc @@ -38,7 +38,7 @@ int ShapeTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { MS_LOG(ERROR) << "network is invalid"; return RET_ERROR; } - nvinfer1::IShapeLayer *shape_layer = network->addShape(*tensorrt_in_tensors_[0]); + nvinfer1::IShapeLayer *shape_layer = network->addShape(*tensorrt_in_tensors_[0].trt_tensor_); if (shape_layer == nullptr) { MS_LOG(DEBUG) << "add shape op failed for TensorRT."; @@ -46,7 +46,7 @@ int ShapeTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { } shape_layer->setName(op_name_.c_str()); shape_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str()); - this->AddInnerOutTensors(shape_layer->getOutput(0)); + this->AddInnerOutTensors(ITensorHelper{shape_layer->getOutput(0), tensorrt_in_tensors_[0].format_}); return RET_OK; } } // namespace mindspore::lite diff --git a/mindspore/lite/src/delegate/tensorrt/op/shuffle_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/shuffle_tensorrt.cc index 5f8d20b2f65..4a6c8733877 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/shuffle_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/shuffle_tensorrt.cc @@ -72,58 +72,56 @@ int ShuffleTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { MS_LOG(ERROR) << "network is invalid"; return RET_ERROR; } - nvinfer1::IShuffleLayer *shuffle_layer = network->addShuffle(*tensorrt_in_tensors_[0]); + + nvinfer1::ITensor *shuffler_input = tensorrt_in_tensors_[0].trt_tensor_; + if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == 4 && + tensorrt_in_tensors_[0].format_ == Format::NCHW && !tensorrt_in_tensors_[0].trt_tensor_->isNetworkInput()) { + // network input tensor format can be NCHW + nvinfer1::IShuffleLayer *transpose_layer = NCHW2NHWC(network, *tensorrt_in_tensors_[0].trt_tensor_); + if (transpose_layer == nullptr) { + MS_LOG(ERROR) << "create transpose layer failed for " << op_name_; + } + transpose_layer->setName((op_name_ + "_transpose_in").c_str()); + shuffler_input = transpose_layer->getOutput(0); + } + + nvinfer1::IShuffleLayer *shuffle_layer = network->addShuffle(*shuffler_input); if (shuffle_layer == nullptr) { MS_LOG(ERROR) << "add Shuffle op failed for TensorRT."; return RET_ERROR; } shuffle_layer->setName(op_name_.c_str()); + int ret = RET_OK; switch (type_) { case schema::PrimitiveType_Unsqueeze: { - int ret = AddUnsqueezeOp(shuffle_layer); - if (ret != RET_OK) { - MS_LOG(ERROR) << "AddUnSqueezeOp failed."; - return ret; - } + ret = AddUnsqueezeOp(shuffle_layer); break; } case 
schema::PrimitiveType_Squeeze: { - int ret = AddSqueezeOp(shuffle_layer); - if (ret != RET_OK) { - MS_LOG(ERROR) << "AddSqueezeOp failed."; - return ret; - } + ret = AddSqueezeOp(shuffle_layer); break; } case schema::PrimitiveType_Transpose: { - int ret = AddTransposeOp(shuffle_layer); - if (ret != RET_OK) { - MS_LOG(ERROR) << "AddTransposeOpss failed."; - return ret; - } + ret = AddTransposeOp(shuffle_layer); break; } case schema::PrimitiveType_Reshape: { - int ret = AddReshapeOp(shuffle_layer); - if (ret != RET_OK) { - MS_LOG(ERROR) << "AddReshapeOp failed."; - return ret; - } + ret = AddReshapeOp(shuffle_layer); break; } case schema::PrimitiveType_Flatten: { - int ret = AddFlattenOp(shuffle_layer); - if (ret != RET_OK) { - MS_LOG(ERROR) << "AddFlattenOp failed."; - return ret; - } + ret = AddFlattenOp(shuffle_layer); break; } default: - MS_LOG(ERROR) << "Unsupported op type."; + MS_LOG(ERROR) << "Unsupported op type for " << op_name_; return RET_ERROR; } + if (ret != RET_OK) { + MS_LOG(ERROR) << "AddOp failed for " << op_name_; + return ret; + } nvinfer1::ITensor *out_tensor = shuffle_layer->getOutput(0); if (out_tensor == nullptr) { @@ -131,7 +129,7 @@ int ShuffleTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { return RET_ERROR; } out_tensor->setName(out_tensors_[0].Name().c_str()); - this->AddInnerOutTensors(out_tensor); + this->AddInnerOutTensors(ITensorHelper{out_tensor, Format::NHWC}); return RET_OK; } @@ -177,7 +175,7 @@ int ShuffleTensorRT::AddUnsqueezeOp(nvinfer1::IShuffleLayer *shuffle_layer) { MS_LOG(WARNING) << "AddUnsqueezeOp size of in tensort needs check: " << in_tensors_.size(); } // axis - auto unsqueeze_shape = tensorrt_in_tensors_[0]->getDimensions(); + auto unsqueeze_shape = tensorrt_in_tensors_[0].trt_tensor_->getDimensions(); std::vector new_shape(unsqueeze_shape.d, unsqueeze_shape.d + unsqueeze_shape.nbDims); auto axis = unsqueeze_op->axis(); @@ -229,7 +227,7 @@ int ShuffleTensorRT::AddReshapeOp(nvinfer1::IShuffleLayer *shuffle_layer) { MS_LOG(ERROR) << "invalid shape tensor for reshape " << op_name_; return RET_ERROR; } - shuffle_layer->setInput(1, *tensorrt_in_tensors_[1]); + shuffle_layer->setInput(1, *tensorrt_in_tensors_[1].trt_tensor_); } return RET_OK; } diff --git a/mindspore/lite/src/delegate/tensorrt/op/slice_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/slice_tensorrt.cc index a5e172e0dc5..a6549cbc692 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/slice_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/slice_tensorrt.cc @@ -53,7 +53,22 @@ int SliceTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { nvinfer1::Dims size_dims = lite::ConvertCudaDims(out_tensors_[0].Shape()); nvinfer1::Dims stride_dims = lite::ConvertCudaDims(stride.Data().get(), stride.ElementNum()); - nvinfer1::ISliceLayer *slice_layer = network->addSlice(*tensorrt_in_tensors_[0], start_dims, size_dims, stride_dims); + nvinfer1::ITensor *slice_input = tensorrt_in_tensors_[0].trt_tensor_; + Format out_format = tensorrt_in_tensors_[0].format_; + if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == 4 && + tensorrt_in_tensors_[0].format_ == Format::NCHW) { + // transpose: NCHW->NHWC + nvinfer1::IShuffleLayer *transpose_layer_in = NCHW2NHWC(network, *tensorrt_in_tensors_[0].trt_tensor_); + if (transpose_layer_in == nullptr) { + MS_LOG(ERROR) << "op action convert failed"; + return RET_ERROR; + } + transpose_layer_in->setName((op_name_ + "_transpose2NHWC").c_str()); + slice_input = transpose_layer_in->getOutput(0); + out_format = 
Format::NHWC; + } + + nvinfer1::ISliceLayer *slice_layer = network->addSlice(*slice_input, start_dims, size_dims, stride_dims); if (slice_layer == nullptr) { MS_LOG(ERROR) << "add Slice op failed for TensorRT: " << op_name_; return RET_ERROR; @@ -65,7 +80,7 @@ int SliceTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { return RET_ERROR; } out_tensor->setName(out_tensors_[0].Name().c_str()); - this->AddInnerOutTensors(out_tensor); + this->AddInnerOutTensors(ITensorHelper{out_tensor, out_format}); return RET_OK; } } // namespace mindspore::lite diff --git a/mindspore/lite/src/delegate/tensorrt/op/softmax_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/softmax_tensorrt.cc index e65508276f7..1e0816e188b 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/softmax_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/softmax_tensorrt.cc @@ -23,19 +23,10 @@ int SoftMaxTensorRT::IsSupport(const schema::Primitive *primitive, const std::ve MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_; return RET_ERROR; } - if (primitive->value_type() == schema::PrimitiveType::PrimitiveType_LogSoftmax) { - with_log_ = true; - auto softmax_op = primitive->value_as_LogSoftmax(); - if (softmax_op == nullptr) { - MS_LOG(ERROR) << "LogSoftmax convert failed"; - return RET_ERROR; - } - } else { - auto softmax_op = primitive->value_as_Softmax(); - if (softmax_op == nullptr) { - MS_LOG(ERROR) << "convert failed"; - return RET_ERROR; - } + softmax_op_ = primitive->value_as_Softmax(); + if (softmax_op_ == nullptr) { + MS_LOG(ERROR) << "convert failed"; + return RET_ERROR; } if (in_tensors.size() != 1) { @@ -48,7 +39,6 @@ int SoftMaxTensorRT::IsSupport(const schema::Primitive *primitive, const std::ve } return RET_OK; } - int SoftMaxTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { if (network == nullptr) { MS_LOG(ERROR) << "network is invalid"; @@ -66,58 +56,36 @@ int SoftMaxTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { MS_LOG(ERROR) << "softmax output tensor create failed for TensorRT."; return RET_ERROR; } - if (with_log_) { - nvinfer1::IUnaryLayer *log_layer = network->addUnary(*out_tensor, nvinfer1::UnaryOperation::kLOG); - if (log_layer == nullptr) { - MS_LOG(ERROR) << "add log op failed for TensorRT."; - return RET_ERROR; - } - log_layer->setName((op_name_ + "_log").c_str()); - out_tensor = log_layer->getOutput(0); - if (out_tensor == nullptr) { - MS_LOG(ERROR) << "softmax log output tensor create failed for TensorRT."; - return RET_ERROR; - } - } out_tensor->setName(out_tensors_[0].Name().c_str()); - this->AddInnerOutTensors(out_tensor); + this->AddInnerOutTensors(ITensorHelper{out_tensor, tensorrt_in_tensors_[0].format_}); return RET_OK; } nvinfer1::ISoftMaxLayer *SoftMaxTensorRT::AddSoftMaxOp(nvinfer1::INetworkDefinition *network) { - nvinfer1::ISoftMaxLayer *current_layer_ = network->addSoftMax(*this->GetInnerInTensors()[0]); + nvinfer1::ISoftMaxLayer *current_layer_ = network->addSoftMax(*tensorrt_in_tensors_[0].trt_tensor_); if (current_layer_ == nullptr) { MS_LOG(ERROR) << "add softmax op failed for TensorRT."; return nullptr; } std::vector axis_val; - if (with_log_) { - auto softmax_op = this->GetPrimitive()->value_as_LogSoftmax(); - if (softmax_op == nullptr) { - MS_LOG(ERROR) << "LogSoftmax convert failed"; - return nullptr; - } - int64_t axis = softmax_op->axis(); - axis_val.push_back(axis); - } else { - auto softmax_op = this->GetPrimitive()->value_as_Softmax(); - if (softmax_op == nullptr) { - MS_LOG(ERROR) << "Softmax convert 
failed"; - return nullptr; - } - auto axis = softmax_op->axis(); - axis_val = std::vector(axis->begin(), axis->end()); - } + auto axis = softmax_op_->axis(); + axis_val = std::vector(axis->begin(), axis->end()); if (axis_val.size() != 1) { MS_LOG(WARNING) << "axis needs check"; } - if (axis_val[0] >= this->tensorrt_in_tensors_[0]->getDimensions().nbDims) { + if (axis_val[0] >= this->tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims) { MS_LOG(ERROR) << "axis is larger than input tensor dims."; return nullptr; } - current_layer_->setAxes(axis_val[0]); + int64_t axis_format_value = axis_val[0]; + if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == 4 && + tensorrt_in_tensors_[0].format_ == Format::NCHW) { + // transpose axis to NCHW + axis_format_value = ConvertAxisFromNHWC2NCHW(axis_val[0]); + } + current_layer_->setAxes(axis_format_value); return current_layer_; } } // namespace mindspore::lite diff --git a/mindspore/lite/src/delegate/tensorrt/op/softmax_tensorrt.h b/mindspore/lite/src/delegate/tensorrt/op/softmax_tensorrt.h index 86f74e444ae..f14643168a1 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/softmax_tensorrt.h +++ b/mindspore/lite/src/delegate/tensorrt/op/softmax_tensorrt.h @@ -34,8 +34,9 @@ class SoftMaxTensorRT : public TensorRTOp { const std::vector &out_tensors) override; private: - bool with_log_ = false; nvinfer1::ISoftMaxLayer *AddSoftMaxOp(nvinfer1::INetworkDefinition *network); + + const schema::Softmax *softmax_op_; }; } // namespace mindspore::lite #endif // MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_OP_SOFTMAX_TENSORRT_H_ diff --git a/mindspore/lite/src/delegate/tensorrt/op/tensorrt_op.cc b/mindspore/lite/src/delegate/tensorrt/op/tensorrt_op.cc index 4f7b3ca8164..82a8f93d1dd 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/tensorrt_op.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/tensorrt_op.cc @@ -19,13 +19,13 @@ namespace mindspore::lite { const schema::Primitive *TensorRTOp::GetPrimitive() { return this->op_primitive_; } -void TensorRTOp::AddInnerInTensors(nvinfer1::ITensor *tensor) { this->tensorrt_in_tensors_.push_back(tensor); } +void TensorRTOp::AddInnerInTensors(ITensorHelper tensor) { this->tensorrt_in_tensors_.push_back(tensor); } -void TensorRTOp::AddInnerOutTensors(nvinfer1::ITensor *tensor) { this->tensorrt_out_tensors_.push_back(tensor); } +void TensorRTOp::AddInnerOutTensors(ITensorHelper tensor) { this->tensorrt_out_tensors_.push_back(tensor); } -std::vector &TensorRTOp::GetInnerOutTensor() { return this->tensorrt_out_tensors_; } +std::vector &TensorRTOp::GetInnerOutTensor() { return this->tensorrt_out_tensors_; } -std::vector &TensorRTOp::GetInnerInTensors() { return this->tensorrt_in_tensors_; } +std::vector &TensorRTOp::GetInnerInTensors() { return this->tensorrt_in_tensors_; } std::string TensorRTOp::GetOpName() { return this->op_name_; } diff --git a/mindspore/lite/src/delegate/tensorrt/op/tensorrt_op.h b/mindspore/lite/src/delegate/tensorrt/op/tensorrt_op.h index 73848185c0a..2e941d40a90 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/tensorrt_op.h +++ b/mindspore/lite/src/delegate/tensorrt/op/tensorrt_op.h @@ -23,12 +23,18 @@ #include "include/api/kernel.h" #include "src/common/log_adapter.h" #include "include/errorcode.h" +#include "src/delegate/tensorrt/tensorrt_utils.h" namespace mindspore::lite { constexpr int INPUT_SIZE2 = 2; constexpr int INPUT_SIZE3 = 3; constexpr int INPUT_SIZE4 = 4; +struct ITensorHelper { + nvinfer1::ITensor *trt_tensor_{nullptr}; + mindspore::Format format_; +}; + class TensorRTOp { 
public: explicit TensorRTOp(const schema::Primitive *primitive, std::vector in_tensors, @@ -51,13 +57,13 @@ class TensorRTOp { const schema::Primitive *GetPrimitive(); - void AddInnerInTensors(nvinfer1::ITensor *tensor); + void AddInnerInTensors(ITensorHelper tensor); - void AddInnerOutTensors(nvinfer1::ITensor *tensor); + void AddInnerOutTensors(ITensorHelper tensor); - std::vector &GetInnerOutTensor(); + std::vector &GetInnerOutTensor(); - std::vector &GetInnerInTensors(); + std::vector &GetInnerInTensors(); std::string GetOpName(); @@ -86,9 +92,9 @@ class TensorRTOp { std::vector out_tensors_; - std::vector tensorrt_in_tensors_; + std::vector tensorrt_in_tensors_; - std::vector tensorrt_out_tensors_; + std::vector tensorrt_out_tensors_; std::vector in_ops_; diff --git a/mindspore/lite/src/delegate/tensorrt/op/unary_tensorrt.cc b/mindspore/lite/src/delegate/tensorrt/op/unary_tensorrt.cc index c5f59da7825..b68a4799609 100644 --- a/mindspore/lite/src/delegate/tensorrt/op/unary_tensorrt.cc +++ b/mindspore/lite/src/delegate/tensorrt/op/unary_tensorrt.cc @@ -44,7 +44,7 @@ int UnaryTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { MS_LOG(ERROR) << "network or input tensor is invalid"; return RET_ERROR; } - nvinfer1::IUnaryLayer *cal_layer = network->addUnary(*tensorrt_in_tensors_[0], unary_op_); + nvinfer1::IUnaryLayer *cal_layer = network->addUnary(*tensorrt_in_tensors_[0].trt_tensor_, unary_op_); if (cal_layer == nullptr) { MS_LOG(ERROR) << "addUnary failed for: " << op_name_; return RET_ERROR; @@ -53,7 +53,7 @@ int UnaryTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) { nvinfer1::ITensor *op_out_tensor = cal_layer->getOutput(0); op_out_tensor->setName(out_tensors_[0].Name().c_str()); - this->AddInnerOutTensors(op_out_tensor); + this->AddInnerOutTensors(ITensorHelper{op_out_tensor, tensorrt_in_tensors_[0].format_}); return RET_OK; } } // namespace mindspore::lite diff --git a/mindspore/lite/src/delegate/tensorrt/tensorrt_subgraph.cc b/mindspore/lite/src/delegate/tensorrt/tensorrt_subgraph.cc index 46c0be23ad6..5ec047b13d2 100644 --- a/mindspore/lite/src/delegate/tensorrt/tensorrt_subgraph.cc +++ b/mindspore/lite/src/delegate/tensorrt/tensorrt_subgraph.cc @@ -187,6 +187,7 @@ nvinfer1::ITensor *TensorRTSubGraph::SetTensorRTNetworkInput(const mindspore::MS // only support NHWC HW dim resize if (input_hw_index_ != -1) { + MS_LOG(INFO) << "input tensor format: " << in_tensor.format(); input_hw_index_ = in_tensor.format() == Format::NHWC ? 
1 : /* NCHW*/ 2; input_dims.d[input_hw_index_] = -1; input_dims.d[input_hw_index_ + 1] = -1; @@ -208,19 +209,20 @@ int TensorRTSubGraph::BuildTensorRTGraph() { MS_LOG(ERROR) << "SetTensorRTNetworkInput failed for " << in_tensor.Name(); return RET_ERROR; } - cur_op->AddInnerInTensors(trt_tensor); + cur_op->AddInnerInTensors(ITensorHelper{trt_tensor, in_tensor.format()}); continue; } - auto trt_tensor = FindTensorRTInputs(cur_op, in_tensor); - // weight tensor - if (trt_tensor == nullptr) { + ITensorHelper trt_tensor = FindTensorRTInputs(cur_op, in_tensor); + if (trt_tensor.trt_tensor_ == nullptr) { + // weight tensor if (trt_specific_weight_nodes_.find(cur_op->type()) == trt_specific_weight_nodes_.end()) { if (in_tensor == nullptr) { MS_LOG(ERROR) << "Weight Tensor is nullptr."; return RET_ERROR; } - trt_tensor = lite::ConvertConstantTensor(this->network_, in_tensor); + trt_tensor.trt_tensor_ = lite::ConvertConstantTensor(this->network_, in_tensor); + trt_tensor.format_ = Format::NHWC; MS_LOG(INFO) << "auto convert constant tensor for: " << cur_op->GetOpName(); cur_op->AddInnerInTensors(trt_tensor); } @@ -236,16 +238,44 @@ int TensorRTSubGraph::BuildTensorRTGraph() { } } + ret = MarkOutputs(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "MarkOutputs failed in TensorRT network"; + return ret; + } + + ret = BuildEngine(); + if (ret != RET_OK) { + MS_LOG(ERROR) << "Create engine failed in TensorRT network"; + return ret; + } + return RET_OK; +} + +int TensorRTSubGraph::MarkOutputs() { // Mark NetWork Output Tensor. for (auto out_tensor : outputs_) { for (auto out_op : this->out_ops_) { for (size_t index = 0; index < out_op->outputs().size(); index++) { if (out_op->outputs()[index] == out_tensor) { - out_op->GetInnerOutTensor()[index]->setName(out_tensor.Name().c_str()); + nvinfer1::ITensor *out_trt_tensor = out_op->GetInnerOutTensor()[index].trt_tensor_; + if (out_op->GetInnerOutTensor()[index].trt_tensor_->getDimensions().nbDims == 4 && + out_op->GetInnerOutTensor()[index].format_ == Format::NCHW) { + // transpose subgraph output from nchw to nhwc + nvinfer1::IShuffleLayer *transpose_layer_out = + NCHW2NHWC(network_, *out_op->GetInnerOutTensor()[index].trt_tensor_); + if (transpose_layer_out == nullptr) { + MS_LOG(ERROR) << "op action convert failed"; + return RET_ERROR; + } + transpose_layer_out->setName((out_tensor.Name() + "_transpose2NHWC").c_str()); + } + + out_trt_tensor->setName(out_tensor.Name().c_str()); MS_LOG(INFO) << "markOutput for: " << out_tensor.Name(); - this->network_->markOutput(*out_op->GetInnerOutTensor()[index]); - for (int n = 0; n < out_op->GetInnerOutTensor()[index]->getDimensions().nbDims; n++) { - if (out_op->GetInnerOutTensor()[index]->getDimensions().d[n] == -1) { + this->network_->markOutput(*out_trt_tensor); + for (int n = 0; n < out_trt_tensor->getDimensions().nbDims; n++) { + if (out_trt_tensor->getDimensions().d[n] == -1) { output_batchsize_index_ = n; break; } @@ -254,12 +284,6 @@ int TensorRTSubGraph::BuildTensorRTGraph() { } } } - - ret = BuildEngine(); - if (ret != RET_OK) { - MS_LOG(ERROR) << "Create engine failed in TensorRT network"; - return ret; - } return RET_OK; } @@ -292,7 +316,7 @@ int TensorRTSubGraph::Prepare() { trt_in_tensor_name_.push_back(tensor.Name()); nvinfer1::Dims input_dims = ConvertCudaDims(tensor.Shape()); for (int od = 0; od < input_dims.nbDims; od++) { - MS_LOG(INFO) << "in tensor " << tensor.Name() << " dims at " << od << " is " << input_dims.d[od]; + MS_LOG(DEBUG) << "in tensor " << tensor.Name() << " dims at " << od << " is " << 
input_dims.d[od]; } if (!this->trt_context_->setBindingDimensions(index, input_dims)) { @@ -363,7 +387,7 @@ int TensorRTSubGraph::ReSize() { // Set actual input size nvinfer1::Dims input_dims = ConvertCudaDims(inputs_[i].Shape()); for (int od = 0; od < input_dims.nbDims; od++) { - MS_LOG(INFO) << "in tensor " << trt_in_tensor_name_[i] << " dims at " << od << " is " << input_dims.d[od]; + MS_LOG(DEBUG) << "in tensor " << trt_in_tensor_name_[i] << " dims at " << od << " is " << input_dims.d[od]; } if (!this->trt_context_->setBindingDimensions(index, input_dims)) { @@ -420,7 +444,7 @@ int TensorRTSubGraph::Execute() { new_shape[output_batchsize_index_] = runtime_->GetBatchSize(); } for (int od = 0; od < out_dims.nbDims; od++) { - MS_LOG(INFO) << "out tensor " << trt_out_tensor_name_[i] << " dims at " << od << " is " << new_shape[od]; + MS_LOG(DEBUG) << "out tensor " << trt_out_tensor_name_[i] << " dims at " << od << " is " << new_shape[od]; } outputs_[i].SetShape(new_shape); @@ -438,7 +462,7 @@ int TensorRTSubGraph::Execute() { return RET_OK; } -nvinfer1::ITensor *TensorRTSubGraph::FindTensorRTInputs(TensorRTOp *cur_op, const mindspore::MSTensor &in_tensor) { +ITensorHelper TensorRTSubGraph::FindTensorRTInputs(TensorRTOp *cur_op, const mindspore::MSTensor &in_tensor) { for (auto input_op : cur_op->in_ops()) { for (size_t i = 0; i < input_op->outputs().size(); i++) { auto out_tensor = input_op->outputs().at(i); @@ -447,6 +471,6 @@ nvinfer1::ITensor *TensorRTSubGraph::FindTensorRTInputs(TensorRTOp *cur_op, cons } } } - return nullptr; + return ITensorHelper{}; } } // namespace mindspore::lite diff --git a/mindspore/lite/src/delegate/tensorrt/tensorrt_subgraph.h b/mindspore/lite/src/delegate/tensorrt/tensorrt_subgraph.h index e7867363857..67f9bd5237c 100644 --- a/mindspore/lite/src/delegate/tensorrt/tensorrt_subgraph.h +++ b/mindspore/lite/src/delegate/tensorrt/tensorrt_subgraph.h @@ -67,7 +67,9 @@ class TensorRTSubGraph : public kernel::Kernel { nvinfer1::ITensor *SetTensorRTNetworkInput(const mindspore::MSTensor &in_tensor); - static nvinfer1::ITensor *FindTensorRTInputs(TensorRTOp *cur_op, const mindspore::MSTensor &in_tensor); + ITensorHelper FindTensorRTInputs(TensorRTOp *cur_op, const mindspore::MSTensor &in_tensor); + + int MarkOutputs(); std::vector all_ops_{}; // subgraph input nodes. 
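For context on the Prepare/ReSize logic above: each MSTensor name is resolved to a TensorRT binding index, and the actual runtime shape is pushed through setBindingDimensions before inference. A minimal, self-contained sketch of that rebinding step, assuming TensorRT 7-style APIs (BindInputShape and its parameters are illustrative names, not members of TensorRTSubGraph):

#include <NvInfer.h>
#include <string>
#include <vector>

// Set the runtime shape of one network input on a dynamic-shape engine.
bool BindInputShape(nvinfer1::ICudaEngine *engine, nvinfer1::IExecutionContext *context,
                    const std::string &tensor_name, const std::vector<int64_t> &shape) {
  int index = engine->getBindingIndex(tensor_name.c_str());
  if (index < 0 || shape.size() > static_cast<size_t>(nvinfer1::Dims::MAX_DIMS)) {
    return false;  // unknown binding name or unsupported rank
  }
  nvinfer1::Dims dims{};
  dims.nbDims = static_cast<int>(shape.size());
  for (size_t i = 0; i < shape.size(); i++) {
    dims.d[i] = static_cast<int>(shape[i]);
  }
  // for dynamic-shape engines the actual dimensions must be bound before enqueue/execute
  return context->setBindingDimensions(index, dims);
}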
diff --git a/mindspore/lite/src/delegate/tensorrt/tensorrt_utils.cc b/mindspore/lite/src/delegate/tensorrt/tensorrt_utils.cc index 043d457cfb0..69484a1d670 100644 --- a/mindspore/lite/src/delegate/tensorrt/tensorrt_utils.cc +++ b/mindspore/lite/src/delegate/tensorrt/tensorrt_utils.cc @@ -254,4 +254,37 @@ void SetCudaDevice(std::shared_ptr device_info_) { } MS_LOG(INFO) << "cuda is running on device: " << device; } +Format GetOutputFormat(Format input_format, nvinfer1::Permutation perm) { + if (input_format == Format::NHWC) { + if (perm.order[0] == 0 && perm.order[1] == 3 && perm.order[2] == 2 && perm.order[3] == 1) { + return Format::NCHW; + } + } else if (input_format == Format::NCHW) { + if (perm.order[0] == 0 && perm.order[1] == 2 && perm.order[2] == 3 && perm.order[3] == 1) { + return Format::NHWC; + } + } + MS_LOG(WARNING) << "transpose out format needs to check for " << input_format; + return input_format; +} +int ConvertAxisFromNHWC2NCHW(int nhwc_axis) { + // N0H1W2C3->N0C1H2W3 + if (nhwc_axis > kNHWC_C) { + return nhwc_axis; + } + switch (nhwc_axis) { + case kNHWC_N: + return kNCHW_N; + case kNHWC_H: + return kNCHW_H; + case kNHWC_W: + return kNCHW_W; + case kNHWC_C: + return kNCHW_C; + default: + MS_LOG(ERROR) << "invalid input axis for nhwc: " << nhwc_axis; + } + return nhwc_axis; +} + } // namespace mindspore::lite diff --git a/mindspore/lite/src/delegate/tensorrt/tensorrt_utils.h b/mindspore/lite/src/delegate/tensorrt/tensorrt_utils.h index c01c100fe29..64206a050fa 100644 --- a/mindspore/lite/src/delegate/tensorrt/tensorrt_utils.h +++ b/mindspore/lite/src/delegate/tensorrt/tensorrt_utils.h @@ -23,6 +23,10 @@ #include "schema/ops_generated.h" #include "nnacl/pack.h" +#define kNCHW_N 0 +#define kNCHW_C 1 +#define kNCHW_H 2 +#define kNCHW_W 3 namespace mindspore::lite { struct ActivationParams { nvinfer1::ActivationType activation_type; @@ -61,5 +65,9 @@ nvinfer1::Weights TransposeWeight(const mindspore::MSTensor &ms_tensor, float ** nvinfer1::Weights ConvertWeight(const mindspore::MSTensor &ms_tensor); void SetCudaDevice(std::shared_ptr device_info_); + +Format GetOutputFormat(Format input_format, nvinfer1::Permutation perm); + +int ConvertAxisFromNHWC2NCHW(int nhwc_axis); } // namespace mindspore::lite #endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_UTILS_H_
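A worked example of the axis remapping implemented by ConvertAxisFromNHWC2NCHW above, which the concat, softmax and reduce changes rely on when their inputs are already in NCHW: the batch axis stays at 0, the channel axis moves from 3 to 1, and H/W shift from 1/2 to 2/3. A self-contained restatement of that mapping (the table is illustrative; the patch itself uses the kNHWC_*/kNCHW_* constants):

#include <array>
#include <cassert>

// index = axis in NHWC order, value = the same dimension's axis in NCHW order
constexpr std::array<int, 4> kNhwcToNchwAxis = {0 /*N*/, 2 /*H*/, 3 /*W*/, 1 /*C*/};

int main() {
  // a concat or softmax along channels (NHWC axis 3) runs along NCHW axis 1
  assert(kNhwcToNchwAxis[3] == 1);
  // spatial axes shift back by one position, batch stays in place
  assert(kNhwcToNchwAxis[1] == 2 && kNhwcToNchwAxis[2] == 3 && kNhwcToNchwAxis[0] == 0);
  return 0;
}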
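Condensing the pattern this patch applies across the ops: every op now consumes ITensorHelper entries (a TensorRT tensor plus the layout it is currently in), inserts a shuffle only when the recorded layout differs from what its layer needs, and records the layout it actually produced, so downstream ops and MarkOutputs can decide whether a final transpose is required. An illustrative fragment written against the patch's own types (FormatAwareAddOp is a hypothetical op body, not code from the patch):

int FormatAwareAddOp(nvinfer1::INetworkDefinition *network, const ITensorHelper &in,
                     std::vector<ITensorHelper> *out_tensors, const std::string &op_name) {
  nvinfer1::ITensor *layer_input = in.trt_tensor_;
  Format out_format = in.format_;
  if (in.trt_tensor_->getDimensions().nbDims == DIMENSION_4D && in.format_ == Format::NCHW) {
    // this hypothetical layer wants NHWC, so transpose lazily instead of at every op boundary
    nvinfer1::IShuffleLayer *transpose = NCHW2NHWC(network, *in.trt_tensor_);
    if (transpose == nullptr) {
      return RET_ERROR;
    }
    transpose->setName((op_name + "_transpose2NHWC").c_str());
    layer_input = transpose->getOutput(0);
    out_format = Format::NHWC;
  }
  // ... build the actual TensorRT layer on layer_input here ...
  out_tensors->push_back(ITensorHelper{layer_input, out_format});
  return RET_OK;
}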