forked from mindspore-Ecosystem/mindspore
!22945 [MSLITE] Optimize execution time for the TensorRT delegate by removing unnecessary transpose ops
Merge pull request !22945 from Liu_Xuu/trt_0903_transpose
This commit is contained in:
commit
277cf0d892
|
@ -15,7 +15,6 @@
|
|||
*/
|
||||
|
||||
#include "src/delegate/tensorrt/op/activation_tensorrt.h"
|
||||
#include "src/delegate/tensorrt/tensorrt_utils.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
int ActivationTensorRT::IsSupport(const schema::Primitive *primitive,
|
||||
|
@ -58,8 +57,8 @@ int ActivationTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
}
|
||||
float alpha = activation_op->alpha();
|
||||
|
||||
nvinfer1::IActivationLayer *activation_layer =
|
||||
ActivationTensorRT::AddActivation(network, activation_op->activation_type(), alpha, tensorrt_in_tensors_[0]);
|
||||
nvinfer1::IActivationLayer *activation_layer = ActivationTensorRT::AddActivation(
|
||||
network, activation_op->activation_type(), alpha, tensorrt_in_tensors_[0].trt_tensor_);
|
||||
if (activation_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "add activation op failed for TensorRT.";
|
||||
return RET_ERROR;
|
||||
|
@ -67,7 +66,7 @@ int ActivationTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
|
||||
activation_layer->setName(op_name_.c_str());
|
||||
activation_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str());
|
||||
this->AddInnerOutTensors(activation_layer->getOutput(0));
|
||||
this->AddInnerOutTensors(ITensorHelper{activation_layer->getOutput(0), tensorrt_in_tensors_[0].format_});
|
||||
|
||||
return RET_OK;
|
||||
}
|
||||
|
|
|
@ -24,7 +24,7 @@ int ConcateTensorRT::IsSupport(const schema::Primitive *primitive, const std::ve
|
|||
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (in_tensors.size() < 1) {
|
||||
if (in_tensors.size() != INPUT_SIZE2) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
@ -50,11 +50,46 @@ int ConcateTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
return RET_ERROR;
|
||||
}
|
||||
|
||||
nvinfer1::ITensor *trt_input_tensors[tensorrt_in_tensors_.size()];
|
||||
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims !=
|
||||
tensorrt_in_tensors_[1].trt_tensor_->getDimensions().nbDims) {
|
||||
MS_LOG(ERROR) << "dims of inputs is invalid for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
// make sure two inputs have same format
|
||||
Format out_format = tensorrt_in_tensors_[0].format_;
|
||||
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D) {
|
||||
if (tensorrt_in_tensors_[0].format_ == tensorrt_in_tensors_[1].format_) {
|
||||
for (size_t i = 0; i < tensorrt_in_tensors_.size(); i++) {
|
||||
trt_input_tensors[i] = tensorrt_in_tensors_[i].trt_tensor_;
|
||||
}
|
||||
} else {
|
||||
// when inputs format are different, change to NHWC
|
||||
out_format = Format::NHWC;
|
||||
int transpose_tensor_index = tensorrt_in_tensors_[0].format_ == Format::NCHW ? 0 : 1;
|
||||
trt_input_tensors[1 - transpose_tensor_index] = tensorrt_in_tensors_[1 - transpose_tensor_index].trt_tensor_;
|
||||
nvinfer1::IShuffleLayer *transpose_layer =
|
||||
NCHW2NHWC(network, *tensorrt_in_tensors_[transpose_tensor_index].trt_tensor_);
|
||||
if (transpose_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "op action convert failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
trt_input_tensors[transpose_tensor_index] = transpose_layer->getOutput(0);
|
||||
}
|
||||
} else {
|
||||
for (size_t i = 0; i < tensorrt_in_tensors_.size(); i++) {
|
||||
trt_input_tensors[i] = tensorrt_in_tensors_[i].trt_tensor_;
|
||||
}
|
||||
}
|
||||
|
||||
int axis = RET_INVALID_OP_ATTR;
|
||||
axis = concate_op->axis();
|
||||
|
||||
nvinfer1::ITensor *trt_input_tensors[tensorrt_in_tensors_.size()];
|
||||
std::copy(tensorrt_in_tensors_.begin(), tensorrt_in_tensors_.end(), trt_input_tensors);
|
||||
if (out_format == Format::NCHW) {
|
||||
// when inputs all NCHW, change axis
|
||||
axis = ConvertAxisFromNHWC2NCHW(axis);
|
||||
MS_LOG(INFO) << "concate axis change to " << axis << " when using NCHW format.";
|
||||
}
|
||||
|
||||
nvinfer1::IConcatenationLayer *concate_layer =
|
||||
network->addConcatenation(trt_input_tensors, static_cast<int>(tensorrt_in_tensors_.size()));
|
||||
|
@ -68,8 +103,7 @@ int ConcateTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
}
|
||||
concate_layer->setName(op_name_.c_str());
|
||||
concate_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str());
|
||||
this->AddInnerOutTensors(concate_layer->getOutput(0));
|
||||
|
||||
this->AddInnerOutTensors(ITensorHelper{concate_layer->getOutput(0), out_format});
|
||||
return RET_OK;
|
||||
}
|
||||
} // namespace mindspore::lite
|
||||
|
|
|
@ -16,7 +16,6 @@
|
|||
|
||||
#include "src/delegate/tensorrt/op/convolution_tensorrt.h"
|
||||
#include "src/delegate/tensorrt/op/activation_tensorrt.h"
|
||||
#include "src/delegate/tensorrt/tensorrt_utils.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
constexpr int BIAS_INDEX = 2;
|
||||
|
@ -28,7 +27,7 @@ int ConvolutionTensorRT::IsSupport(const schema::Primitive *primitive,
|
|||
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (in_tensors.size() != 2 && in_tensors.size() != 3) {
|
||||
if (in_tensors.size() != INPUT_SIZE2 && in_tensors.size() != INPUT_SIZE3) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
@ -36,6 +35,10 @@ int ConvolutionTensorRT::IsSupport(const schema::Primitive *primitive,
|
|||
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (in_tensors[0].format() != Format::NHWC && in_tensors[0].format() != Format::NCHW) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor format of " << in_tensors[0].format();
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
@ -49,13 +52,19 @@ int ConvolutionTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
MS_LOG(ERROR) << "op action convert failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
// transpose: NHWC->NCHW
|
||||
nvinfer1::IShuffleLayer *transpose_layer_in = NHWC2NCHW(network, *tensorrt_in_tensors_[0]);
|
||||
if (transpose_layer_in == nullptr) {
|
||||
MS_LOG(ERROR) << "transpose: NHWC->NCHW failed";
|
||||
return RET_ERROR;
|
||||
|
||||
nvinfer1::ITensor *conv_input = tensorrt_in_tensors_[0].trt_tensor_;
|
||||
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D &&
|
||||
tensorrt_in_tensors_[0].format_ == Format::NHWC) {
|
||||
// transpose: NHWC->NCHW
|
||||
nvinfer1::IShuffleLayer *transpose_layer_in = NHWC2NCHW(network, *tensorrt_in_tensors_[0].trt_tensor_);
|
||||
if (transpose_layer_in == nullptr) {
|
||||
MS_LOG(ERROR) << "transpose: NHWC->NCHW failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str());
|
||||
conv_input = transpose_layer_in->getOutput(0);
|
||||
}
|
||||
transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str());
|
||||
|
||||
// transpose weight
|
||||
const mindspore::MSTensor &weight_tensor = in_tensors_[1];
|
||||
|
@ -86,7 +95,7 @@ int ConvolutionTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
}
|
||||
|
||||
nvinfer1::IConvolutionLayer *conv_layer =
|
||||
network->addConvolutionNd(*transpose_layer_in->getOutput(0), nbOutputMaps, kernelSize, kernelWeights, biasWeights);
|
||||
network->addConvolutionNd(*conv_input, nbOutputMaps, kernelSize, kernelWeights, biasWeights);
|
||||
|
||||
if (conv_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "ConvolutionLayer failed";
|
||||
|
@ -111,15 +120,8 @@ int ConvolutionTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
activation_layer->setName((op_name_ + "_activation").c_str());
|
||||
}
|
||||
|
||||
// transpose: NCHW->NHWC
|
||||
nvinfer1::IShuffleLayer *transpose_layer_out = NCHW2NHWC(network, *activation_layer->getOutput(0));
|
||||
if (transpose_layer_out == nullptr) {
|
||||
MS_LOG(ERROR) << "op action convert failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
transpose_layer_out->setName((op_name_ + "_transpose2NHWC").c_str());
|
||||
transpose_layer_out->getOutput(0)->setName(out_tensors_[0].Name().c_str());
|
||||
this->AddInnerOutTensors(transpose_layer_out->getOutput(0));
|
||||
activation_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str());
|
||||
this->AddInnerOutTensors(ITensorHelper{activation_layer->getOutput(0), Format::NCHW});
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
|
|
@ -16,7 +16,6 @@
|
|||
|
||||
#include "src/delegate/tensorrt/op/deconvolution_tensorrt.h"
|
||||
#include "src/delegate/tensorrt/op/activation_tensorrt.h"
|
||||
#include "src/delegate/tensorrt/tensorrt_utils.h"
|
||||
#include "nnacl/pack.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
|
@ -35,6 +34,10 @@ int DeconvolutionTensorRT::IsSupport(const schema::Primitive *primitive,
|
|||
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (in_tensors[0].format() != Format::NHWC && in_tensors[0].format() != Format::NCHW) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor format of " << in_tensors[0].format();
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
int DeconvolutionTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
||||
|
@ -47,13 +50,18 @@ int DeconvolutionTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
MS_LOG(ERROR) << "op action convert failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
// transpose: NHWC->NCHW
|
||||
nvinfer1::IShuffleLayer *transpose_layer_in = NHWC2NCHW(network, *tensorrt_in_tensors_[0]);
|
||||
if (transpose_layer_in == nullptr) {
|
||||
MS_LOG(ERROR) << "transpose: NHWC->NCHW failed";
|
||||
return RET_ERROR;
|
||||
nvinfer1::ITensor *deconv_input = tensorrt_in_tensors_[0].trt_tensor_;
|
||||
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D &&
|
||||
tensorrt_in_tensors_[0].format_ == Format::NHWC) {
|
||||
// transpose: NHWC->NCHW
|
||||
nvinfer1::IShuffleLayer *transpose_layer_in = NHWC2NCHW(network, *tensorrt_in_tensors_[0].trt_tensor_);
|
||||
if (transpose_layer_in == nullptr) {
|
||||
MS_LOG(ERROR) << "transpose: NHWC->NCHW failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str());
|
||||
deconv_input = transpose_layer_in->getOutput(0);
|
||||
}
|
||||
transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str());
|
||||
|
||||
// transpose weight
|
||||
const mindspore::MSTensor &weight_tensor = in_tensors_[1];
|
||||
|
@ -83,8 +91,8 @@ int DeconvolutionTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
biasWeights.values = nullptr;
|
||||
}
|
||||
|
||||
nvinfer1::IDeconvolutionLayer *deconv_layer = network->addDeconvolutionNd(
|
||||
*transpose_layer_in->getOutput(0), nbOutputMaps, kernelSize, kernelWeights, biasWeights);
|
||||
nvinfer1::IDeconvolutionLayer *deconv_layer =
|
||||
network->addDeconvolutionNd(*deconv_input, nbOutputMaps, kernelSize, kernelWeights, biasWeights);
|
||||
|
||||
if (deconv_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "DeconvolutionLayer failed";
|
||||
|
@ -109,15 +117,8 @@ int DeconvolutionTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
activation_layer->setName((op_name_ + "_activation").c_str());
|
||||
}
|
||||
|
||||
// transpose: NCHW->NHWC
|
||||
nvinfer1::IShuffleLayer *transpose_layer_out = NCHW2NHWC(network, *activation_layer->getOutput(0));
|
||||
if (transpose_layer_out == nullptr) {
|
||||
MS_LOG(ERROR) << "op action convert failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
transpose_layer_out->setName((op_name_ + "_transpose2NHWC").c_str());
|
||||
transpose_layer_out->getOutput(0)->setName(out_tensors_[0].Name().c_str());
|
||||
this->AddInnerOutTensors(transpose_layer_out->getOutput(0));
|
||||
activation_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str());
|
||||
this->AddInnerOutTensors(ITensorHelper{activation_layer->getOutput(0), Format::NCHW});
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
|
|
@ -80,26 +80,38 @@ int ElementWiseTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
MS_LOG(ERROR) << "network or input tensor size is invalid";
|
||||
return RET_ERROR;
|
||||
}
|
||||
first_in_tensor_index_ = strcmp(tensorrt_in_tensors_[0]->getName(), in_tensors_[0].Name().c_str()) == 0 ? 0 : 1;
|
||||
// add elementwise
|
||||
first_in_tensor_index_ =
|
||||
strcmp(tensorrt_in_tensors_[0].trt_tensor_->getName(), in_tensors_[0].Name().c_str()) == 0 ? 0 : 1;
|
||||
|
||||
if (this->tensorrt_in_tensors_.size() != INPUT_SIZE2) {
|
||||
// create ITensor from MS constant tensor of index 1 - first_in_tensor_index_
|
||||
nvinfer1::ITensor *constant_input = nullptr;
|
||||
if (this->in_tensors_[1 - first_in_tensor_index_].Shape().size() == 0) {
|
||||
constant_input = lite::ConvertScalarToITensor(network, this->in_tensors_[first_in_tensor_index_].Shape().size(),
|
||||
in_tensors_[1 - first_in_tensor_index_].Data().get());
|
||||
} else {
|
||||
constant_input = lite::ConvertConstantTensor(network, in_tensors_[1 - first_in_tensor_index_]);
|
||||
int ret = AddConstTensor(network);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "AddConstTensor failed for " << op_name_;
|
||||
return ret;
|
||||
}
|
||||
if (constant_input == nullptr) {
|
||||
MS_LOG(ERROR) << "create Itensor from constant tensor failed: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
this->AddInnerInTensors(constant_input);
|
||||
}
|
||||
|
||||
nvinfer1::IElementWiseLayer *cal_layer = network->addElementWise(
|
||||
*tensorrt_in_tensors_[first_in_tensor_index_], *tensorrt_in_tensors_[1 - first_in_tensor_index_], element_wise_op_);
|
||||
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D &&
|
||||
tensorrt_in_tensors_[0].format_ != tensorrt_in_tensors_[1].format_) {
|
||||
// when inputs format are different, change to NHWC
|
||||
int transpose_input_tensor = tensorrt_in_tensors_[0].format_ == Format::NCHW ? 0 : 1;
|
||||
nvinfer1::IShuffleLayer *transpose_layer =
|
||||
NCHW2NHWC(network, *tensorrt_in_tensors_[transpose_input_tensor].trt_tensor_);
|
||||
if (transpose_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "op action convert failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
transpose_layer->setName((op_name_ + "_input_transpose2NHWC").c_str());
|
||||
tensorrt_in_tensors_[transpose_input_tensor].trt_tensor_ = transpose_layer->getOutput(0);
|
||||
tensorrt_in_tensors_[transpose_input_tensor].format_ = Format::NHWC;
|
||||
} else if (tensorrt_in_tensors_[0].format_ != tensorrt_in_tensors_[1].format_) {
|
||||
MS_LOG(ERROR) << "elementwise op inputs are in different format: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
nvinfer1::IElementWiseLayer *cal_layer =
|
||||
network->addElementWise(*tensorrt_in_tensors_[first_in_tensor_index_].trt_tensor_,
|
||||
*tensorrt_in_tensors_[1 - first_in_tensor_index_].trt_tensor_, element_wise_op_);
|
||||
|
||||
if (cal_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "addElementWise failed for TensorRT.";
|
||||
|
@ -129,9 +141,8 @@ int ElementWiseTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
MS_LOG(WARNING) << "deal with scale and shift for pow op";
|
||||
}
|
||||
}
|
||||
|
||||
op_out_tensor->setName(out_tensors_[0].Name().c_str());
|
||||
this->AddInnerOutTensors(op_out_tensor);
|
||||
this->AddInnerOutTensors(ITensorHelper{op_out_tensor, tensorrt_in_tensors_[1].format_});
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
@ -184,4 +195,26 @@ nvinfer1::ITensor *ElementWiseTensorRT::AddActivation(nvinfer1::INetworkDefiniti
|
|||
}
|
||||
return activation_out_tensor;
|
||||
}
|
||||
/// Converts the constant MS input tensor (the one at index `1 - first_in_tensor_index_`) into a
/// TensorRT ITensor and appends it to this op's inner input tensors.
///
/// A constant whose shape has zero dimensions is converted with ConvertScalarToITensor, which is
/// given the number of dimensions of the other input, and is tagged with the format of
/// tensorrt_in_tensors_[0]. Any other constant is converted with ConvertConstantTensor and is
/// tagged as NHWC.
///
/// @param network network definition handed to the conversion helpers.
/// @return RET_OK on success, RET_ERROR when the conversion helper returns nullptr.
int ElementWiseTensorRT::AddConstTensor(nvinfer1::INetworkDefinition *network) {
  const auto const_index = 1 - first_in_tensor_index_;
  const bool is_scalar = this->in_tensors_[const_index].Shape().size() == 0;

  nvinfer1::ITensor *trt_constant = nullptr;
  if (is_scalar) {
    trt_constant = lite::ConvertScalarToITensor(network, this->in_tensors_[first_in_tensor_index_].Shape().size(),
                                                in_tensors_[const_index].Data().get());
  } else {
    trt_constant = lite::ConvertConstantTensor(network, in_tensors_[const_index]);
  }

  // Both conversion paths report failure the same way, so one check covers them.
  if (trt_constant == nullptr) {
    MS_LOG(ERROR) << "create Itensor from constant tensor failed: " << op_name_;
    return RET_ERROR;
  }

  // Scalars inherit the format of the first TensorRT input; other constants are registered as NHWC.
  const Format const_format = is_scalar ? tensorrt_in_tensors_[0].format_ : Format::NHWC;
  this->AddInnerInTensors(ITensorHelper{trt_constant, const_format});
  return RET_OK;
}
|
||||
|
||||
} // namespace mindspore::lite
|
||||
|
|
|
@ -37,6 +37,8 @@ class ElementWiseTensorRT : public TensorRTOp {
|
|||
private:
|
||||
nvinfer1::ITensor *AddActivation(nvinfer1::INetworkDefinition *network, nvinfer1::ITensor *in_tensor);
|
||||
|
||||
int AddConstTensor(nvinfer1::INetworkDefinition *network);
|
||||
|
||||
nvinfer1::ElementWiseOperation element_wise_op_;
|
||||
|
||||
// index of first input MSTensor in the trt input tensor vector
|
||||
|
|
|
@ -59,15 +59,30 @@ int GatherTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
MS_LOG(ERROR) << "add a new tensor failed for TensorRT GatherTensorRTOp.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
nvinfer1::IGatherLayer *gather_layer =
|
||||
network->addGather(*tensorrt_in_tensors_[0], *add_tensor /* indices */, axis_ /* axis */);
|
||||
|
||||
nvinfer1::ITensor *gather_input = tensorrt_in_tensors_[0].trt_tensor_;
|
||||
Format out_format = tensorrt_in_tensors_[0].format_;
|
||||
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D &&
|
||||
tensorrt_in_tensors_[0].format_ == Format::NCHW) {
|
||||
// transpose: NCHW->NHWC
|
||||
nvinfer1::IShuffleLayer *transpose_layer_in = NCHW2NHWC(network, *tensorrt_in_tensors_[0].trt_tensor_);
|
||||
if (transpose_layer_in == nullptr) {
|
||||
MS_LOG(ERROR) << "op action convert failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
transpose_layer_in->setName((op_name_ + "_transpose2NHWC").c_str());
|
||||
gather_input = transpose_layer_in->getOutput(0);
|
||||
out_format = Format::NHWC;
|
||||
}
|
||||
|
||||
nvinfer1::IGatherLayer *gather_layer = network->addGather(*gather_input, *add_tensor /* indices */, axis_ /* axis */);
|
||||
if (gather_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "addGather failed for TensorRT.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
gather_layer->setName(op_name_.c_str());
|
||||
gather_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str());
|
||||
this->AddInnerOutTensors(gather_layer->getOutput(0));
|
||||
this->AddInnerOutTensors(ITensorHelper{gather_layer->getOutput(0), out_format});
|
||||
return RET_OK;
|
||||
}
|
||||
} // namespace mindspore::lite
|
||||
|
|
|
@ -43,7 +43,22 @@ int MatMulTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
transpose_b_ = primitive->transpose_b() ? nvinfer1::MatrixOperation::kTRANSPOSE : nvinfer1::MatrixOperation::kNONE;
|
||||
auto weight = ConvertTensorWithExpandDims(network, in_tensors_[1], in_tensors_[0].Shape().size());
|
||||
|
||||
auto matmul_layer = network->addMatrixMultiply(*tensorrt_in_tensors_[0], transpose_a_, *weight, transpose_b_);
|
||||
nvinfer1::ITensor *matmul_input = tensorrt_in_tensors_[0].trt_tensor_;
|
||||
Format out_format = tensorrt_in_tensors_[0].format_;
|
||||
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D &&
|
||||
tensorrt_in_tensors_[0].format_ == Format::NCHW) {
|
||||
// transpose: NCHW->NHWC
|
||||
nvinfer1::IShuffleLayer *transpose_layer_in = NCHW2NHWC(network, *tensorrt_in_tensors_[0].trt_tensor_);
|
||||
if (transpose_layer_in == nullptr) {
|
||||
MS_LOG(ERROR) << "op action convert failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
transpose_layer_in->setName((op_name_ + "_transpose2NHWC").c_str());
|
||||
matmul_input = transpose_layer_in->getOutput(0);
|
||||
out_format = Format::NHWC;
|
||||
}
|
||||
|
||||
auto matmul_layer = network->addMatrixMultiply(*matmul_input, transpose_a_, *weight, transpose_b_);
|
||||
matmul_layer->setName(op_name_.c_str());
|
||||
nvinfer1::ITensor *out_tensor = matmul_layer->getOutput(0);
|
||||
|
||||
|
@ -56,7 +71,7 @@ int MatMulTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
}
|
||||
|
||||
out_tensor->setName(out_tensors_[0].Name().c_str());
|
||||
this->AddInnerOutTensors(out_tensor);
|
||||
this->AddInnerOutTensors(ITensorHelper{out_tensor, out_format});
|
||||
return RET_OK;
|
||||
}
|
||||
} // namespace mindspore::lite
|
||||
|
|
|
@ -49,6 +49,10 @@ int PadTensorRT::IsSupport(const mindspore::schema::Primitive *primitive,
|
|||
MS_LOG(ERROR) << "Unsupported padding mode: " << pad_primitive << ", for op: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (in_tensors[0].format() != Format::NHWC && in_tensors[0].format() != Format::NCHW) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor format of " << in_tensors[0].format();
|
||||
return RET_ERROR;
|
||||
}
|
||||
constant_value_ = pad_primitive->constant_value();
|
||||
return RET_OK;
|
||||
}
|
||||
|
@ -56,18 +60,24 @@ int PadTensorRT::IsSupport(const mindspore::schema::Primitive *primitive,
|
|||
int PadTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
||||
mindspore::MSTensor &pad_tensor = in_tensors_[1];
|
||||
int element_cnt = std::accumulate(pad_tensor.Shape().begin(), pad_tensor.Shape().end(), 1, std::multiplies<int>());
|
||||
if (element_cnt != tensorrt_in_tensors_[0]->getDimensions().nbDims * 2) {
|
||||
if (element_cnt != tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims * 2) {
|
||||
MS_LOG(ERROR) << "pad tensor cnt is invalid. cnt: " << element_cnt
|
||||
<< ", input tensor dims cnt: " << tensorrt_in_tensors_[0]->getDimensions().nbDims;
|
||||
<< ", input tensor dims cnt: " << tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims;
|
||||
return RET_ERROR;
|
||||
}
|
||||
// transpose: NHWC->NCHW
|
||||
nvinfer1::IShuffleLayer *transpose_layer_in = NHWC2NCHW(network, *tensorrt_in_tensors_[0]);
|
||||
if (transpose_layer_in == nullptr) {
|
||||
MS_LOG(ERROR) << "transpose: NHWC->NCHW failed";
|
||||
return RET_ERROR;
|
||||
|
||||
nvinfer1::ITensor *pad_input = tensorrt_in_tensors_[0].trt_tensor_;
|
||||
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D &&
|
||||
tensorrt_in_tensors_[0].format_ == Format::NHWC) {
|
||||
// transpose: NHWC->NCHW
|
||||
nvinfer1::IShuffleLayer *transpose_layer_in = NHWC2NCHW(network, *tensorrt_in_tensors_[0].trt_tensor_);
|
||||
if (transpose_layer_in == nullptr) {
|
||||
MS_LOG(ERROR) << "transpose: NHWC->NCHW failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str());
|
||||
pad_input = transpose_layer_in->getOutput(0);
|
||||
}
|
||||
transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str());
|
||||
|
||||
// trt 6 only support 2D padding
|
||||
const int *padding_data = reinterpret_cast<const int *>(in_tensors_[1].Data().get());
|
||||
|
@ -84,7 +94,7 @@ int PadTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
MS_LOG(INFO) << "prePadding: " << *(padding_data + 2) << ", " << *(padding_data + 4);
|
||||
MS_LOG(INFO) << "postPadding: " << *(padding_data + 3) << ", " << *(padding_data + 5);
|
||||
|
||||
padding_layer = network->addPadding(*transpose_layer_in->getOutput(0), prePadding, postPadding);
|
||||
padding_layer = network->addPadding(*pad_input, prePadding, postPadding);
|
||||
} else {
|
||||
MS_LOG(ERROR) << "need check for pad_tensor dims: " << op_name_
|
||||
<< ", pad_tensor ElementNum: " << pad_tensor.ElementNum();
|
||||
|
@ -95,17 +105,8 @@ int PadTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
return RET_ERROR;
|
||||
}
|
||||
padding_layer->setName(op_name_.c_str());
|
||||
|
||||
// transpose: NCHW->NHWC
|
||||
nvinfer1::IShuffleLayer *transpose_layer_out = NCHW2NHWC(network, *padding_layer->getOutput(0));
|
||||
if (transpose_layer_out == nullptr) {
|
||||
MS_LOG(ERROR) << "op action convert failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
transpose_layer_out->setName((op_name_ + "_transpose2NHWC").c_str());
|
||||
transpose_layer_out->getOutput(0)->setName(out_tensors_[0].Name().c_str());
|
||||
|
||||
this->AddInnerOutTensors(transpose_layer_out->getOutput(0));
|
||||
padding_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str());
|
||||
this->AddInnerOutTensors(ITensorHelper{padding_layer->getOutput(0), Format::NCHW});
|
||||
return RET_OK;
|
||||
}
|
||||
} // namespace mindspore::lite
|
||||
|
|
|
@ -34,6 +34,10 @@ int PoolTensorRT::IsSupport(const mindspore::schema::Primitive *primitive,
|
|||
MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (in_tensors[0].format() != Format::NHWC && in_tensors[0].format() != Format::NCHW) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor format of " << in_tensors[0].format();
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
@ -47,13 +51,18 @@ int PoolTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
MS_LOG(ERROR) << "invalid input tensor size: " << tensorrt_in_tensors_.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
// transpose: NHWC->NCHW
|
||||
nvinfer1::IShuffleLayer *transpose_layer_in = NHWC2NCHW(network, *tensorrt_in_tensors_[0]);
|
||||
if (transpose_layer_in == nullptr) {
|
||||
MS_LOG(ERROR) << "transpose: NHWC->NCHW failed";
|
||||
return RET_ERROR;
|
||||
nvinfer1::ITensor *pool_input = tensorrt_in_tensors_[0].trt_tensor_;
|
||||
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D &&
|
||||
tensorrt_in_tensors_[0].format_ == Format::NHWC) {
|
||||
// transpose: NHWC->NCHW
|
||||
nvinfer1::IShuffleLayer *transpose_layer_in = NHWC2NCHW(network, *tensorrt_in_tensors_[0].trt_tensor_);
|
||||
if (transpose_layer_in == nullptr) {
|
||||
MS_LOG(ERROR) << "transpose: NHWC->NCHW failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str());
|
||||
pool_input = transpose_layer_in->getOutput(0);
|
||||
}
|
||||
transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str());
|
||||
|
||||
// pooling layer
|
||||
nvinfer1::PoolingType pooling_type = nvinfer1::PoolingType::kAVERAGE;
|
||||
|
@ -64,8 +73,7 @@ int PoolTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
}
|
||||
std::vector<int64_t> kernel_size_val = std::vector<int64_t>(kernel_size->begin(), kernel_size->end());
|
||||
nvinfer1::Dims windowSize = lite::ConvertCudaDims(kernel_size_val);
|
||||
nvinfer1::IPoolingLayer *pooling_layer =
|
||||
network->addPoolingNd(*transpose_layer_in->getOutput(0), pooling_type, windowSize);
|
||||
nvinfer1::IPoolingLayer *pooling_layer = network->addPoolingNd(*pool_input, pooling_type, windowSize);
|
||||
if (pooling_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "addPoolingNd failed for TensorRT.";
|
||||
return RET_ERROR;
|
||||
|
@ -86,15 +94,8 @@ int PoolTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
}
|
||||
activation_layer->setName((op_name_ + "_activation").c_str());
|
||||
}
|
||||
// transpose: NCHW->NHWC
|
||||
nvinfer1::IShuffleLayer *transpose_layer_out = NCHW2NHWC(network, *activation_layer->getOutput(0));
|
||||
if (transpose_layer_out == nullptr) {
|
||||
MS_LOG(ERROR) << "op action convert failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
transpose_layer_out->setName((op_name_ + "_transpose2NHWC").c_str());
|
||||
transpose_layer_out->getOutput(0)->setName(out_tensors_[0].Name().c_str());
|
||||
this->AddInnerOutTensors(transpose_layer_out->getOutput(0));
|
||||
activation_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str());
|
||||
this->AddInnerOutTensors(ITensorHelper{activation_layer->getOutput(0), Format::NCHW});
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
|
|
@ -28,7 +28,7 @@ int ReduceTensorRT::IsSupport(const schema::Primitive *primitive, const std::vec
|
|||
MS_LOG(ERROR) << "convert failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (in_tensors.size() != 2) {
|
||||
if (in_tensors.size() != INPUT_SIZE2) {
|
||||
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
|
||||
}
|
||||
if (out_tensors.size() != 1) {
|
||||
|
@ -55,23 +55,17 @@ int ReduceTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
return RET_ERROR;
|
||||
}
|
||||
bool keep_dims = reduce_op->keep_dims();
|
||||
// axis
|
||||
uint32_t reduceAxes = 0;
|
||||
mindspore::MSTensor axis_tensor = this->in_tensors_[1];
|
||||
if (axis_tensor.Data() == nullptr) {
|
||||
MS_LOG(ERROR) << "invalid axis_tensor";
|
||||
return RET_ERROR;
|
||||
nvinfer1::ITensor *reduce_input = tensorrt_in_tensors_[0].trt_tensor_;
|
||||
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D &&
|
||||
tensorrt_in_tensors_[0].format_ == Format::NCHW) {
|
||||
out_format_ = Format::NHWC;
|
||||
} else {
|
||||
out_format_ = tensorrt_in_tensors_[0].format_;
|
||||
}
|
||||
if (axis_tensor.DataType() != DataType::kNumberTypeInt32) {
|
||||
MS_LOG(WARNING) << "not int data type";
|
||||
}
|
||||
int *axis_data = reinterpret_cast<int *>(axis_tensor.MutableData());
|
||||
for (int i = 0; i < axis_tensor.ElementNum(); i++) {
|
||||
reduceAxes |= (16 - (1u << *axis_data));
|
||||
axis_data++;
|
||||
}
|
||||
MS_LOG(INFO) << "reduceAxes: " << reduceAxes;
|
||||
nvinfer1::IReduceLayer *layer = network->addReduce(*tensorrt_in_tensors_[0], reduce_op_, reduceAxes, keep_dims);
|
||||
|
||||
uint32_t reduceAxis = GetAxis();
|
||||
|
||||
nvinfer1::IReduceLayer *layer = network->addReduce(*reduce_input, reduce_op_, reduceAxis, keep_dims);
|
||||
if (layer == nullptr) {
|
||||
MS_LOG(ERROR) << "addReduce failed for TensorRT.";
|
||||
return RET_ERROR;
|
||||
|
@ -84,7 +78,29 @@ int ReduceTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
return RET_ERROR;
|
||||
}
|
||||
out_tensor->setName(out_tensors_[0].Name().c_str());
|
||||
this->AddInnerOutTensors(out_tensor);
|
||||
this->AddInnerOutTensors(ITensorHelper{out_tensor, out_format_});
|
||||
return RET_OK;
|
||||
}
|
||||
uint32_t ReduceTensorRT::GetAxis() {
|
||||
// axis
|
||||
uint32_t reduceAxis = 0;
|
||||
mindspore::MSTensor axis_tensor = this->in_tensors_[1];
|
||||
if (axis_tensor.Data() == nullptr) {
|
||||
MS_LOG(ERROR) << "invalid axis_tensor";
|
||||
return reduceAxis;
|
||||
}
|
||||
if (axis_tensor.DataType() != DataType::kNumberTypeInt32) {
|
||||
MS_LOG(WARNING) << "not int data type";
|
||||
}
|
||||
int *axis_data = reinterpret_cast<int *>(axis_tensor.MutableData());
|
||||
bool need_transpose_axis =
|
||||
(out_format_ == Format::NCHW) && (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D);
|
||||
for (int i = 0; i < axis_tensor.ElementNum(); i++) {
|
||||
int format_axis_data = need_transpose_axis ? ConvertAxisFromNHWC2NCHW(*axis_data) : *axis_data;
|
||||
reduceAxis |= (16 - (1u << format_axis_data));
|
||||
axis_data++;
|
||||
}
|
||||
MS_LOG(INFO) << "reduceAxis: " << reduceAxis;
|
||||
return reduceAxis;
|
||||
}
|
||||
} // namespace mindspore::lite
|
||||
|
|
|
@ -36,6 +36,7 @@ class ReduceTensorRT : public TensorRTOp {
|
|||
const std::vector<mindspore::MSTensor> &out_tensors) override;
|
||||
|
||||
private:
|
||||
uint32_t GetAxis();
|
||||
std::map<schema::ReduceMode, nvinfer1::ReduceOperation> reduce_ops_ = {
|
||||
{schema::ReduceMode::ReduceMode_ReduceMean, nvinfer1::ReduceOperation::kAVG},
|
||||
{schema::ReduceMode::ReduceMode_ReduceMax, nvinfer1::ReduceOperation::kMAX},
|
||||
|
@ -44,6 +45,7 @@ class ReduceTensorRT : public TensorRTOp {
|
|||
{schema::ReduceMode::ReduceMode_ReduceSum, nvinfer1::ReduceOperation::kSUM},
|
||||
};
|
||||
nvinfer1::ReduceOperation reduce_op_;
|
||||
Format out_format_;
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_OP_REDUCE_TENSORRT_H_
|
||||
|
|
|
@ -17,6 +17,7 @@
|
|||
#include <numeric>
|
||||
#include <functional>
|
||||
#include "src/delegate/tensorrt/op/scale_tensorrt.h"
|
||||
#include "src/delegate/tensorrt/op/activation_tensorrt.h"
|
||||
#include "src/delegate/tensorrt/tensorrt_utils.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
|
@ -53,14 +54,26 @@ int ScaleTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
}
|
||||
|
||||
schema::ActivationType activation_type = scale_op->activation_type();
|
||||
nvinfer1::ITensor *scale_in_tensor = tensorrt_in_tensors_[0];
|
||||
// unsqueeze input Itensor to 4 dims
|
||||
nvinfer1::ITensor *scale_in_tensor = tensorrt_in_tensors_[0].trt_tensor_;
|
||||
Format out_format = in_tensors_[0].format();
|
||||
if (in_tensors_[0].Shape().size() < INPUT_SIZE4) {
|
||||
// unsqueeze input Itensor to 4 dims
|
||||
scale_in_tensor = AddUnsqueezeOp(network);
|
||||
if (scale_in_tensor == nullptr) {
|
||||
MS_LOG(ERROR) << "AddUnsqueezeOp failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
} else if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == 4 &&
|
||||
tensorrt_in_tensors_[0].format_ == Format::NCHW) {
|
||||
// transpose: NCHW->NHWC
|
||||
nvinfer1::IShuffleLayer *transpose_layer_in = NCHW2NHWC(network, *tensorrt_in_tensors_[0].trt_tensor_);
|
||||
if (transpose_layer_in == nullptr) {
|
||||
MS_LOG(ERROR) << "op action convert failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
transpose_layer_in->setName((op_name_ + "_transpose2NHWC").c_str());
|
||||
scale_in_tensor = transpose_layer_in->getOutput(0);
|
||||
out_format = Format::NHWC;
|
||||
}
|
||||
// mode of scale
|
||||
size_t axis = scale_op->axis();
|
||||
|
@ -100,18 +113,27 @@ int ScaleTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
return RET_ERROR;
|
||||
}
|
||||
cal_layer->setName(op_name_.c_str());
|
||||
nvinfer1::ITensor *op_out_tensor = cal_layer->getOutput(0);
|
||||
if (op_out_tensor == nullptr) {
|
||||
MS_LOG(ERROR) << "addScaleNd output tensor is invalid for: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
// add activation
|
||||
nvinfer1::ITensor *activation_tensor = cal_layer->getOutput(0);
|
||||
if (activation_type != schema::ActivationType::ActivationType_NO_ACTIVATION) {
|
||||
MS_LOG(WARNING) << "need activation for: " << op_name_;
|
||||
auto activation_layer = ActivationTensorRT::AddActivation(network, activation_type, 0, cal_layer->getOutput(0));
|
||||
if (activation_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "addActivation for scale failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
activation_layer->setName((op_name_ + "_activation").c_str());
|
||||
activation_tensor = activation_layer->getOutput(0);
|
||||
}
|
||||
|
||||
// squeeze to origin dim
|
||||
nvinfer1::ITensor *op_out_tensor = activation_tensor;
|
||||
if (activation_tensor->getDimensions().nbDims > static_cast<int>(out_tensors_[0].Shape().size())) {
|
||||
op_out_tensor = AddSqueezeOp(activation_tensor, network);
|
||||
}
|
||||
|
||||
op_out_tensor->setName(out_tensors_[0].Name().c_str());
|
||||
this->AddInnerOutTensors(op_out_tensor);
|
||||
this->AddInnerOutTensors(ITensorHelper{op_out_tensor, out_format});
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
@ -136,7 +158,7 @@ nvinfer1::ScaleMode ScaleTensorRT::GetScaleMode(size_t axis) {
|
|||
}
|
||||
|
||||
nvinfer1::ITensor *ScaleTensorRT::AddUnsqueezeOp(nvinfer1::INetworkDefinition *network) {
|
||||
nvinfer1::IShuffleLayer *unsqueeze_layer = network->addShuffle(*this->tensorrt_in_tensors_[0]);
|
||||
nvinfer1::IShuffleLayer *unsqueeze_layer = network->addShuffle(*this->tensorrt_in_tensors_[0].trt_tensor_);
|
||||
if (unsqueeze_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "addShuffle failed for: " << op_name_;
|
||||
return nullptr;
|
||||
|
@ -150,4 +172,17 @@ nvinfer1::ITensor *ScaleTensorRT::AddUnsqueezeOp(nvinfer1::INetworkDefinition *n
|
|||
unsqueeze_layer->setReshapeDimensions(unsqueeze_dims);
|
||||
return unsqueeze_layer->getOutput(0);
|
||||
}
|
||||
|
||||
// Appends a shuffle layer that reshapes `in_tensor` to the shape of out_tensors_[0].
// Returns the reshaped tensor, or nullptr when the shuffle layer cannot be created.
nvinfer1::ITensor *ScaleTensorRT::AddSqueezeOp(nvinfer1::ITensor *in_tensor, nvinfer1::INetworkDefinition *network) {
  auto *reshape_layer = network->addShuffle(*in_tensor);
  if (!reshape_layer) {
    MS_LOG(ERROR) << "addShuffle failed for: " << op_name_;
    return nullptr;
  }
  const auto layer_name = op_name_ + "_squeeze";
  reshape_layer->setName(layer_name.c_str());

  // Target dimensions are taken from the op's declared output shape.
  const nvinfer1::Dims target_dims = lite::ConvertCudaDims(out_tensors_[0].Shape());
  MS_LOG(INFO) << "squeeze_dims cnt for scale: " << target_dims.nbDims;
  reshape_layer->setReshapeDimensions(target_dims);

  return reshape_layer->getOutput(0);
}
|
||||
} // namespace mindspore::lite
|
||||
|
|
|
@ -38,6 +38,8 @@ class ScaleTensorRT : public TensorRTOp {
|
|||
private:
|
||||
nvinfer1::ITensor *AddUnsqueezeOp(nvinfer1::INetworkDefinition *network);
|
||||
|
||||
nvinfer1::ITensor *AddSqueezeOp(nvinfer1::ITensor *in_tensor, nvinfer1::INetworkDefinition *network);
|
||||
|
||||
nvinfer1::ScaleMode GetScaleMode(size_t axis);
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
|
|
|
@ -38,7 +38,7 @@ int ShapeTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
MS_LOG(ERROR) << "network is invalid";
|
||||
return RET_ERROR;
|
||||
}
|
||||
nvinfer1::IShapeLayer *shape_layer = network->addShape(*tensorrt_in_tensors_[0]);
|
||||
nvinfer1::IShapeLayer *shape_layer = network->addShape(*tensorrt_in_tensors_[0].trt_tensor_);
|
||||
|
||||
if (shape_layer == nullptr) {
|
||||
MS_LOG(DEBUG) << "add shape op failed for TensorRT.";
|
||||
|
@ -46,7 +46,7 @@ int ShapeTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
}
|
||||
shape_layer->setName(op_name_.c_str());
|
||||
shape_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str());
|
||||
this->AddInnerOutTensors(shape_layer->getOutput(0));
|
||||
this->AddInnerOutTensors(ITensorHelper{shape_layer->getOutput(0), tensorrt_in_tensors_[0].format_});
|
||||
return RET_OK;
|
||||
}
|
||||
} // namespace mindspore::lite
|
||||
|
|
|
@ -72,58 +72,56 @@ int ShuffleTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
MS_LOG(ERROR) << "network is invalid";
|
||||
return RET_ERROR;
|
||||
}
|
||||
nvinfer1::IShuffleLayer *shuffle_layer = network->addShuffle(*tensorrt_in_tensors_[0]);
|
||||
|
||||
nvinfer1::ITensor *shuffler_input = tensorrt_in_tensors_[0].trt_tensor_;
|
||||
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == 4 &&
|
||||
tensorrt_in_tensors_[0].format_ == Format::NCHW && !tensorrt_in_tensors_[0].trt_tensor_->isNetworkInput()) {
|
||||
// network input tensor format can be NCHW
|
||||
nvinfer1::IShuffleLayer *transpose_layer = NCHW2NHWC(network, *tensorrt_in_tensors_[0].trt_tensor_);
|
||||
if (transpose_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "create transpose layer failed for " << op_name_;
|
||||
}
|
||||
transpose_layer->setName((op_name_ + "_transpose_in").c_str());
|
||||
shuffler_input = transpose_layer->getOutput(0);
|
||||
}
|
||||
|
||||
nvinfer1::IShuffleLayer *shuffle_layer = network->addShuffle(*shuffler_input);
|
||||
if (shuffle_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "add Shuffle op failed for TensorRT.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
shuffle_layer->setName(op_name_.c_str());
|
||||
|
||||
int ret = RET_OK;
|
||||
switch (type_) {
|
||||
case schema::PrimitiveType_Unsqueeze: {
|
||||
int ret = AddUnsqueezeOp(shuffle_layer);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "AddUnSqueezeOp failed.";
|
||||
return ret;
|
||||
}
|
||||
ret = AddUnsqueezeOp(shuffle_layer);
|
||||
break;
|
||||
}
|
||||
case schema::PrimitiveType_Squeeze: {
|
||||
int ret = AddSqueezeOp(shuffle_layer);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "AddSqueezeOp failed.";
|
||||
return ret;
|
||||
}
|
||||
ret = AddSqueezeOp(shuffle_layer);
|
||||
break;
|
||||
}
|
||||
case schema::PrimitiveType_Transpose: {
|
||||
int ret = AddTransposeOp(shuffle_layer);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "AddTransposeOpss failed.";
|
||||
return ret;
|
||||
}
|
||||
ret = AddTransposeOp(shuffle_layer);
|
||||
break;
|
||||
}
|
||||
case schema::PrimitiveType_Reshape: {
|
||||
int ret = AddReshapeOp(shuffle_layer);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "AddReshapeOp failed.";
|
||||
return ret;
|
||||
}
|
||||
ret = AddReshapeOp(shuffle_layer);
|
||||
break;
|
||||
}
|
||||
case schema::PrimitiveType_Flatten: {
|
||||
int ret = AddFlattenOp(shuffle_layer);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "AddFlattenOp failed.";
|
||||
return ret;
|
||||
}
|
||||
ret = AddFlattenOp(shuffle_layer);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
MS_LOG(ERROR) << "Unsupported op type.";
|
||||
MS_LOG(ERROR) << "Unsupported op type for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "AddOp failed for " << op_name_;
|
||||
return ret;
|
||||
}
|
||||
|
||||
nvinfer1::ITensor *out_tensor = shuffle_layer->getOutput(0);
|
||||
if (out_tensor == nullptr) {
|
||||
|
@ -131,7 +129,7 @@ int ShuffleTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
return RET_ERROR;
|
||||
}
|
||||
out_tensor->setName(out_tensors_[0].Name().c_str());
|
||||
this->AddInnerOutTensors(out_tensor);
|
||||
this->AddInnerOutTensors(ITensorHelper{out_tensor, Format::NHWC});
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
@ -177,7 +175,7 @@ int ShuffleTensorRT::AddUnsqueezeOp(nvinfer1::IShuffleLayer *shuffle_layer) {
|
|||
MS_LOG(WARNING) << "AddUnsqueezeOp size of in tensort needs check: " << in_tensors_.size();
|
||||
}
|
||||
// axis
|
||||
auto unsqueeze_shape = tensorrt_in_tensors_[0]->getDimensions();
|
||||
auto unsqueeze_shape = tensorrt_in_tensors_[0].trt_tensor_->getDimensions();
|
||||
std::vector<int64_t> new_shape(unsqueeze_shape.d, unsqueeze_shape.d + unsqueeze_shape.nbDims);
|
||||
auto axis = unsqueeze_op->axis();
|
||||
|
||||
|
@ -229,7 +227,7 @@ int ShuffleTensorRT::AddReshapeOp(nvinfer1::IShuffleLayer *shuffle_layer) {
|
|||
MS_LOG(ERROR) << "invalid shape tensor for reshape " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
shuffle_layer->setInput(1, *tensorrt_in_tensors_[1]);
|
||||
shuffle_layer->setInput(1, *tensorrt_in_tensors_[1].trt_tensor_);
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
|
|
@ -53,7 +53,22 @@ int SliceTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
nvinfer1::Dims size_dims = lite::ConvertCudaDims(out_tensors_[0].Shape());
|
||||
nvinfer1::Dims stride_dims = lite::ConvertCudaDims(stride.Data().get(), stride.ElementNum());
|
||||
|
||||
nvinfer1::ISliceLayer *slice_layer = network->addSlice(*tensorrt_in_tensors_[0], start_dims, size_dims, stride_dims);
|
||||
nvinfer1::ITensor *slice_input = tensorrt_in_tensors_[0].trt_tensor_;
|
||||
Format out_format = tensorrt_in_tensors_[0].format_;
|
||||
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == 4 &&
|
||||
tensorrt_in_tensors_[0].format_ == Format::NCHW) {
|
||||
// transpose: NCHW->NHWC
|
||||
nvinfer1::IShuffleLayer *transpose_layer_in = NCHW2NHWC(network, *tensorrt_in_tensors_[0].trt_tensor_);
|
||||
if (transpose_layer_in == nullptr) {
|
||||
MS_LOG(ERROR) << "op action convert failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
transpose_layer_in->setName((op_name_ + "_transpose2NHWC").c_str());
|
||||
slice_input = transpose_layer_in->getOutput(0);
|
||||
out_format = Format::NHWC;
|
||||
}
|
||||
|
||||
nvinfer1::ISliceLayer *slice_layer = network->addSlice(*slice_input, start_dims, size_dims, stride_dims);
|
||||
if (slice_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "add Slice op failed for TensorRT: " << op_name_;
|
||||
return RET_ERROR;
|
||||
|
@ -65,7 +80,7 @@ int SliceTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
return RET_ERROR;
|
||||
}
|
||||
out_tensor->setName(out_tensors_[0].Name().c_str());
|
||||
this->AddInnerOutTensors(out_tensor);
|
||||
this->AddInnerOutTensors(ITensorHelper{out_tensor, out_format});
|
||||
return RET_OK;
|
||||
}
|
||||
} // namespace mindspore::lite
|
||||
|
|
|
@ -23,19 +23,10 @@ int SoftMaxTensorRT::IsSupport(const schema::Primitive *primitive, const std::ve
|
|||
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (primitive->value_type() == schema::PrimitiveType::PrimitiveType_LogSoftmax) {
|
||||
with_log_ = true;
|
||||
auto softmax_op = primitive->value_as_LogSoftmax();
|
||||
if (softmax_op == nullptr) {
|
||||
MS_LOG(ERROR) << "LogSoftmax convert failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
} else {
|
||||
auto softmax_op = primitive->value_as_Softmax();
|
||||
if (softmax_op == nullptr) {
|
||||
MS_LOG(ERROR) << "convert failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
softmax_op_ = primitive->value_as_Softmax();
|
||||
if (softmax_op_ == nullptr) {
|
||||
MS_LOG(ERROR) << "convert failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
if (in_tensors.size() != 1) {
|
||||
|
@ -48,7 +39,6 @@ int SoftMaxTensorRT::IsSupport(const schema::Primitive *primitive, const std::ve
|
|||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int SoftMaxTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
||||
if (network == nullptr) {
|
||||
MS_LOG(ERROR) << "network is invalid";
|
||||
|
@ -66,58 +56,36 @@ int SoftMaxTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
MS_LOG(ERROR) << "softmax output tensor create failed for TensorRT.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (with_log_) {
|
||||
nvinfer1::IUnaryLayer *log_layer = network->addUnary(*out_tensor, nvinfer1::UnaryOperation::kLOG);
|
||||
if (log_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "add log op failed for TensorRT.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
log_layer->setName((op_name_ + "_log").c_str());
|
||||
out_tensor = log_layer->getOutput(0);
|
||||
if (out_tensor == nullptr) {
|
||||
MS_LOG(ERROR) << "softmax log output tensor create failed for TensorRT.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
}
|
||||
out_tensor->setName(out_tensors_[0].Name().c_str());
|
||||
this->AddInnerOutTensors(out_tensor);
|
||||
this->AddInnerOutTensors(ITensorHelper{out_tensor, tensorrt_in_tensors_[0].format_});
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
// Adds a TensorRT softmax layer on the first inner input tensor and configures its axis.
//
// The axis is read from the Softmax primitive cached in softmax_op_. A negative axis
// counts back from the last dimension. When the input is a 4-D tensor tagged NCHW, the
// axis is mapped with ConvertAxisFromNHWC2NCHW before it is applied.
//
// Returns the created layer, or nullptr when the layer cannot be created or the axis is
// missing or out of range.
nvinfer1::ISoftMaxLayer *SoftMaxTensorRT::AddSoftMaxOp(nvinfer1::INetworkDefinition *network) {
  nvinfer1::ISoftMaxLayer *softmax_layer = network->addSoftMax(*tensorrt_in_tensors_[0].trt_tensor_);
  if (softmax_layer == nullptr) {
    MS_LOG(ERROR) << "add softmax op failed for TensorRT.";
    return nullptr;
  }
  if (softmax_op_ == nullptr) {
    MS_LOG(ERROR) << "convert failed";
    return nullptr;
  }
  auto axis = softmax_op_->axis();
  if (axis == nullptr) {
    MS_LOG(ERROR) << "softmax axis is missing for " << op_name_;
    return nullptr;
  }
  std::vector<int64_t> axis_val(axis->begin(), axis->end());
  if (axis_val.empty()) {
    // Guard the axis_val[0] access below.
    MS_LOG(ERROR) << "softmax axis is empty for " << op_name_;
    return nullptr;
  }
  if (axis_val.size() != 1) {
    MS_LOG(WARNING) << "axis needs check";
  }

  const int nb_dims = tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims;
  int64_t axis_format_value = axis_val[0];
  if (axis_format_value < 0) {
    axis_format_value += nb_dims;
  }
  if (axis_format_value < 0 || axis_format_value >= nb_dims) {
    MS_LOG(ERROR) << "axis is out of range of input tensor dims.";
    return nullptr;
  }
  if (nb_dims == 4 && tensorrt_in_tensors_[0].format_ == Format::NCHW) {
    // transpose axis to NCHW
    axis_format_value = ConvertAxisFromNHWC2NCHW(static_cast<int>(axis_format_value));
  }
  // setAxes expects a bitmask with the bit of the softmax dimension set, not the index.
  softmax_layer->setAxes(1u << axis_format_value);
  return softmax_layer;
}
|
||||
} // namespace mindspore::lite
|
||||
|
|
|
@ -34,8 +34,9 @@ class SoftMaxTensorRT : public TensorRTOp {
|
|||
const std::vector<mindspore::MSTensor> &out_tensors) override;
|
||||
|
||||
private:
|
||||
bool with_log_ = false;
|
||||
nvinfer1::ISoftMaxLayer *AddSoftMaxOp(nvinfer1::INetworkDefinition *network);
|
||||
|
||||
const schema::Softmax *softmax_op_;
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_OP_SOFTMAX_TENSORRT_H_
|
||||
|
|
|
@ -19,13 +19,13 @@
|
|||
namespace mindspore::lite {
|
||||
const schema::Primitive *TensorRTOp::GetPrimitive() { return this->op_primitive_; }
|
||||
|
||||
void TensorRTOp::AddInnerInTensors(nvinfer1::ITensor *tensor) { this->tensorrt_in_tensors_.push_back(tensor); }
|
||||
void TensorRTOp::AddInnerInTensors(ITensorHelper tensor) { this->tensorrt_in_tensors_.push_back(tensor); }
|
||||
|
||||
void TensorRTOp::AddInnerOutTensors(nvinfer1::ITensor *tensor) { this->tensorrt_out_tensors_.push_back(tensor); }
|
||||
void TensorRTOp::AddInnerOutTensors(ITensorHelper tensor) { this->tensorrt_out_tensors_.push_back(tensor); }
|
||||
|
||||
std::vector<nvinfer1::ITensor *> &TensorRTOp::GetInnerOutTensor() { return this->tensorrt_out_tensors_; }
|
||||
std::vector<ITensorHelper> &TensorRTOp::GetInnerOutTensor() { return this->tensorrt_out_tensors_; }
|
||||
|
||||
std::vector<nvinfer1::ITensor *> &TensorRTOp::GetInnerInTensors() { return this->tensorrt_in_tensors_; }
|
||||
std::vector<ITensorHelper> &TensorRTOp::GetInnerInTensors() { return this->tensorrt_in_tensors_; }
|
||||
|
||||
std::string TensorRTOp::GetOpName() { return this->op_name_; }
|
||||
|
||||
|
|
|
@ -23,12 +23,18 @@
|
|||
#include "include/api/kernel.h"
|
||||
#include "src/common/log_adapter.h"
|
||||
#include "include/errorcode.h"
|
||||
#include "src/delegate/tensorrt/tensorrt_utils.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
constexpr int INPUT_SIZE2 = 2;
|
||||
constexpr int INPUT_SIZE3 = 3;
|
||||
constexpr int INPUT_SIZE4 = 4;
|
||||
|
||||
struct ITensorHelper {
|
||||
nvinfer1::ITensor *trt_tensor_{nullptr};
|
||||
mindspore::Format format_;
|
||||
};
|
||||
|
||||
class TensorRTOp {
|
||||
public:
|
||||
explicit TensorRTOp(const schema::Primitive *primitive, std::vector<mindspore::MSTensor> in_tensors,
|
||||
|
@ -51,13 +57,13 @@ class TensorRTOp {
|
|||
|
||||
const schema::Primitive *GetPrimitive();
|
||||
|
||||
void AddInnerInTensors(nvinfer1::ITensor *tensor);
|
||||
void AddInnerInTensors(ITensorHelper tensor);
|
||||
|
||||
void AddInnerOutTensors(nvinfer1::ITensor *tensor);
|
||||
void AddInnerOutTensors(ITensorHelper tensor);
|
||||
|
||||
std::vector<nvinfer1::ITensor *> &GetInnerOutTensor();
|
||||
std::vector<ITensorHelper> &GetInnerOutTensor();
|
||||
|
||||
std::vector<nvinfer1::ITensor *> &GetInnerInTensors();
|
||||
std::vector<ITensorHelper> &GetInnerInTensors();
|
||||
|
||||
std::string GetOpName();
|
||||
|
||||
|
@ -86,9 +92,9 @@ class TensorRTOp {
|
|||
|
||||
std::vector<mindspore::MSTensor> out_tensors_;
|
||||
|
||||
std::vector<nvinfer1::ITensor *> tensorrt_in_tensors_;
|
||||
std::vector<ITensorHelper> tensorrt_in_tensors_;
|
||||
|
||||
std::vector<nvinfer1::ITensor *> tensorrt_out_tensors_;
|
||||
std::vector<ITensorHelper> tensorrt_out_tensors_;
|
||||
|
||||
std::vector<TensorRTOp *> in_ops_;
|
||||
|
||||
|
|
|
@ -44,7 +44,7 @@ int UnaryTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
MS_LOG(ERROR) << "network or input tensor is invalid";
|
||||
return RET_ERROR;
|
||||
}
|
||||
nvinfer1::IUnaryLayer *cal_layer = network->addUnary(*tensorrt_in_tensors_[0], unary_op_);
|
||||
nvinfer1::IUnaryLayer *cal_layer = network->addUnary(*tensorrt_in_tensors_[0].trt_tensor_, unary_op_);
|
||||
if (cal_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "addUnary failed for: " << op_name_;
|
||||
return RET_ERROR;
|
||||
|
@ -53,7 +53,7 @@ int UnaryTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
|
||||
nvinfer1::ITensor *op_out_tensor = cal_layer->getOutput(0);
|
||||
op_out_tensor->setName(out_tensors_[0].Name().c_str());
|
||||
this->AddInnerOutTensors(op_out_tensor);
|
||||
this->AddInnerOutTensors(ITensorHelper{op_out_tensor, tensorrt_in_tensors_[0].format_});
|
||||
return RET_OK;
|
||||
}
|
||||
} // namespace mindspore::lite
|
||||
|
|
|
@ -187,6 +187,7 @@ nvinfer1::ITensor *TensorRTSubGraph::SetTensorRTNetworkInput(const mindspore::MS
|
|||
|
||||
// only support NHWC HW dim resize
|
||||
if (input_hw_index_ != -1) {
|
||||
MS_LOG(INFO) << "input tensor format: " << in_tensor.format();
|
||||
input_hw_index_ = in_tensor.format() == Format::NHWC ? 1 : /* NCHW*/ 2;
|
||||
input_dims.d[input_hw_index_] = -1;
|
||||
input_dims.d[input_hw_index_ + 1] = -1;
|
||||
|
@ -208,19 +209,20 @@ int TensorRTSubGraph::BuildTensorRTGraph() {
|
|||
MS_LOG(ERROR) << "SetTensorRTNetworkInput failed for " << in_tensor.Name();
|
||||
return RET_ERROR;
|
||||
}
|
||||
cur_op->AddInnerInTensors(trt_tensor);
|
||||
cur_op->AddInnerInTensors(ITensorHelper{trt_tensor, in_tensor.format()});
|
||||
continue;
|
||||
}
|
||||
|
||||
auto trt_tensor = FindTensorRTInputs(cur_op, in_tensor);
|
||||
// weight tensor
|
||||
if (trt_tensor == nullptr) {
|
||||
ITensorHelper trt_tensor = FindTensorRTInputs(cur_op, in_tensor);
|
||||
if (trt_tensor.trt_tensor_ == nullptr) {
|
||||
// weight tensor
|
||||
if (trt_specific_weight_nodes_.find(cur_op->type()) == trt_specific_weight_nodes_.end()) {
|
||||
if (in_tensor == nullptr) {
|
||||
MS_LOG(ERROR) << "Weight Tensor is nullptr.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
trt_tensor = lite::ConvertConstantTensor(this->network_, in_tensor);
|
||||
trt_tensor.trt_tensor_ = lite::ConvertConstantTensor(this->network_, in_tensor);
|
||||
trt_tensor.format_ = Format::NHWC;
|
||||
MS_LOG(INFO) << "auto convert constant tensor for: " << cur_op->GetOpName();
|
||||
cur_op->AddInnerInTensors(trt_tensor);
|
||||
}
|
||||
|
@ -236,16 +238,44 @@ int TensorRTSubGraph::BuildTensorRTGraph() {
|
|||
}
|
||||
}
|
||||
|
||||
ret = MarkOutputs();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "MarkOutputs failed in TensorRT network";
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = BuildEngine();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Create engine failed in TensorRT network";
|
||||
return ret;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
int TensorRTSubGraph::MarkOutputs() {
|
||||
// Mark NetWork Output Tensor.
|
||||
for (auto out_tensor : outputs_) {
|
||||
for (auto out_op : this->out_ops_) {
|
||||
for (size_t index = 0; index < out_op->outputs().size(); index++) {
|
||||
if (out_op->outputs()[index] == out_tensor) {
|
||||
out_op->GetInnerOutTensor()[index]->setName(out_tensor.Name().c_str());
|
||||
nvinfer1::ITensor *out_trt_tensor = out_op->GetInnerOutTensor()[index].trt_tensor_;
|
||||
if (out_op->GetInnerOutTensor()[index].trt_tensor_->getDimensions().nbDims == 4 &&
|
||||
out_op->GetInnerOutTensor()[index].format_ == Format::NCHW) {
|
||||
// transpose subgraph output from nchw to nhwc
|
||||
nvinfer1::IShuffleLayer *transpose_layer_out =
|
||||
NCHW2NHWC(network_, *out_op->GetInnerOutTensor()[index].trt_tensor_);
|
||||
if (transpose_layer_out == nullptr) {
|
||||
MS_LOG(ERROR) << "op action convert failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
transpose_layer_out->setName((out_tensor.Name() + "_transpose2NHWC").c_str());
|
||||
}
|
||||
|
||||
out_trt_tensor->setName(out_tensor.Name().c_str());
|
||||
MS_LOG(INFO) << "markOutput for: " << out_tensor.Name();
|
||||
this->network_->markOutput(*out_op->GetInnerOutTensor()[index]);
|
||||
for (int n = 0; n < out_op->GetInnerOutTensor()[index]->getDimensions().nbDims; n++) {
|
||||
if (out_op->GetInnerOutTensor()[index]->getDimensions().d[n] == -1) {
|
||||
this->network_->markOutput(*out_trt_tensor);
|
||||
for (int n = 0; n < out_trt_tensor->getDimensions().nbDims; n++) {
|
||||
if (out_trt_tensor->getDimensions().d[n] == -1) {
|
||||
output_batchsize_index_ = n;
|
||||
break;
|
||||
}
|
||||
|
@ -254,12 +284,6 @@ int TensorRTSubGraph::BuildTensorRTGraph() {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
ret = BuildEngine();
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "Create engine failed in TensorRT network";
|
||||
return ret;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
@ -292,7 +316,7 @@ int TensorRTSubGraph::Prepare() {
|
|||
trt_in_tensor_name_.push_back(tensor.Name());
|
||||
nvinfer1::Dims input_dims = ConvertCudaDims(tensor.Shape());
|
||||
for (int od = 0; od < input_dims.nbDims; od++) {
|
||||
MS_LOG(INFO) << "in tensor " << tensor.Name() << " dims at " << od << " is " << input_dims.d[od];
|
||||
MS_LOG(DEBUG) << "in tensor " << tensor.Name() << " dims at " << od << " is " << input_dims.d[od];
|
||||
}
|
||||
|
||||
if (!this->trt_context_->setBindingDimensions(index, input_dims)) {
|
||||
|
@ -363,7 +387,7 @@ int TensorRTSubGraph::ReSize() {
|
|||
// Set actual input size
|
||||
nvinfer1::Dims input_dims = ConvertCudaDims(inputs_[i].Shape());
|
||||
for (int od = 0; od < input_dims.nbDims; od++) {
|
||||
MS_LOG(INFO) << "in tensor " << trt_in_tensor_name_[i] << " dims at " << od << " is " << input_dims.d[od];
|
||||
MS_LOG(DEBUG) << "in tensor " << trt_in_tensor_name_[i] << " dims at " << od << " is " << input_dims.d[od];
|
||||
}
|
||||
|
||||
if (!this->trt_context_->setBindingDimensions(index, input_dims)) {
|
||||
|
@ -420,7 +444,7 @@ int TensorRTSubGraph::Execute() {
|
|||
new_shape[output_batchsize_index_] = runtime_->GetBatchSize();
|
||||
}
|
||||
for (int od = 0; od < out_dims.nbDims; od++) {
|
||||
MS_LOG(INFO) << "out tensor " << trt_out_tensor_name_[i] << " dims at " << od << " is " << new_shape[od];
|
||||
MS_LOG(DEBUG) << "out tensor " << trt_out_tensor_name_[i] << " dims at " << od << " is " << new_shape[od];
|
||||
}
|
||||
outputs_[i].SetShape(new_shape);
|
||||
|
||||
|
@ -438,7 +462,7 @@ int TensorRTSubGraph::Execute() {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
nvinfer1::ITensor *TensorRTSubGraph::FindTensorRTInputs(TensorRTOp *cur_op, const mindspore::MSTensor &in_tensor) {
|
||||
ITensorHelper TensorRTSubGraph::FindTensorRTInputs(TensorRTOp *cur_op, const mindspore::MSTensor &in_tensor) {
|
||||
for (auto input_op : cur_op->in_ops()) {
|
||||
for (size_t i = 0; i < input_op->outputs().size(); i++) {
|
||||
auto out_tensor = input_op->outputs().at(i);
|
||||
|
@ -447,6 +471,6 @@ nvinfer1::ITensor *TensorRTSubGraph::FindTensorRTInputs(TensorRTOp *cur_op, cons
|
|||
}
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
return ITensorHelper{};
|
||||
}
|
||||
} // namespace mindspore::lite
|
||||
|
|
|
@ -67,7 +67,9 @@ class TensorRTSubGraph : public kernel::Kernel {
|
|||
|
||||
nvinfer1::ITensor *SetTensorRTNetworkInput(const mindspore::MSTensor &in_tensor);
|
||||
|
||||
static nvinfer1::ITensor *FindTensorRTInputs(TensorRTOp *cur_op, const mindspore::MSTensor &in_tensor);
|
||||
ITensorHelper FindTensorRTInputs(TensorRTOp *cur_op, const mindspore::MSTensor &in_tensor);
|
||||
|
||||
int MarkOutputs();
|
||||
|
||||
std::vector<TensorRTOp *> all_ops_{};
|
||||
// subgraph input nodes.
|
||||
|
|
|
@ -254,4 +254,37 @@ void SetCudaDevice(std::shared_ptr<GPUDeviceInfo> device_info_) {
|
|||
}
|
||||
MS_LOG(INFO) << "cuda is running on device: " << device;
|
||||
}
|
||||
// Determines the layout produced by applying permutation `perm` to a 4-D tensor whose
// layout is `input_format`. Only two permutations are recognised: {0, 3, 2, 1} applied
// to NHWC yields NCHW, and {0, 2, 3, 1} applied to NCHW yields NHWC. Any other
// combination logs a warning and reports the input format unchanged.
Format GetOutputFormat(Format input_format, nvinfer1::Permutation perm) {
  const auto order_is = [&perm](int p0, int p1, int p2, int p3) {
    return perm.order[0] == p0 && perm.order[1] == p1 && perm.order[2] == p2 && perm.order[3] == p3;
  };

  if (input_format == Format::NHWC && order_is(0, 3, 2, 1)) {
    return Format::NCHW;
  }
  if (input_format == Format::NCHW && order_is(0, 2, 3, 1)) {
    return Format::NHWC;
  }

  MS_LOG(WARNING) << "transpose out format needs to check for " << input_format;
  return input_format;
}
|
||||
int ConvertAxisFromNHWC2NCHW(int nhwc_axis) {
|
||||
// N0H1W2C3->N0C1H2W3
|
||||
if (nhwc_axis > kNHWC_C) {
|
||||
return nhwc_axis;
|
||||
}
|
||||
switch (nhwc_axis) {
|
||||
case kNHWC_N:
|
||||
return kNCHW_N;
|
||||
case kNHWC_H:
|
||||
return kNCHW_H;
|
||||
case kNHWC_W:
|
||||
return kNCHW_W;
|
||||
case kNHWC_C:
|
||||
return kNCHW_C;
|
||||
default:
|
||||
MS_LOG(ERROR) << "invalid input axis for nhwc: " << nhwc_axis;
|
||||
}
|
||||
return nhwc_axis;
|
||||
}
|
||||
|
||||
} // namespace mindspore::lite
|
||||
|
|
|
@ -23,6 +23,10 @@
|
|||
#include "schema/ops_generated.h"
|
||||
#include "nnacl/pack.h"
|
||||
|
||||
// Dimension indices of a 4-D tensor in NCHW order (N=0, C=1, H=2, W=3).
#define kNCHW_N 0
|
||||
#define kNCHW_C 1
|
||||
#define kNCHW_H 2
|
||||
#define kNCHW_W 3
|
||||
namespace mindspore::lite {
|
||||
struct ActivationParams {
|
||||
nvinfer1::ActivationType activation_type;
|
||||
|
@ -61,5 +65,9 @@ nvinfer1::Weights TransposeWeight(const mindspore::MSTensor &ms_tensor, float **
|
|||
nvinfer1::Weights ConvertWeight(const mindspore::MSTensor &ms_tensor);
|
||||
|
||||
void SetCudaDevice(std::shared_ptr<GPUDeviceInfo> device_info_);
|
||||
|
||||
Format GetOutputFormat(Format input_format, nvinfer1::Permutation perm);
|
||||
|
||||
int ConvertAxisFromNHWC2NCHW(int nhwc_axis);
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_UTILS_H_
|
||||
|
|
Loading…
Reference in New Issue