[MSLITE] add TensorRT softmax op and fix bugs for compatibility with diverse networks

Liu_Xuu 2021-09-27 17:04:51 +08:00
parent 7476deba66
commit dc7484f05f
18 changed files with 67 additions and 51 deletions

View File

@@ -65,7 +65,7 @@ int ActivationTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
}
activation_layer->setName(op_name_.c_str());
-activation_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str());
+activation_layer->getOutput(0)->setName((op_name_ + "_output").c_str());
this->AddInnerOutTensors(ITensorHelper{activation_layer->getOutput(0), tensorrt_in_tensors_[0].format_});
return RET_OK;
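
Every file in this commit makes the same rename: an op's inner TensorRT output used to carry the graph tensor's name (out_tensors_[0].Name()) and now carries op_name_ + "_output". The graph-level name is reserved for the true subgraph output, which MarkOutputs may only produce after appending a transpose layer (see the last file). A minimal sketch of the shared pattern, assuming only the TensorRT headers; the helper name is hypothetical:

```cpp
#include <string>
#include <NvInfer.h>

// Hypothetical helper illustrating the naming convention adopted here:
// inner layer outputs are named "<op>_output" so that the user-visible
// tensor name can be assigned once, at the real subgraph boundary.
inline void NameLayerAndOutput(nvinfer1::ILayer *layer, const std::string &op_name) {
  layer->setName(op_name.c_str());
  layer->getOutput(0)->setName((op_name + "_output").c_str());
}
```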

View File

@@ -24,7 +24,7 @@ int ConcateTensorRT::IsSupport(const schema::Primitive *primitive, const std::ve
MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
return RET_ERROR;
}
-if (in_tensors.size() != INPUT_SIZE2) {
+if (in_tensors.size() < INPUT_SIZE2) {
MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
return RET_ERROR;
}
@@ -102,7 +102,7 @@ int ConcateTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
concate_layer->setAxis(axis);
}
concate_layer->setName(op_name_.c_str());
-concate_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str());
+concate_layer->getOutput(0)->setName((op_name_ + "_output").c_str());
this->AddInnerOutTensors(ITensorHelper{concate_layer->getOutput(0), out_format});
return RET_OK;
}

View File

@@ -119,8 +119,7 @@ int ConvolutionTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
}
activation_layer->setName((op_name_ + "_activation").c_str());
}
-activation_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str());
+activation_layer->getOutput(0)->setName((op_name_ + "_output").c_str());
this->AddInnerOutTensors(ITensorHelper{activation_layer->getOutput(0), Format::NCHW});
return RET_OK;
}

View File

@@ -116,8 +116,7 @@ int DeconvolutionTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
}
activation_layer->setName((op_name_ + "_activation").c_str());
}
-activation_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str());
+activation_layer->getOutput(0)->setName((op_name_ + "_output").c_str());
this->AddInnerOutTensors(ITensorHelper{activation_layer->getOutput(0), Format::NCHW});
return RET_OK;
}
@@ -150,14 +149,27 @@ void DeconvolutionTensorRT::SetAttributes(const schema::Conv2dTransposeFusion *m
decon_layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
} else {
auto padding = ms_op->pad_list();
-if (padding != nullptr) {
-auto padding_val = std::vector<int64_t>(padding->begin(), padding->end());
-nvinfer1::Dims dims{};
-dims.nbDims = 2;
-dims.d[0] = padding_val[0];
-dims.d[1] = padding_val[2];
-decon_layer->setPaddingNd(dims);
+auto out_pad = ms_op->output_paddings();
+if (padding == nullptr || out_pad == nullptr) {
+MS_LOG(WARNING) << "no pad value of " << op_name_;
+return;
+}
+auto padding_val = std::vector<int64_t>(padding->begin(), padding->end());
+auto out_pad_val = std::vector<int64_t>(out_pad->begin(), out_pad->end()); // h, w
+if (out_pad_val.size() != DIMENSION_2D || padding_val.size() != DIMENSION_4D) {
+MS_LOG(ERROR) << "invalid size of pad " << op_name_;
+return;
+}
+nvinfer1::Dims dims_pre{};
+dims_pre.nbDims = 2;
+dims_pre.d[0] = padding_val[0]; // up
+dims_pre.d[1] = padding_val[2]; // left
+decon_layer->setPrePadding(dims_pre);
+nvinfer1::Dims dims_post{};
+dims_post.nbDims = 2;
+dims_post.d[0] = padding_val[1] - out_pad_val[0]; // down
+dims_post.d[1] = padding_val[3] - out_pad_val[1]; // right
+decon_layer->setPostPadding(dims_post);
}
}
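
The rewritten branch converts pad_list, assumed ordered (up, down, left, right), and output_paddings (h, w) into TensorRT's asymmetric padding: pre-padding takes the top/left pads, and the output padding is subtracted from the bottom/right post-padding, since output padding lengthens the deconvolution result along those edges. A worked sketch with illustrative values:

```cpp
#include <NvInfer.h>

// Sketch of the pre/post padding arithmetic; pad values are examples, not
// taken from any real model.
void SetDeconvPadding(nvinfer1::IDeconvolutionLayer *layer) {
  const int pad[4] = {1, 1, 1, 1};  // up, down, left, right
  const int out_pad[2] = {1, 1};    // output_paddings: h, w
  nvinfer1::Dims pre{};
  pre.nbDims = 2;
  pre.d[0] = pad[0];                // up
  pre.d[1] = pad[2];                // left
  layer->setPrePadding(pre);
  nvinfer1::Dims post{};
  post.nbDims = 2;
  post.d[0] = pad[1] - out_pad[0];  // down, reduced by output padding h
  post.d[1] = pad[3] - out_pad[1];  // right, reduced by output padding w
  layer->setPostPadding(post);      // here: zero post padding on both edges
}
```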

View File

@@ -81,7 +81,7 @@ int ElementWiseTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
return RET_ERROR;
}
first_in_tensor_index_ =
-strcmp(tensorrt_in_tensors_[0].trt_tensor_->getName(), in_tensors_[0].Name().c_str()) == 0 ? 0 : 1;
+SameDims(tensorrt_in_tensors_[0].trt_tensor_->getDimensions(), in_tensors_[0].Shape()) ? 0 : 1;
if (this->tensorrt_in_tensors_.size() != INPUT_SIZE2) {
int ret = AddConstTensor(network);
@@ -153,7 +153,7 @@ int ElementWiseTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
MS_LOG(WARNING) << "deal with scale and shift for pow op";
}
}
-op_out_tensor->setName(out_tensors_[0].Name().c_str());
+op_out_tensor->setName((op_name_ + "_output").c_str());
this->AddInnerOutTensors(ITensorHelper{op_out_tensor, tensorrt_in_tensors_[1].format_});
MS_LOG(DEBUG) << "output " << GetTensorFormat(op_out_tensor, tensorrt_in_tensors_[1].format_);
return RET_OK;
@@ -217,7 +217,7 @@ int ElementWiseTensorRT::AddConstTensor(nvinfer1::INetworkDefinition *network) {
in_tensors_[1 - first_in_tensor_index_].Data().get(),
in_tensors_[1 - first_in_tensor_index_].DataType());
if (constant_input == nullptr) {
MS_LOG(ERROR) << "create Itensor from constant tensor failed: " << op_name_;
MS_LOG(ERROR) << "create Itensor from scalar tensor failed: " << op_name_;
return RET_ERROR;
}
this->AddInnerInTensors(ITensorHelper{constant_input, tensorrt_in_tensors_[0].format_});
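
The strcmp test broke once inner tensors stopped carrying the graph tensor name (the "_output" rename above), so the op now identifies its first input by shape. A sketch of the comparison SameDims is assumed to perform:

```cpp
#include <vector>
#include <NvInfer.h>

// Assumed behavior of SameDims: a TensorRT tensor matches an MSLite shape
// when the rank and every dimension agree.
bool SameDimsSketch(const nvinfer1::Dims &dims, const std::vector<int64_t> &shape) {
  if (dims.nbDims != static_cast<int>(shape.size())) {
    return false;
  }
  for (int i = 0; i < dims.nbDims; i++) {
    if (dims.d[i] != shape[i]) {
      return false;
    }
  }
  return true;
}
```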

View File

@@ -95,7 +95,7 @@ int GatherTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
return RET_ERROR;
}
gather_layer->setName(op_name_.c_str());
-gather_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str());
+gather_layer->getOutput(0)->setName((op_name_ + "_output").c_str());
this->AddInnerOutTensors(ITensorHelper{gather_layer->getOutput(0), out_format});
return RET_OK;
}

View File

@@ -74,8 +74,7 @@ int MatMulTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
bias_layer->setName(bias_layer_name.c_str());
out_tensor = bias_layer->getOutput(0);
}
-out_tensor->setName(out_tensors_[0].Name().c_str());
+out_tensor->setName((op_name_ + "_output").c_str());
this->AddInnerOutTensors(ITensorHelper{out_tensor, out_format});
return RET_OK;
}

View File

@@ -105,7 +105,7 @@ int PadTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
return RET_ERROR;
}
padding_layer->setName(op_name_.c_str());
-padding_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str());
+padding_layer->getOutput(0)->setName((op_name_ + "_output").c_str());
this->AddInnerOutTensors(ITensorHelper{padding_layer->getOutput(0), Format::NCHW});
return RET_OK;
}

View File

@@ -89,7 +89,7 @@ int PoolTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
}
activation_layer->setName((op_name_ + "_activation").c_str());
}
-activation_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str());
+activation_layer->getOutput(0)->setName((op_name_ + "_output").c_str());
this->AddInnerOutTensors(ITensorHelper{activation_layer->getOutput(0), Format::NCHW});
MS_LOG(DEBUG) << "output " << GetTensorFormat(activation_layer->getOutput(0), Format::NCHW);
return RET_OK;
@@ -174,14 +174,14 @@ int PoolTensorRT::ParseParams() {
void PoolTensorRT::AddParams(nvinfer1::IPoolingLayer *pooling_layer) {
nvinfer1::Dims stride_dims = ConvertCudaDims(stride_);
pooling_layer->setStrideNd(stride_dims);
-pooling_layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
-if (pad_mode_ != schema::PadMode::PadMode_SAME && pad_mode_ != schema::PadMode::PadMode_PAD) {
-MS_LOG(WARNING) << "needs check pad mode of " << EnumNamePadMode(pad_mode_) << " for node: " << op_name_;
+if (pad_mode_ == schema::PadMode::PadMode_SAME) {
+pooling_layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
+} else {
+nvinfer1::Dims dims{};
+dims.nbDims = DIMENSION_2D;
+dims.d[0] = padding_[0];
+dims.d[1] = padding_[DIMENSION_2D];
+pooling_layer->setPaddingNd(dims);
+}
-nvinfer1::Dims dims{};
-dims.nbDims = DIMENSION_2D;
-dims.d[0] = padding_[1];
-dims.d[1] = padding_[DIMENSION_2D];
-pooling_layer->setPaddingNd(dims);
}
} // namespace mindspore::lite
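
Pooling previously forced kSAME_UPPER and then overwrote it with explicit padding; the new code picks one or the other. A sketch of the branch, assuming padding_ is ordered (up, down, left, right) and that setPaddingNd applies its (h, w) pair symmetrically:

```cpp
#include <NvInfer.h>

// Sketch: SAME padding is delegated to TensorRT; anything else sets an
// explicit symmetric (h, w) padding from the up/left values.
void SetPoolPadding(nvinfer1::IPoolingLayer *layer, const int padding[4], bool same_mode) {
  if (same_mode) {
    layer->setPaddingMode(nvinfer1::PaddingMode::kSAME_UPPER);
    return;
  }
  nvinfer1::Dims dims{};
  dims.nbDims = 2;
  dims.d[0] = padding[0];  // pad height (up)
  dims.d[1] = padding[2];  // pad width (left)
  layer->setPaddingNd(dims);
}
```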

View File

@@ -58,6 +58,7 @@ int ReduceTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
bool keep_dims = reduce_op->keep_dims();
out_format_ = tensorrt_in_tensors_[0].format_;
nvinfer1::ITensor *shuffler_input = tensorrt_in_tensors_[0].trt_tensor_;
MS_LOG(DEBUG) << "origin input " << GetTensorFormat(shuffler_input, out_format_);
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D &&
!SameDims(tensorrt_in_tensors_[0].trt_tensor_->getDimensions(), in_tensors_[0].Shape())) {
if (tensorrt_in_tensors_[0].format_ == Format::NCHW) {
@@ -65,6 +66,7 @@ int ReduceTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
nvinfer1::IShuffleLayer *transpose_layer = NCHW2NHWC(network, *tensorrt_in_tensors_[0].trt_tensor_);
if (transpose_layer == nullptr) {
MS_LOG(ERROR) << "create transpose layer failed for " << op_name_;
+return RET_ERROR;
}
transpose_layer->setName((op_name_ + "_transpose_in").c_str());
shuffler_input = transpose_layer->getOutput(0);
@@ -76,22 +78,22 @@ int ReduceTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
nvinfer1::ITensor *reduce_input = shuffler_input;
// 4 dims support reduce at each axis
-if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims != DIMENSION_4D) {
-nvinfer1::IShuffleLayer *unsqueeze_layer = network->addShuffle(*tensorrt_in_tensors_[0].trt_tensor_);
+if (reduce_input->getDimensions().nbDims < DIMENSION_4D) {
+nvinfer1::IShuffleLayer *unsqueeze_layer = network->addShuffle(*reduce_input);
if (unsqueeze_layer == nullptr) {
MS_LOG(ERROR) << "add Shuffle op failed for TensorRT.";
return RET_ERROR;
}
unsqueeze_layer->setName((op_name_ + "_unsqueeze4dims").c_str());
-nvinfer1::Dims unsqueeze_dims = tensorrt_in_tensors_[0].trt_tensor_->getDimensions();
-for (int i = unsqueeze_dims.nbDims; i < 4; i++) {
+nvinfer1::Dims unsqueeze_dims = reduce_input->getDimensions();
+for (int i = unsqueeze_dims.nbDims; i < DIMENSION_4D; i++) {
unsqueeze_dims.d[i] = 1;
}
-unsqueeze_dims.nbDims = 4;
+unsqueeze_dims.nbDims = DIMENSION_4D;
unsqueeze_layer->setReshapeDimensions(unsqueeze_dims);
reduce_input = unsqueeze_layer->getOutput(0);
}
MS_LOG(DEBUG) << "after transpose and expand dims " << GetTensorFormat(reduce_input, out_format_);
uint32_t reduceAxis = GetAxis();
nvinfer1::IReduceLayer *layer = network->addReduce(*reduce_input, reduce_op_, reduceAxis, keep_dims);
@@ -118,8 +120,9 @@ int ReduceTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
MS_LOG(ERROR) << "addReduce output tensor create failed for TensorRT.";
return RET_ERROR;
}
-out_tensor->setName(out_tensors_[0].Name().c_str());
+out_tensor->setName((op_name_ + "_output").c_str());
this->AddInnerOutTensors(ITensorHelper{out_tensor, out_format_});
MS_LOG(DEBUG) << "output " << GetTensorFormat(out_tensor, out_format_);
return RET_OK;
}
uint32_t ReduceTensorRT::GetAxis() {
@@ -134,10 +137,10 @@ uint32_t ReduceTensorRT::GetAxis() {
MS_LOG(WARNING) << "not int data type";
}
int *axis_data = reinterpret_cast<int *>(axis_tensor.MutableData());
-uint32_t base = std::pow(2, in_tensors_[0].Shape().size());
+// uint32_t base = std::pow(2, DIMENSION_4D);
for (int i = 0; i < axis_tensor.ElementNum(); i++) {
int format_axis_data = *axis_data;
-reduceAxis |= (base - (1u << format_axis_data));
+reduceAxis |= 1u << format_axis_data;
axis_data++;
}
MS_LOG(DEBUG) << "reduceAxis: " << reduceAxis;

View File

@@ -128,8 +128,7 @@ int ScaleTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
if (activation_tensor->getDimensions().nbDims > static_cast<int>(out_tensors_[0].Shape().size())) {
op_out_tensor = AddSqueezeOp(activation_tensor, network);
}
-op_out_tensor->setName(out_tensors_[0].Name().c_str());
+op_out_tensor->setName((op_name_ + "_output").c_str());
this->AddInnerOutTensors(ITensorHelper{op_out_tensor, out_format_});
MS_LOG(DEBUG) << "output " << GetTensorFormat(op_out_tensor, out_format_);
return RET_OK;

View File

@@ -45,7 +45,7 @@ int ShapeTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
return RET_ERROR;
}
shape_layer->setName(op_name_.c_str());
-shape_layer->getOutput(0)->setName(out_tensors_[0].Name().c_str());
+shape_layer->getOutput(0)->setName((op_name_ + "_output").c_str());
this->AddInnerOutTensors(ITensorHelper{shape_layer->getOutput(0), tensorrt_in_tensors_[0].format_});
return RET_OK;
}

View File

@@ -145,7 +145,7 @@ int ShuffleTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
MS_LOG(ERROR) << "output tensor create failed";
return RET_ERROR;
}
-out_tensor->setName(out_tensors_[0].Name().c_str());
+out_tensor->setName((op_name_ + "_output").c_str());
MS_LOG(DEBUG) << "output " << GetTensorFormat(out_tensor, out_format_);
this->AddInnerOutTensors(ITensorHelper{out_tensor, out_format_});
return RET_OK;
@@ -232,9 +232,9 @@ int ShuffleTensorRT::AddTransposeOp(nvinfer1::IShuffleLayer *shuffle_layer) {
}
shuffle_layer->setFirstTranspose(perm);
if (perm_ternsor.ElementNum() == DIMENSION_4D) {
-if (out_format_ == Format::NHWC && perm.order[1] == 3 && perm.order[2] == 1 && perm.order[3] == 2) {
+if (perm.order[1] == 3 && perm.order[2] == 1 && perm.order[3] == 2) {
out_format_ = Format::NCHW;
-} else if (out_format_ == Format::NCHW && perm.order[1] == 2 && perm.order[2] == 3 && perm.order[3] == 1) {
+} else if (perm.order[1] == 2 && perm.order[2] == 3 && perm.order[3] == 1) {
out_format_ = Format::NHWC;
} else {
MS_LOG(WARNING) << "input format and perm order is invalid: " << op_name_;

View File

@@ -79,7 +79,7 @@ int SliceTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
MS_LOG(ERROR) << "output tensor create failed";
return RET_ERROR;
}
-out_tensor->setName(out_tensors_[0].Name().c_str());
+out_tensor->setName((op_name_ + "_output").c_str());
this->AddInnerOutTensors(ITensorHelper{out_tensor, out_format});
return RET_OK;
}

View File

@@ -56,7 +56,7 @@ int SoftMaxTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
MS_LOG(ERROR) << "softmax output tensor create failed for TensorRT.";
return RET_ERROR;
}
-out_tensor->setName(out_tensors_[0].Name().c_str());
+out_tensor->setName((op_name_ + "_output").c_str());
this->AddInnerOutTensors(ITensorHelper{out_tensor, tensorrt_in_tensors_[0].format_});
return RET_OK;
}
@@ -69,7 +69,6 @@ nvinfer1::ISoftMaxLayer *SoftMaxTensorRT::AddSoftMaxOp(nvinfer1::INetworkDefinit
}
auto axis = softmax_op_->axis();
auto axis_val = std::vector<int64_t>(axis->begin(), axis->end());
if (axis_val.size() != 1) {
MS_LOG(WARNING) << "axis needs check";
}
@@ -84,7 +83,9 @@ nvinfer1::ISoftMaxLayer *SoftMaxTensorRT::AddSoftMaxOp(nvinfer1::INetworkDefinit
// transpose axis to NCHW
axis_format_value = ConvertAxisFromNHWC2NCHW(axis_val[0]);
}
-current_layer_->setAxes(axis_format_value);
+uint32_t axis_bit = 1 << axis_format_value;
+MS_LOG(DEBUG) << op_name_ << " set axis to " << axis_bit;
+current_layer_->setAxes(axis_bit);
return current_layer_;
}
} // namespace mindspore::lite
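
Like addReduce, ISoftMaxLayer::setAxes expects a bitmask rather than an axis index, which is what the new axis_bit line provides; the axis is first remapped from NHWC to NCHW when the backing tensor is channel-first. A sketch of the conversion, with the NHWC-to-NCHW index mapping stated as an assumption:

```cpp
#include <cstdint>

// Sketch: turn a (possibly negative) softmax axis into the bitmask that
// ISoftMaxLayer::setAxes expects, for a 4-D tensor.
uint32_t SoftmaxAxisBit(int axis, int rank, bool to_nchw) {
  if (axis < 0) {
    axis += rank;  // e.g. -1 becomes rank - 1
  }
  if (to_nchw && rank == 4) {
    const int remap[4] = {0, 2, 3, 1};  // assumed NHWC -> NCHW index mapping
    axis = remap[axis];
  }
  return 1u << axis;  // one bit per dimension
}
```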

View File

@@ -52,7 +52,7 @@ int UnaryTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
cal_layer->setName(op_name_.c_str());
nvinfer1::ITensor *op_out_tensor = cal_layer->getOutput(0);
-op_out_tensor->setName(out_tensors_[0].Name().c_str());
+op_out_tensor->setName((op_name_ + "_output").c_str());
this->AddInnerOutTensors(ITensorHelper{op_out_tensor, tensorrt_in_tensors_[0].format_});
return RET_OK;
}

View File

@@ -98,6 +98,7 @@ Status TensorRTDelegate::Init() {
{schema::PrimitiveType_Reshape, GetTensorRTOp<ShuffleTensorRT>},
{schema::PrimitiveType_Transpose, GetTensorRTOp<ShuffleTensorRT>},
{schema::PrimitiveType_Flatten, GetTensorRTOp<ShuffleTensorRT>},
+{schema::PrimitiveType_Softmax, GetTensorRTOp<SoftMaxTensorRT>},
{schema::PrimitiveType_Sqrt, GetTensorRTOp<UnaryTensorRT>},
};
unsupport_hw_op_lists_ = {schema::PrimitiveType_Reshape};
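
Adding PrimitiveType_Softmax to this map is all that is needed to route softmax nodes to the new SoftMaxTensorRT builder; unmapped primitives stay on the default runtime. A self-contained sketch of the dispatch pattern, with stand-in types rather than the real MSLite signatures:

```cpp
#include <functional>
#include <iostream>
#include <map>
#include <string>

using OpBuilder = std::function<std::string(const std::string &)>;

int main() {
  // Stand-in for the op factory map: primitive type -> TensorRT op builder.
  std::map<std::string, OpBuilder> builders = {
      {"Softmax", [](const std::string &node) { return "SoftMaxTensorRT(" + node + ")"; }},
  };
  auto it = builders.find("Softmax");
  std::cout << (it != builders.end() ? it->second("node0") : "fallback to CPU") << "\n";
  return 0;
}
```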

View File

@@ -268,7 +268,8 @@ int TensorRTSubGraph::MarkOutputs() {
if (out_op->outputs()[index] == out_tensor) {
nvinfer1::ITensor *out_trt_tensor = out_op->GetInnerOutTensor()[index].trt_tensor_;
if (out_op->GetInnerOutTensor()[index].trt_tensor_->getDimensions().nbDims == DIMENSION_4D &&
-out_op->GetInnerOutTensor()[index].format_ == Format::NCHW) {
+out_op->GetInnerOutTensor()[index].format_ == Format::NCHW &&
+!SameDims(out_op->GetInnerOutTensor()[index].trt_tensor_->getDimensions(), out_tensor.Shape())) {
// transpose subgraph output from nchw to nhwc
nvinfer1::IShuffleLayer *transpose_layer_out =
NCHW2NHWC(network_, *out_op->GetInnerOutTensor()[index].trt_tensor_);
@@ -277,6 +278,7 @@ int TensorRTSubGraph::MarkOutputs() {
return RET_ERROR;
}
transpose_layer_out->setName((out_tensor.Name() + "_transpose2NHWC").c_str());
+out_trt_tensor = transpose_layer_out->getOutput(0);
}
out_trt_tensor->setName(out_tensor.Name().c_str());
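
Two fixes land here: the NCHW-to-NHWC transpose at the subgraph boundary is now skipped when the inner tensor's dimensions already match the expected output shape, and out_trt_tensor is reassigned to the transpose's output so the graph-level name is attached to the right tensor. A sketch of the guard, reusing the SameDims behavior assumed earlier:

```cpp
#include <vector>
#include <NvInfer.h>

// Sketch: transpose back to NHWC only for a 4-D tensor tracked as NCHW whose
// dimensions do not already equal the graph-level output shape.
bool NeedsNchw2NhwcTranspose(const nvinfer1::Dims &dims, bool is_nchw,
                             const std::vector<int64_t> &out_shape) {
  if (dims.nbDims != 4 || !is_nchw) {
    return false;
  }
  if (static_cast<size_t>(dims.nbDims) != out_shape.size()) {
    return true;
  }
  for (int i = 0; i < dims.nbDims; i++) {
    if (dims.d[i] != out_shape[i]) {
      return true;  // shapes differ, e.g. NCHW vs NHWC layout
    }
  }
  return false;  // already in the expected layout
}
```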