forked from mindspore-Ecosystem/mindspore
!25632 [MSLITE] add max, bias_add, argmax op and modify bugs for ocr models in tensorrt delegate
Merge pull request !25632 from Liu_Xuu/trt_1029_shengwen10_merge
This commit is contained in:
commit
d39ad14ce5
|
@ -51,31 +51,33 @@ int ConcateTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
}
|
||||
|
||||
nvinfer1::ITensor *trt_input_tensors[tensorrt_in_tensors_.size()];
|
||||
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims !=
|
||||
tensorrt_in_tensors_[1].trt_tensor_->getDimensions().nbDims) {
|
||||
MS_LOG(ERROR) << "dims of inputs is invalid for " << op_name_;
|
||||
return RET_ERROR;
|
||||
int input_nbDims = tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims;
|
||||
Format out_format = tensorrt_in_tensors_[0].format_;
|
||||
|
||||
for (size_t i = 0; i < tensorrt_in_tensors_.size(); i++) {
|
||||
if (tensorrt_in_tensors_[i].trt_tensor_->getDimensions().nbDims != input_nbDims) {
|
||||
MS_LOG(ERROR) << "dims of inputs is invalid for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
// keep origin format if all input format are the same
|
||||
if (input_nbDims == DIMENSION_4D && tensorrt_in_tensors_[i].format_ != out_format) {
|
||||
out_format = Format::NHWC;
|
||||
}
|
||||
}
|
||||
|
||||
// make sure two inputs have same format
|
||||
Format out_format = tensorrt_in_tensors_[0].format_;
|
||||
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D) {
|
||||
if (tensorrt_in_tensors_[0].format_ == tensorrt_in_tensors_[1].format_) {
|
||||
for (size_t i = 0; i < tensorrt_in_tensors_.size(); i++) {
|
||||
// make sure all inputs are same format
|
||||
if (input_nbDims == DIMENSION_4D) {
|
||||
for (size_t i = 0; i < tensorrt_in_tensors_.size(); i++) {
|
||||
if (tensorrt_in_tensors_[i].format_ == out_format) {
|
||||
trt_input_tensors[i] = tensorrt_in_tensors_[i].trt_tensor_;
|
||||
} else {
|
||||
nvinfer1::IShuffleLayer *transpose_layer = NCHW2NHWC(network, *tensorrt_in_tensors_[i].trt_tensor_);
|
||||
if (transpose_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "op action convert failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
trt_input_tensors[i] = transpose_layer->getOutput(0);
|
||||
}
|
||||
} else {
|
||||
// when inputs format are different, change to NHWC
|
||||
out_format = Format::NHWC;
|
||||
int transpose_tensor_index = tensorrt_in_tensors_[0].format_ == Format::NCHW ? 0 : 1;
|
||||
trt_input_tensors[1 - transpose_tensor_index] = tensorrt_in_tensors_[1 - transpose_tensor_index].trt_tensor_;
|
||||
nvinfer1::IShuffleLayer *transpose_layer =
|
||||
NCHW2NHWC(network, *tensorrt_in_tensors_[transpose_tensor_index].trt_tensor_);
|
||||
if (transpose_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "op action convert failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
trt_input_tensors[transpose_tensor_index] = transpose_layer->getOutput(0);
|
||||
}
|
||||
} else {
|
||||
for (size_t i = 0; i < tensorrt_in_tensors_.size(); i++) {
|
||||
|
@ -85,6 +87,9 @@ int ConcateTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
|
||||
int axis = RET_INVALID_OP_ATTR;
|
||||
axis = concate_op->axis();
|
||||
if (axis == -1) {
|
||||
axis = input_nbDims - 1;
|
||||
}
|
||||
if (trt_input_tensors[0]->getDimensions().nbDims == DIMENSION_4D && out_format == Format::NCHW) {
|
||||
// when inputs all NCHW, change axis
|
||||
axis = ConvertAxisFromNHWC2NCHW(axis);
|
||||
|
|
|
@ -32,7 +32,8 @@ int ElementWiseTensorRT::IsSupport(const schema::Primitive *primitive,
|
|||
{schema::PrimitiveType_SubFusion, nvinfer1::ElementWiseOperation::kSUB},
|
||||
{schema::PrimitiveType_MulFusion, nvinfer1::ElementWiseOperation::kPROD},
|
||||
{schema::PrimitiveType_Minimum, nvinfer1::ElementWiseOperation::kMIN},
|
||||
};
|
||||
{schema::PrimitiveType_Maximum, nvinfer1::ElementWiseOperation::kMAX},
|
||||
{schema::PrimitiveType_BiasAdd, nvinfer1::ElementWiseOperation::kSUM}};
|
||||
auto iter_op = element_wise_ops.find(this->type_);
|
||||
if (iter_op != element_wise_ops.end()) {
|
||||
element_wise_op_ = iter_op->second;
|
||||
|
@ -68,7 +69,8 @@ int ElementWiseTensorRT::IsSupport(const schema::Primitive *primitive,
|
|||
}
|
||||
|
||||
// if constant tensor is scalar, it needs to know another input tensor's shape to broadcast
|
||||
if (in_tensors[0].Shape()[0] == -1 && in_tensors[1].Shape().size() == 0) {
|
||||
if ((in_tensors[0].Shape().size() > 0 && in_tensors[0].Shape()[0] == -1 && in_tensors[1].Shape().size() == 0) ||
|
||||
(in_tensors[1].Shape().size() > 0 && in_tensors[1].Shape()[0] == -1 && in_tensors[0].Shape().size() == 0)) {
|
||||
MS_LOG(ERROR) << "invalid all input tensor shape unknown for: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
@ -81,22 +83,20 @@ int ElementWiseTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
MS_LOG(ERROR) << "network or input tensor size is invalid";
|
||||
return RET_ERROR;
|
||||
}
|
||||
first_in_tensor_index_ =
|
||||
SameDims(tensorrt_in_tensors_[0].trt_tensor_->getDimensions(), in_tensors_[0].Shape()) ? 0 : 1;
|
||||
input_x_index_ = SameTensor(tensorrt_in_tensors_[0].trt_tensor_, &in_tensors_[0]) ? 0 : 1;
|
||||
|
||||
if (this->tensorrt_in_tensors_.size() != INPUT_SIZE2) {
|
||||
int ret = AddConstTensor(network);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "AddConstTensor failed for " << op_name_;
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
MS_LOG(DEBUG) << "before transpose "
|
||||
<< GetTensorFormat(tensorrt_in_tensors_[first_in_tensor_index_].trt_tensor_,
|
||||
tensorrt_in_tensors_[first_in_tensor_index_].format_);
|
||||
<< GetTensorFormat(tensorrt_in_tensors_[input_x_index_].trt_tensor_,
|
||||
tensorrt_in_tensors_[input_x_index_].format_);
|
||||
MS_LOG(DEBUG) << "before transpose "
|
||||
<< GetTensorFormat(tensorrt_in_tensors_[1 - first_in_tensor_index_].trt_tensor_,
|
||||
tensorrt_in_tensors_[1 - first_in_tensor_index_].format_);
|
||||
<< GetTensorFormat(tensorrt_in_tensors_[1 - input_x_index_].trt_tensor_,
|
||||
tensorrt_in_tensors_[1 - input_x_index_].format_);
|
||||
|
||||
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D &&
|
||||
tensorrt_in_tensors_[0].format_ != tensorrt_in_tensors_[1].format_) {
|
||||
|
@ -116,15 +116,15 @@ int ElementWiseTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
return RET_ERROR;
|
||||
}
|
||||
MS_LOG(DEBUG) << "after transpose "
|
||||
<< GetTensorFormat(tensorrt_in_tensors_[first_in_tensor_index_].trt_tensor_,
|
||||
tensorrt_in_tensors_[first_in_tensor_index_].format_);
|
||||
<< GetTensorFormat(tensorrt_in_tensors_[input_x_index_].trt_tensor_,
|
||||
tensorrt_in_tensors_[input_x_index_].format_);
|
||||
MS_LOG(DEBUG) << "after transpose "
|
||||
<< GetTensorFormat(tensorrt_in_tensors_[1 - first_in_tensor_index_].trt_tensor_,
|
||||
tensorrt_in_tensors_[1 - first_in_tensor_index_].format_);
|
||||
<< GetTensorFormat(tensorrt_in_tensors_[1 - input_x_index_].trt_tensor_,
|
||||
tensorrt_in_tensors_[1 - input_x_index_].format_);
|
||||
|
||||
nvinfer1::IElementWiseLayer *cal_layer =
|
||||
network->addElementWise(*tensorrt_in_tensors_[first_in_tensor_index_].trt_tensor_,
|
||||
*tensorrt_in_tensors_[1 - first_in_tensor_index_].trt_tensor_, element_wise_op_);
|
||||
network->addElementWise(*tensorrt_in_tensors_[input_x_index_].trt_tensor_,
|
||||
*tensorrt_in_tensors_[1 - input_x_index_].trt_tensor_, element_wise_op_);
|
||||
|
||||
if (cal_layer == nullptr) {
|
||||
MS_LOG(ERROR) << "addElementWise failed for TensorRT.";
|
||||
|
@ -210,26 +210,60 @@ nvinfer1::ITensor *ElementWiseTensorRT::AddActivation(nvinfer1::INetworkDefiniti
|
|||
return activation_out_tensor;
|
||||
}
|
||||
int ElementWiseTensorRT::AddConstTensor(nvinfer1::INetworkDefinition *network) {
|
||||
// create ITensor from MS constant tensor of index 1 - first_in_tensor_index_
|
||||
nvinfer1::ITensor *constant_input = nullptr;
|
||||
if (this->in_tensors_[1 - first_in_tensor_index_].Shape().size() == 0 ||
|
||||
this->in_tensors_[1 - first_in_tensor_index_].ElementNum() == 1) {
|
||||
constant_input = lite::ConvertScalarToITensor(network, this->in_tensors_[first_in_tensor_index_].Shape().size(),
|
||||
in_tensors_[1 - first_in_tensor_index_].Data().get(),
|
||||
in_tensors_[1 - first_in_tensor_index_].DataType());
|
||||
int const_tensor_index = (in_tensors_[0].Data() != nullptr && in_tensors_[0].IsConst()) ? 0 : 1;
|
||||
if (this->in_tensors_[const_tensor_index].Shape().size() == 0 ||
|
||||
this->in_tensors_[const_tensor_index].ElementNum() == 1) {
|
||||
constant_input = lite::ConvertScalarToITensor(network, this->in_tensors_[1 - const_tensor_index].Shape().size(),
|
||||
in_tensors_[const_tensor_index].Data().get(),
|
||||
in_tensors_[const_tensor_index].DataType(), op_name_);
|
||||
if (constant_input == nullptr) {
|
||||
MS_LOG(ERROR) << "create Itensor from scalar tensor failed: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
this->AddInnerInTensors(ITensorHelper{constant_input, tensorrt_in_tensors_[0].format_});
|
||||
} else {
|
||||
constant_input = lite::ConvertConstantTensor(network, in_tensors_[1 - first_in_tensor_index_]);
|
||||
} else if (this->in_tensors_[const_tensor_index].Shape().size() ==
|
||||
this->in_tensors_[1 - const_tensor_index].Shape().size()) {
|
||||
constant_input = lite::ConvertConstantTensor(network, in_tensors_[const_tensor_index], op_name_);
|
||||
if (constant_input == nullptr) {
|
||||
MS_LOG(ERROR) << "create Itensor from constant tensor failed: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
this->AddInnerInTensors(ITensorHelper{constant_input, Format::NHWC});
|
||||
} else if (this->in_tensors_[const_tensor_index].Shape().size() == 1 &&
|
||||
this->in_tensors_[const_tensor_index].ElementNum() >= 1) {
|
||||
constant_input = ConvertTensorWithExpandDims(network, in_tensors_[const_tensor_index],
|
||||
in_tensors_[1 - const_tensor_index].Shape().size(), op_name_);
|
||||
if (constant_input == nullptr) {
|
||||
MS_LOG(ERROR) << "create Itensor from ConvertTensorWithExpandDims failed: " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
this->AddInnerInTensors(ITensorHelper{constant_input, Format::NHWC});
|
||||
} else {
|
||||
MS_LOG(ERROR) << "const tensor value needs check: " << op_name_;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
// Heuristically decides whether a TensorRT tensor corresponds to a given MindSpore tensor.
//
// The tensors are considered the same when any of the following holds:
//   1. the TensorRT dims equal the MindSpore shape as-is;
//   2. the MindSpore shape is 4-D and the TensorRT dims equal that shape reordered by NHWC2NCHW;
//   3. one tensor's (non-empty) name is a substring of the other's.
//
// @param trt_tensor TensorRT tensor to compare; a null pointer never matches.
// @param ms_tensor  MindSpore tensor to compare; a null pointer never matches.
// @return true when the two tensors are judged to be the same tensor, false otherwise.
bool ElementWiseTensorRT::SameTensor(nvinfer1::ITensor *trt_tensor, mindspore::MSTensor *ms_tensor) {
  if (trt_tensor == nullptr || ms_tensor == nullptr) {
    return false;
  }

  const nvinfer1::Dims trt_dims = trt_tensor->getDimensions();
  const auto ms_shape = ms_tensor->Shape();
  if (SameDims(trt_dims, ms_shape)) {
    return true;
  }
  if (ms_shape.size() == DIMENSION_4D) {
    // the TensorRT tensor may hold the same data in NCHW order while the MindSpore shape is NHWC
    auto nchw_shape = NHWC2NCHW(ms_shape);
    if (SameDims(trt_dims, nchw_shape)) {
      return true;
    }
  }

  // Fall back to name matching. An empty needle makes strstr succeed for every haystack, so an
  // unnamed tensor must not be allowed to match everything.
  const char *trt_name = trt_tensor->getName();
  const std::string ms_name = ms_tensor->Name();
  if (trt_name == nullptr || trt_name[0] == '\0' || ms_name.empty()) {
    return false;
  }
  if (strstr(trt_name, ms_name.c_str()) != nullptr) {
    return true;
  }
  return strstr(ms_name.c_str(), trt_name) != nullptr;
}
|
||||
|
||||
} // namespace mindspore::lite
|
||||
|
|
|
@ -39,10 +39,12 @@ class ElementWiseTensorRT : public TensorRTOp {
|
|||
|
||||
int AddConstTensor(nvinfer1::INetworkDefinition *network);
|
||||
|
||||
bool SameTensor(nvinfer1::ITensor *trt_tensor, mindspore::MSTensor *ms_tensor);
|
||||
|
||||
nvinfer1::ElementWiseOperation element_wise_op_;
|
||||
|
||||
// index of first input MSTensor in the trt input tensor vector
|
||||
size_t first_in_tensor_index_ = 0;
|
||||
size_t input_x_index_ = 0;
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_OP_ELEMENTWISE_TENSORRT_H_
|
||||
|
|
|
@ -56,7 +56,7 @@ int GatherTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
|
||||
nvinfer1::ITensor *gather_input = this->tensorrt_in_tensors_[0].trt_tensor_;
|
||||
if (in_tensors_[0].IsConst()) {
|
||||
gather_input = lite::ConvertConstantTensor(network, this->in_tensors_[0]);
|
||||
gather_input = lite::ConvertConstantTensor(network, this->in_tensors_[0], op_name_);
|
||||
MS_LOG(DEBUG) << "gather input is const tensor " << op_name_;
|
||||
}
|
||||
if (gather_input == nullptr) {
|
||||
|
@ -66,7 +66,7 @@ int GatherTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
|
||||
nvinfer1::ITensor *indices_tensor = this->tensorrt_in_tensors_[tensorrt_in_tensors_.size() - 1].trt_tensor_;
|
||||
if (in_tensors_[1].IsConst()) {
|
||||
indices_tensor = lite::ConvertConstantTensor(network, this->in_tensors_[1]);
|
||||
indices_tensor = lite::ConvertConstantTensor(network, this->in_tensors_[1], op_name_);
|
||||
MS_LOG(DEBUG) << "gather indices is const tensor " << op_name_;
|
||||
}
|
||||
if (indices_tensor == nullptr) {
|
||||
|
|
|
@ -46,7 +46,7 @@ int MatMulTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
transpose_a_ = nvinfer1::MatrixOperation::kNONE;
|
||||
transpose_b_ = nvinfer1::MatrixOperation::kTRANSPOSE;
|
||||
}
|
||||
auto weight = ConvertTensorWithExpandDims(network, in_tensors_[1], in_tensors_[0].Shape().size());
|
||||
auto weight = ConvertTensorWithExpandDims(network, in_tensors_[1], in_tensors_[0].Shape().size(), op_name_);
|
||||
|
||||
nvinfer1::ITensor *matmul_input = tensorrt_in_tensors_[0].trt_tensor_;
|
||||
Format out_format = tensorrt_in_tensors_[0].format_;
|
||||
|
@ -68,7 +68,7 @@ int MatMulTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
nvinfer1::ITensor *out_tensor = matmul_layer->getOutput(0);
|
||||
|
||||
if (in_tensors_.size() == BIAS_INDEX + 1) {
|
||||
auto bias = ConvertTensorWithExpandDims(network, in_tensors_[BIAS_INDEX], in_tensors_[0].Shape().size());
|
||||
auto bias = ConvertTensorWithExpandDims(network, in_tensors_[BIAS_INDEX], in_tensors_[0].Shape().size(), op_name_);
|
||||
auto bias_layer = network->addElementWise(*matmul_layer->getOutput(0), *bias, nvinfer1::ElementWiseOperation::kSUM);
|
||||
auto bias_layer_name = op_name_ + "_bias";
|
||||
bias_layer->setName(bias_layer_name.c_str());
|
||||
|
|
|
@ -77,6 +77,7 @@ int PadTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
}
|
||||
transpose_layer_in->setName((op_name_ + "_transpose2NCHW").c_str());
|
||||
pad_input = transpose_layer_in->getOutput(0);
|
||||
MS_LOG(DEBUG) << "after transpose " << GetTensorFormat(pad_input, Format::NCHW);
|
||||
}
|
||||
|
||||
// trt 6 only support 2D padding
|
||||
|
@ -91,8 +92,8 @@ int PadTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
|
|||
}
|
||||
nvinfer1::DimsHW prePadding{*(padding_data + 2), *(padding_data + 4)};
|
||||
nvinfer1::DimsHW postPadding{*(padding_data + 3), *(padding_data + 5)};
|
||||
MS_LOG(DEBUG) << "prePadding: " << *(padding_data + 2) << ", " << *(padding_data + 4);
|
||||
MS_LOG(DEBUG) << "postPadding: " << *(padding_data + 3) << ", " << *(padding_data + 5);
|
||||
MS_LOG(DEBUG) << op_name_ << " prePadding: " << prePadding.d[0] << ", " << prePadding.d[1]
|
||||
<< "; postPadding: " << postPadding.d[0] << ", " << postPadding.d[1];
|
||||
|
||||
padding_layer = network->addPadding(*pad_input, prePadding, postPadding);
|
||||
} else {
|
||||
|
|
|
@ -154,10 +154,11 @@ int PoolTensorRT::ParseParams() {
|
|||
|
||||
auto padding = pool_primitive->pad();
|
||||
if (padding == nullptr) {
|
||||
MS_LOG(ERROR) << "get padding failed: " << op_name_;
|
||||
return RET_ERROR;
|
||||
MS_LOG(INFO) << "get padding is null, set to default 0: " << op_name_;
|
||||
padding_ = {0, 0, 0, 0};
|
||||
} else {
|
||||
padding_ = std::vector<int64_t>(padding->begin(), padding->end());
|
||||
}
|
||||
padding_ = std::vector<int64_t>(padding->begin(), padding->end());
|
||||
|
||||
pad_mode_ = pool_primitive->pad_mode();
|
||||
activation_type_ = pool_primitive->activation_type();
|
||||
|
|
|
@ -100,10 +100,6 @@ int ResizeTensorRT::SetOutputDims(nvinfer1::ITensor *resize_in_tensor, nvinfer1:
|
|||
}
|
||||
resize_layer->setInput(1, *tensorrt_in_tensors_[1].trt_tensor_);
|
||||
} else {
|
||||
if (in_tensors_[1].ElementNum() != resize_in_tensor->getDimensions().nbDims) {
|
||||
MS_LOG(ERROR) << "output shape tensor value is invalid for " << op_name_;
|
||||
return RET_ERROR;
|
||||
}
|
||||
switch (in_tensors_[1].DataType()) {
|
||||
case DataType::kNumberTypeFloat32: {
|
||||
const float *shape_data_fp32 = static_cast<const float *>(shape_data);
|
||||
|
@ -119,11 +115,25 @@ int ResizeTensorRT::SetOutputDims(nvinfer1::ITensor *resize_in_tensor, nvinfer1:
|
|||
}
|
||||
break;
|
||||
}
|
||||
case DataType::kNumberTypeInt32: {
|
||||
const int *shape_data_fp16 = static_cast<const int *>(shape_data);
|
||||
for (int i = 0; i < in_tensors_[1].ElementNum(); i++) {
|
||||
out_shape.push_back(*(shape_data_fp16 + i));
|
||||
}
|
||||
break;
|
||||
}
|
||||
default:
|
||||
MS_LOG(WARNING) << op_name_
|
||||
<< " more datatype need to check: " << static_cast<int>(in_tensors_[1].DataType());
|
||||
break;
|
||||
}
|
||||
if (out_shape.size() == DIMENSION_2D &&
|
||||
tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D) {
|
||||
// out_shape: origin_n, out_shape[0], out_shape[1], origin_c
|
||||
out_shape.insert(out_shape.begin(),
|
||||
tensorrt_in_tensors_[0].trt_tensor_->getDimensions().d[0]); // batch size is dynamic
|
||||
out_shape.push_back(in_tensors_[0].Shape()[kNHWC_C]); // channel is const
|
||||
}
|
||||
if (SameDims(out_shape, out_tensors_[0].Shape())) {
|
||||
// static dims
|
||||
resize_layer->setOutputDimensions(ConvertCudaDims(out_shape));
|
||||
|
@ -137,8 +147,12 @@ int ResizeTensorRT::SetOutputDims(nvinfer1::ITensor *resize_in_tensor, nvinfer1:
|
|||
std::copy(out_shape.begin(), out_shape.end(), scales);
|
||||
resize_layer->setScales(scales, out_shape.size());
|
||||
} else {
|
||||
MS_LOG(ERROR) << "output shape tensor value is invalid for " << op_name_;
|
||||
return RET_ERROR;
|
||||
MS_LOG(DEBUG) << op_name_ << " output shape tensor value is const, but set to scales for dynamic input shape";
|
||||
float scales[out_tensors_[0].Shape().size()];
|
||||
for (size_t i = 0; i < out_tensors_[0].Shape().size(); i++) {
|
||||
scales[i] = static_cast<float>(out_tensors_[0].Shape()[i]) / static_cast<float>(in_tensors_[0].Shape()[i]);
|
||||
}
|
||||
resize_layer->setScales(scales, out_tensors_[0].Shape().size());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -165,23 +165,21 @@ int ShuffleTensorRT::AddSqueezeOp(nvinfer1::IShuffleLayer *shuffle_layer) {
|
|||
}
|
||||
|
||||
// axis
|
||||
auto squeeze_shape = in_tensors_[0].Shape();
|
||||
auto begin = std::begin(squeeze_shape);
|
||||
auto squeeze_shape = std::vector<int64_t>(in_tensors_[0].Shape().begin(), in_tensors_[0].Shape().end());
|
||||
auto axis = squeeze_op->axis();
|
||||
if (axis == nullptr) {
|
||||
MS_LOG(ERROR) << "AddSqueezeOp has invalid axis";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < axis->size(); i++) {
|
||||
for (int i = axis->size() - 1; i >= 0; i--) {
|
||||
if (squeeze_shape[axis->Get(i)] != 1) {
|
||||
MS_LOG(WARNING) << "squeeze_shape value is not 1, need check";
|
||||
}
|
||||
squeeze_shape.erase(begin + axis->Get(i));
|
||||
squeeze_shape.erase(squeeze_shape.begin() + axis->Get(i));
|
||||
}
|
||||
|
||||
nvinfer1::Dims squeeze_dims = lite::ConvertCudaDims(squeeze_shape);
|
||||
MS_LOG(DEBUG) << "AddSqueezeOp: " << op_name_ << " squeeze_dims.nbDims: " << squeeze_dims.nbDims;
|
||||
|
||||
shuffle_layer->setReshapeDimensions(squeeze_dims);
|
||||
return shuffle_layer->getOutput(0) == nullptr ? RET_ERROR : RET_OK;
|
||||
|
@ -218,7 +216,7 @@ int ShuffleTensorRT::AddTransposeOp(nvinfer1::IShuffleLayer *shuffle_layer) {
|
|||
MS_LOG(ERROR) << "AddTransposeOp convert failed";
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (in_tensors_.size() != 2) {
|
||||
if (in_tensors_.size() != INPUT_SIZE2) {
|
||||
MS_LOG(ERROR) << "AddTransposeOp size of in tensort needs check: " << in_tensors_.size();
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
|
|
@ -73,18 +73,19 @@ nvinfer1::ISoftMaxLayer *SoftMaxTensorRT::AddSoftMaxOp(nvinfer1::INetworkDefinit
|
|||
MS_LOG(WARNING) << "axis needs check";
|
||||
}
|
||||
|
||||
if (axis_val[0] >= this->tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims) {
|
||||
if (axis_val[0] >= tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims) {
|
||||
MS_LOG(ERROR) << "axis is larger than input tensor dims.";
|
||||
return nullptr;
|
||||
}
|
||||
int64_t axis_format_value = axis_val[0];
|
||||
int64_t axis_format_value =
|
||||
(axis_val[0] == -1) ? tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims - 1 : axis_val[0];
|
||||
if (tensorrt_in_tensors_[0].trt_tensor_->getDimensions().nbDims == DIMENSION_4D &&
|
||||
tensorrt_in_tensors_[0].format_ == Format::NCHW) {
|
||||
// transpose axis to NCHW
|
||||
axis_format_value = ConvertAxisFromNHWC2NCHW(axis_val[0]);
|
||||
axis_format_value = ConvertAxisFromNHWC2NCHW(axis_format_value);
|
||||
}
|
||||
uint32_t axis_bit = 1 << axis_format_value;
|
||||
MS_LOG(DEBUG) << op_name_ << " set axis to " << axis_bit;
|
||||
MS_LOG(DEBUG) << op_name_ << " axis_value is " << axis_format_value << ", set axis to " << axis_bit;
|
||||
current_layer_->setAxes(axis_bit);
|
||||
return current_layer_;
|
||||
}
|
||||
|
|
|
@ -44,12 +44,8 @@ const std::vector<TensorRTOp *> &TensorRTOp::in_ops() const { return this->in_op
|
|||
const std::vector<TensorRTOp *> &TensorRTOp::out_ops() const { return this->out_ops_; }
|
||||
|
||||
bool TensorRTOp::IsShapeKnown() {
|
||||
if (this->in_tensors_[0].Shape().size() == 0) {
|
||||
if (this->in_tensors_.size() == 1 && this->in_tensors_[0].Shape().size() == 0) {
|
||||
return false;
|
||||
} else {
|
||||
if (this->in_tensors_[0].Shape()[0] == -1) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,88 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "src/delegate/tensorrt/op/topk_tensorrt.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
// Checks whether this node can be handled by TopKTensorRT.
//
// The op is accepted only when the input shape is known and the node has exactly one input
// tensor and exactly one output tensor (AddInnerOp likewise requires a single TensorRT input).
//
// @param primitive   schema primitive of the node (not inspected here).
// @param in_tensors  input tensors of the node.
// @param out_tensors output tensors of the node.
// @return RET_OK when the node is supported, RET_ERROR otherwise.
int TopKTensorRT::IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                            const std::vector<mindspore::MSTensor> &out_tensors) {
  if (!IsShapeKnown()) {
    MS_LOG(ERROR) << "Unsupported input tensor unknown shape: " << op_name_;
    return RET_ERROR;
  }
  if (in_tensors.size() != 1) {
    MS_LOG(ERROR) << "Unsupported input tensor size, size is " << in_tensors.size();
    // reject the node instead of only logging, otherwise it would still be reported as supported
    return RET_ERROR;
  }
  if (out_tensors.size() != 1) {
    MS_LOG(ERROR) << "Unsupported output tensor size, size is " << out_tensors.size();
    return RET_ERROR;
  }
  return RET_OK;
}
|
||||
|
||||
// Adds a TensorRT TopK layer implementing this ArgMaxFusion / ArgMinFusion node.
//
// Reads axis, top_k and keep_dims from the primitive, converts the axis into a TensorRT
// reduce-axes bitmask, creates the layer and registers its first output as this op's output
// (keeping the input tensor's format).
//
// @param network TensorRT network the layer is added to; must not be null.
// @return RET_OK on success; RET_ERROR when the network or input is invalid, the primitive is
//         neither ArgMaxFusion nor ArgMinFusion, the axis is out of range, or TensorRT fails to
//         create the layer or its output.
int TopKTensorRT::AddInnerOp(nvinfer1::INetworkDefinition *network) {
  if (network == nullptr || this->tensorrt_in_tensors_.size() != 1) {
    MS_LOG(ERROR) << "network or input tensor is invalid";
    return RET_ERROR;
  }
  nvinfer1::ITensor *topk_input = tensorrt_in_tensors_[0].trt_tensor_;
  if (topk_input == nullptr) {
    MS_LOG(ERROR) << "input tensor is null for " << op_name_;
    return RET_ERROR;
  }

  nvinfer1::TopKOperation red_op = nvinfer1::TopKOperation::kMAX;
  int axis_value = 0;
  int topk = 0;
  bool keep_dims = false;
  if (type_ == schema::PrimitiveType_ArgMaxFusion) {
    red_op = nvinfer1::TopKOperation::kMAX;
    auto max_prim = op_primitive_->value_as_ArgMaxFusion();
    if (max_prim == nullptr) {
      MS_LOG(ERROR) << "convert ArgMaxFusion failed: " << op_name_;
      return RET_ERROR;
    }
    axis_value = max_prim->axis();
    topk = max_prim->top_k();
    keep_dims = max_prim->keep_dims();
  } else if (type_ == schema::PrimitiveType_ArgMinFusion) {
    red_op = nvinfer1::TopKOperation::kMIN;
    auto mim_prim = op_primitive_->value_as_ArgMinFusion();
    if (mim_prim == nullptr) {
      MS_LOG(ERROR) << "convert ArgMinFusion failed: " << op_name_;
      return RET_ERROR;
    }
    axis_value = mim_prim->axis();
    topk = mim_prim->top_k();
    keep_dims = mim_prim->keep_dims();
  } else {
    // without a recognised primitive there is no valid axis / top_k to build the layer from
    MS_LOG(ERROR) << "invalid op primitive for " << op_name_;
    return RET_ERROR;
  }
  if (keep_dims) {
    MS_LOG(WARNING) << "keep dims is unsupported for " << op_name_;
  }

  // Normalise a negative axis first: shifting by a negative amount is undefined behaviour.
  const int input_nb_dims = topk_input->getDimensions().nbDims;
  if (axis_value < 0) {
    axis_value += input_nb_dims;
  }
  if (axis_value < 0 || axis_value >= input_nb_dims) {
    MS_LOG(ERROR) << "axis " << axis_value << " is out of range for " << op_name_;
    return RET_ERROR;
  }
  // the axis is remapped only for 4-D NCHW inputs, matching the Concat and Softmax ops
  if (input_nb_dims == DIMENSION_4D && tensorrt_in_tensors_[0].format_ == Format::NCHW) {
    axis_value = ConvertAxisFromNHWC2NCHW(axis_value);
  }
  uint32_t reduce_axes = 1U << axis_value;

  nvinfer1::ITopKLayer *topk_layer = network->addTopK(*topk_input, red_op, topk, reduce_axes);
  if (topk_layer == nullptr) {
    MS_LOG(ERROR) << "addTopK failed for: " << op_name_;
    return RET_ERROR;
  }
  topk_layer->setName(op_name_.c_str());

  // NOTE(review): ITopKLayer output 0 carries the selected values and output 1 their indices;
  // this op forwards output 0 - confirm that is what ArgMax/ArgMin consumers expect.
  nvinfer1::ITensor *op_out_tensor = topk_layer->getOutput(0);
  if (op_out_tensor == nullptr) {
    MS_LOG(ERROR) << "get output of addTopK failed for: " << op_name_;
    return RET_ERROR;
  }
  op_out_tensor->setName((op_name_ + "_output").c_str());
  this->AddInnerOutTensors(ITensorHelper{op_out_tensor, tensorrt_in_tensors_[0].format_});
  return RET_OK;
}
|
||||
} // namespace mindspore::lite
|
|
@ -0,0 +1,38 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_OP_TOPK_TENSORRT_H_
|
||||
#define MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_OP_TOPK_TENSORRT_H_
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include "src/delegate/tensorrt/op/tensorrt_op.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
class TopKTensorRT : public TensorRTOp {
|
||||
public:
|
||||
TopKTensorRT(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors, const std::string &name)
|
||||
: TensorRTOp(primitive, in_tensors, out_tensors, name) {}
|
||||
|
||||
~TopKTensorRT() override = default;
|
||||
|
||||
int AddInnerOp(nvinfer1::INetworkDefinition *network) override;
|
||||
|
||||
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
|
||||
const std::vector<mindspore::MSTensor> &out_tensors) override;
|
||||
};
|
||||
} // namespace mindspore::lite
|
||||
#endif // MINDSPORE_LITE_SRC_DELEGATE_TENSORRT_OP_TOPK_TENSORRT_H_
|
|
@ -37,6 +37,7 @@
|
|||
#include "src/delegate/tensorrt/op/pool_tensorrt.h"
|
||||
#include "src/delegate/tensorrt/op/pad_tensorrt.h"
|
||||
#include "src/delegate/tensorrt/op/resize_tensorrt.h"
|
||||
#include "src/delegate/tensorrt/op/topk_tensorrt.h"
|
||||
|
||||
namespace mindspore::lite {
|
||||
TensorRTDelegate::~TensorRTDelegate() {
|
||||
|
@ -85,6 +86,8 @@ Status TensorRTDelegate::Init() {
|
|||
{schema::PrimitiveType_MulFusion, GetTensorRTOp<ElementWiseTensorRT>},
|
||||
{schema::PrimitiveType_Eltwise, GetTensorRTOp<ElementWiseTensorRT>},
|
||||
{schema::PrimitiveType_Minimum, GetTensorRTOp<ElementWiseTensorRT>},
|
||||
{schema::PrimitiveType_Maximum, GetTensorRTOp<ElementWiseTensorRT>},
|
||||
{schema::PrimitiveType_BiasAdd, GetTensorRTOp<ElementWiseTensorRT>},
|
||||
{schema::PrimitiveType_Gather, GetTensorRTOp<GatherTensorRT>},
|
||||
{schema::PrimitiveType_MatMul, GetTensorRTOp<MatMulTensorRT>},
|
||||
{schema::PrimitiveType_FullConnection, GetTensorRTOp<MatMulTensorRT>},
|
||||
|
@ -103,6 +106,7 @@ Status TensorRTDelegate::Init() {
|
|||
{schema::PrimitiveType_Flatten, GetTensorRTOp<ShuffleTensorRT>},
|
||||
{schema::PrimitiveType_ExpandDims, GetTensorRTOp<ShuffleTensorRT>},
|
||||
{schema::PrimitiveType_Softmax, GetTensorRTOp<SoftMaxTensorRT>},
|
||||
{schema::PrimitiveType_ArgMaxFusion, GetTensorRTOp<TopKTensorRT>},
|
||||
{schema::PrimitiveType_Sqrt, GetTensorRTOp<UnaryTensorRT>},
|
||||
};
|
||||
unsupport_hw_op_lists_ = {schema::PrimitiveType_Reshape};
|
||||
|
|
|
@ -105,8 +105,8 @@ int TensorRTSubGraph::SetDeviceConfig() {
|
|||
input_hw_index_ = -1;
|
||||
}
|
||||
|
||||
// config setMaxWorkspaceSize to 32 MB for max limit
|
||||
config_->setMaxWorkspaceSize(32 * (1 << 20));
|
||||
// config setMaxWorkspaceSize to 1024 MB for max limit
|
||||
config_->setMaxWorkspaceSize(1024 * (1 << 20));
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
@ -231,7 +231,7 @@ int TensorRTSubGraph::BuildTensorRTGraph() {
|
|||
MS_LOG(ERROR) << "Weight Tensor data is nullptr.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
trt_tensor.trt_tensor_ = lite::ConvertConstantTensor(this->network_, in_tensor);
|
||||
trt_tensor.trt_tensor_ = lite::ConvertConstantTensor(this->network_, in_tensor, cur_op->GetOpName());
|
||||
trt_tensor.format_ = Format::NHWC;
|
||||
MS_LOG(INFO) << "auto convert constant tensor for: " << in_tensor.Name();
|
||||
cur_op->AddInnerInTensors(trt_tensor);
|
||||
|
|
|
@ -45,7 +45,8 @@ class TensorRTSubGraph : public kernel::Kernel {
|
|||
schema::PrimitiveType_MatMul, schema::PrimitiveType_PowFusion, schema::PrimitiveType_Eltwise,
|
||||
schema::PrimitiveType_ScaleFusion, schema::PrimitiveType_MulFusion, schema::PrimitiveType_Minimum,
|
||||
schema::PrimitiveType_StridedSlice, schema::PrimitiveType_PadFusion, schema::PrimitiveType_FullConnection,
|
||||
schema::PrimitiveType_Cast, schema::PrimitiveType_ExpandDims};
|
||||
schema::PrimitiveType_Cast, schema::PrimitiveType_ExpandDims, schema::PrimitiveType_Resize,
|
||||
schema::PrimitiveType_Maximum, schema::PrimitiveType_BiasAdd};
|
||||
if (!support_resize) {
|
||||
input_batchsize_index_ = -1;
|
||||
input_hw_index_ = -1;
|
||||
|
|
|
@ -70,6 +70,18 @@ std::vector<int64_t> ConvertMSShape(const nvinfer1::Dims dims) {
|
|||
return shape;
|
||||
}
|
||||
|
||||
std::vector<int64_t> NHWC2NCHW(std::vector<int64_t> nhwc_shape) {
|
||||
std::vector<int64_t> nchw_shape;
|
||||
if (nhwc_shape.size() != DIMENSION_4D) {
|
||||
return nhwc_shape;
|
||||
}
|
||||
nchw_shape.push_back(nhwc_shape[kNHWC_N]);
|
||||
nchw_shape.push_back(nhwc_shape[kNHWC_C]);
|
||||
nchw_shape.push_back(nhwc_shape[kNHWC_H]);
|
||||
nchw_shape.push_back(nhwc_shape[kNHWC_W]);
|
||||
return nchw_shape;
|
||||
}
|
||||
|
||||
nvinfer1::IShuffleLayer *SetTranspose(nvinfer1::INetworkDefinition *network, const nvinfer1::ITensor &input,
|
||||
nvinfer1::Permutation permutation) {
|
||||
nvinfer1::IShuffleLayer *layer = network->addShuffle(const_cast<nvinfer1::ITensor &>(input));
|
||||
|
@ -111,7 +123,8 @@ nvinfer1::IShuffleLayer *NCHW2NHWC(nvinfer1::INetworkDefinition *network, const
|
|||
return SetTranspose(network, input, perm);
|
||||
}
|
||||
|
||||
nvinfer1::ITensor *ConvertConstantTensor(nvinfer1::INetworkDefinition *network, const mindspore::MSTensor &ms_tensor) {
|
||||
nvinfer1::ITensor *ConvertConstantTensor(nvinfer1::INetworkDefinition *network, const mindspore::MSTensor &ms_tensor,
|
||||
const std::string &op_name) {
|
||||
if (network == nullptr) {
|
||||
MS_LOG(ERROR) << "network is null for ConvertConstantTensor";
|
||||
return nullptr;
|
||||
|
@ -128,13 +141,13 @@ nvinfer1::ITensor *ConvertConstantTensor(nvinfer1::INetworkDefinition *network,
|
|||
MS_LOG(ERROR) << "create constant_tensor failed.";
|
||||
return nullptr;
|
||||
}
|
||||
auto name = ms_tensor.Name() + "_constant_layer";
|
||||
auto name = ms_tensor.Name() + "_" + op_name;
|
||||
constant_tensor->setName(name.c_str());
|
||||
return constant_tensor->getOutput(0);
|
||||
}
|
||||
|
||||
nvinfer1::ITensor *ConvertScalarToITensor(nvinfer1::INetworkDefinition *network, size_t shape_size, const void *value,
|
||||
const DataType data_type) {
|
||||
const DataType data_type, const std::string &op_name) {
|
||||
nvinfer1::Dims dims = ConvertCudaDims(1, shape_size);
|
||||
nvinfer1::Weights weights{ConvertDataType(data_type), value, 1};
|
||||
nvinfer1::IConstantLayer *constant_tensor = network->addConstant(dims, weights);
|
||||
|
@ -142,6 +155,8 @@ nvinfer1::ITensor *ConvertScalarToITensor(nvinfer1::INetworkDefinition *network,
|
|||
MS_LOG(ERROR) << "create constant_tensor failed.";
|
||||
return nullptr;
|
||||
}
|
||||
auto name = op_name + "_constant";
|
||||
constant_tensor->setName(name.c_str());
|
||||
return constant_tensor->getOutput(0);
|
||||
}
|
||||
|
||||
|
@ -170,7 +185,8 @@ ActivationParams ConvertActivationType(schema::ActivationType activation_type) {
|
|||
}
|
||||
|
||||
nvinfer1::ITensor *ConvertTensorWithExpandDims(nvinfer1::INetworkDefinition *network,
|
||||
const mindspore::MSTensor &ms_tensor, size_t expand_shape_size) {
|
||||
const mindspore::MSTensor &ms_tensor, size_t expand_shape_size,
|
||||
const std::string &op_name) {
|
||||
if (network == nullptr) {
|
||||
MS_LOG(ERROR) << "network is null for ConvertConstantTensor";
|
||||
return nullptr;
|
||||
|
@ -197,7 +213,7 @@ nvinfer1::ITensor *ConvertTensorWithExpandDims(nvinfer1::INetworkDefinition *net
|
|||
MS_LOG(ERROR) << "create constant_tensor failed.";
|
||||
return nullptr;
|
||||
}
|
||||
auto name = ms_tensor.Name() + "_constant_layer";
|
||||
auto name = ms_tensor.Name() + "_" + op_name;
|
||||
constant_tensor->setName(name.c_str());
|
||||
return constant_tensor->getOutput(0);
|
||||
}
|
||||
|
|
|
@ -28,6 +28,11 @@
|
|||
#define kNCHW_C 1
|
||||
#define kNCHW_H 2
|
||||
#define kNCHW_W 3
|
||||
// Axis positions inside a 4-D shape laid out in NHWC order; used to index such a shape.
#define kNHWC_N 0
|
||||
#define kNHWC_H 1
|
||||
#define kNHWC_W 2
|
||||
#define kNHWC_C 3
|
||||
|
||||
namespace mindspore::lite {
|
||||
struct ActivationParams {
|
||||
nvinfer1::ActivationType activation_type;
|
||||
|
@ -51,6 +56,8 @@ bool SameDims(nvinfer1::Dims dims, const std::vector<int64_t> &shape);
|
|||
|
||||
std::vector<int64_t> ConvertMSShape(const nvinfer1::Dims dims);
|
||||
|
||||
std::vector<int64_t> NHWC2NCHW(std::vector<int64_t> nhwc_shape);
|
||||
|
||||
nvinfer1::DataType ConvertDataType(DataType type_id);
|
||||
|
||||
nvinfer1::IShuffleLayer *NHWC2NCHW(nvinfer1::INetworkDefinition *network, const nvinfer1::ITensor &input);
|
||||
|
@ -59,13 +66,15 @@ nvinfer1::IShuffleLayer *NCHW2NHWC(nvinfer1::INetworkDefinition *network, const
|
|||
|
||||
ActivationParams ConvertActivationType(schema::ActivationType activation_type);
|
||||
|
||||
nvinfer1::ITensor *ConvertConstantTensor(nvinfer1::INetworkDefinition *network, const mindspore::MSTensor &ms_tensor);
|
||||
nvinfer1::ITensor *ConvertConstantTensor(nvinfer1::INetworkDefinition *network, const mindspore::MSTensor &ms_tensor,
|
||||
const std::string &op_name);
|
||||
|
||||
nvinfer1::ITensor *ConvertTensorWithExpandDims(nvinfer1::INetworkDefinition *network,
|
||||
const mindspore::MSTensor &ms_tensor, size_t expand_shape_size);
|
||||
const mindspore::MSTensor &ms_tensor, size_t expand_shape_size,
|
||||
const std::string &op_name);
|
||||
|
||||
nvinfer1::ITensor *ConvertScalarToITensor(nvinfer1::INetworkDefinition *network, size_t shape_size, const void *value,
|
||||
const DataType data_type);
|
||||
const DataType data_type, const std::string &op_name);
|
||||
|
||||
nvinfer1::Weights TransposeWeight(const mindspore::MSTensor &ms_tensor, void **pack_weight);
|
||||
|
||||
|
|
|
@ -1 +1,3 @@
|
|||
gender_resnet34_lzl.onnx;1:input.1
|
||||
ml_video_edit_person_divison_pic;1:blob1
|
||||
ml_video_edit_person_divison_video;2:blob1,blob2
|
||||
|
|
|
@ -4,18 +4,55 @@
|
|||
function Run_TensorRT() {
|
||||
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:./
|
||||
source /etc/profile
|
||||
local line_info model_info spec_acc_limit model_name input_num input_shapes \
|
||||
mode model_file input_files output_file data_path acc_limit enableFp16 \
|
||||
run_result
|
||||
|
||||
while read line; do
|
||||
model_name=${line%;*}
|
||||
# length=${#model_name}
|
||||
# input_shapes=${line:length+1}
|
||||
if [[ $model_name == \#* ]]; then
|
||||
continue
|
||||
line_info=${line}
|
||||
if [[ $line_info == \#* || $line_info == "" ]]; then
|
||||
continue
|
||||
fi
|
||||
echo ${model_name} >> "${run_tensorrt_log_file}"
|
||||
# inputshape needed later
|
||||
echo 'CUDA_VISILE_DEVICE='${cuda_device_id}' ./benchmark --modelFile='${basepath}'/'${model_name}'.ms --inDataFile='${basepath}'/../../input_output/input/'${model_name}'.ms.bin --benchmarkDataFile='${basepath}'/../../input_output/output/'${model_name}'.ms.out --device=GPU' >> "${run_tensorrt_log_file}"
|
||||
CUDA_VISILE_DEVICE=${cuda_device_id} ./benchmark --modelFile=$basepath/${model_name}.ms --inDataFile=${basepath}/../../input_output/input/${model_name}.ms.bin --benchmarkDataFile=${basepath}/../../input_output/output/${model_name}.ms.out --device=GPU >> ${run_tensorrt_log_file}
|
||||
#model_name.onnx;2:input1,input2;1,16,16,4;fp16 0.5
|
||||
model_info=`echo ${line_info} | awk -F ' ' '{print $1}'`
|
||||
spec_acc_limit=`echo ${line_info} | awk -F ' ' '{print $2}'`
|
||||
model_name=`echo ${model_info} | awk -F ';' '{print $1}'`
|
||||
input_info=`echo ${model_info} | awk -F ';' '{print $2}'`
|
||||
input_shapes=`echo ${model_info} | awk -F ';' '{print $3}'`
|
||||
mode=`echo ${model_info} | awk -F ';' '{print $3}'`
|
||||
input_num=`echo ${input_info} | sed 's/:/;/' | awk -F ';' '{print $1}'`
|
||||
if [[ ${model_name##*.} == "caffemodel" ]]; then
|
||||
model_name=${model_name%.*}
|
||||
fi
|
||||
echo "Benchmarking ${model_name} ......"
|
||||
model_file=${basepath}'/'${model_name}'.ms'
|
||||
input_files=""
|
||||
output_file=""
|
||||
data_path=${basepath}'/../../input_output/'
|
||||
if [[ ${input_num} == "" || ${input_num} == 1 ]]; then
|
||||
input_files=${data_path}'input/'${model_name}'.ms.bin'
|
||||
else
|
||||
for i in $(seq 1 $input_num)
|
||||
do
|
||||
input_files=${input_files}${data_path}'input/'${model_name}'.ms.bin_'$i','
|
||||
done
|
||||
fi
|
||||
output_file=${data_path}'output/'${model_name}'.ms.out'
|
||||
# set accuracy limitation
|
||||
acc_limit="0.5"
|
||||
if [[ ${spec_acc_limit} != "" ]]; then
|
||||
acc_limit="${spec_acc_limit}"
|
||||
elif [[ ${mode} == "fp16" ]]; then
|
||||
acc_limit="5"
|
||||
fi
|
||||
# whether enable fp16
|
||||
enableFp16="false"
|
||||
if [[ ${mode} == "fp16" ]]; then
|
||||
enableFp16="true"
|
||||
fi
|
||||
|
||||
echo 'CUDA_VISILE_DEVICE='${cuda_device_id}' ./benchmark --modelFile='${model_file}' --inputShapes='${input_shapes}' --inDataFile='${input_files}' --benchmarkDataFile='${output_file}' --enableFp16='${enableFp16}' --accuracyThreshold='${acc_limit}' --device=GPU' >> "${run_tensorrt_log_file}"
|
||||
CUDA_VISILE_DEVICE=${cuda_device_id} ./benchmark --modelFile=${model_file} --inputShapes=${input_shapes} --inDataFile=${input_files} --benchmarkDataFile=${output_file} --enableFp16=${enableFp16} --accuracyThreshold=${acc_limit} --device=GPU >> ${run_tensorrt_log_file}
|
||||
if [ $? = 0 ]; then
|
||||
run_result='TensorRT: '${model_name}' pass'; echo ${run_result} >> ${run_benchmark_result_file}
|
||||
else
|
||||
|
|
|
@ -40,7 +40,7 @@ function Run_Tensorrt() {
|
|||
# copy related files to benchmark_test
|
||||
cp -a ./tools/benchmark/benchmark ${benchmark_test_path}/benchmark || exit 1
|
||||
cp -a ./runtime/lib/lib*.so* ${benchmark_test_path}/ || exit 1
|
||||
cp -a ./runtime/third_party/glog/libglog.so.0 ${benchmark_test_path}/libglog.so.0 || exit 1
|
||||
cp -a ./runtime/third_party/glog/lib*.so* ${benchmark_test_path}/ || exit 1
|
||||
|
||||
echo "start push files to nvidia device ${device_ip} : ${cuda_device_id}"
|
||||
ssh tensorrt@${device_ip} "cd ${device_benchmark_test_path}; rm -rf ./*"
|
||||
|
|
Loading…
Reference in New Issue