make sigmoid and normalize support FP16

CHEN YU 2022-06-23 14:50:12 +08:00
parent 88ca0b91e2
commit d543a5c7f0
8 changed files with 56 additions and 21 deletions

View File

@@ -76,11 +76,12 @@ int ActivationTensorRT::AddInnerOp(TensorRTContext *ctx) {
TRTTensorCast(ctx, tensorrt_in_tensors_[0].trt_tensor_, nvinfer1::DataType::kFLOAT, op_name_ + "_cast_in");
}
+ auto runtime_precision_mode = runtime_->GetRuntimePrecisionMode();
auto activation_layer =
ActivationTensorRT::AddActivation(ctx, activation_op->activation_type(), alpha,
std::isfinite(activation_op->min_val()) ? activation_op->min_val() : FLT_MIN,
std::isfinite(activation_op->max_val()) ? activation_op->max_val() : FLT_MAX,
- activation_input, device_id_, quant_type_);
+ activation_input, device_id_, quant_type_, runtime_precision_mode);
if (activation_layer == nullptr) {
MS_LOG(ERROR) << "add activation op failed for TensorRT.";
return RET_ERROR;
@@ -102,10 +103,12 @@ int ActivationTensorRT::AddInnerOp(TensorRTContext *ctx) {
nvinfer1::ILayer *ActivationTensorRT::AddActivation(TensorRTContext *ctx, schema::ActivationType activation_type,
float alpha, float min_value, float max_value,
nvinfer1::ITensor *trt_in_tensor, uint32_t device_id,
- schema::QuantType quant_type) {
+ schema::QuantType quant_type,
+ RuntimePrecisionMode runtime_precision_mode) {
bool has_custom_plugin = HasCustomActivationPlugin(activation_type);
// sigmoid precision is wrong for trt
- if (quant_type == schema::QuantType_QUANT_NONE && has_custom_plugin) {
+ if (runtime_precision_mode == RuntimePrecisionMode::RuntimePrecisionMode_FP32 &&
+ quant_type == schema::QuantType_QUANT_NONE && has_custom_plugin) {
std::string layer_name = std::string(trt_in_tensor->getName()) + "_activation";
auto plugin = std::make_shared<ActivationOptPlugin>(layer_name.c_str(), activation_type, device_id);
MS_LOG(INFO) << "using opt plugin for " << layer_name;
@@ -139,6 +142,18 @@ nvinfer1::ILayer *ActivationTensorRT::AddActivation(TensorRTContext *ctx, schema
return activation_layer;
}
+ if (activation_type == schema::ActivationType_SWISH) {
+ auto sigmoid_tensor = activation_layer->getOutput(0);
+ nvinfer1::ElementWiseOperation element_wise_op_ = nvinfer1::ElementWiseOperation::kPROD;
+ nvinfer1::IElementWiseLayer *swish_layer =
+ ctx->network()->addElementWise(*sigmoid_tensor, *trt_in_tensor, element_wise_op_);
+ if (swish_layer == nullptr) {
+ MS_LOG(ERROR) << "add activation op failed for TensorRT.";
+ return nullptr;
+ }
+ return swish_layer;
+ }
if (action_param.has_alpha) {
activation_layer->setAlpha(alpha);
}
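
The new SWISH branch above relies on the identity Swish(x) = x * sigmoid(x): the sigmoid comes from a native TensorRT activation layer and the multiply from an element-wise layer, both of which run in FP16 as well as FP32, so the custom sigmoid plugin is no longer needed on this path. A minimal standalone sketch of the same construction (BuildSwish, network, and input are illustrative names, not part of this commit):

#include <NvInfer.h>

// Sketch only: build Swish(x) = x * sigmoid(x) from native TensorRT layers,
// mirroring what the new branch inside AddActivation does.
nvinfer1::ITensor *BuildSwish(nvinfer1::INetworkDefinition *network, nvinfer1::ITensor *input) {
  // Native sigmoid activation; TensorRT can execute it in FP32 or FP16.
  nvinfer1::IActivationLayer *sigmoid = network->addActivation(*input, nvinfer1::ActivationType::kSIGMOID);
  if (sigmoid == nullptr) {
    return nullptr;
  }
  // Element-wise product of the sigmoid output with the original input gives Swish.
  nvinfer1::IElementWiseLayer *prod =
      network->addElementWise(*sigmoid->getOutput(0), *input, nvinfer1::ElementWiseOperation::kPROD);
  return prod == nullptr ? nullptr : prod->getOutput(0);
}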

View File

@@ -34,10 +34,11 @@ class ActivationTensorRT : public TensorRTOp {
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) override;
- static nvinfer1::ILayer *AddActivation(TensorRTContext *ctx, schema::ActivationType activation_type, float alpha,
- float min_value, float max_value, nvinfer1::ITensor *trt_in_tensor,
- uint32_t device_id = 0,
- schema::QuantType quant_type = schema::QuantType_QUANT_NONE);
+ static nvinfer1::ILayer *AddActivation(
+ TensorRTContext *ctx, schema::ActivationType activation_type, float alpha, float min_value, float max_value,
+ nvinfer1::ITensor *trt_in_tensor, uint32_t device_id = 0,
+ schema::QuantType quant_type = schema::QuantType_QUANT_NONE,
+ RuntimePrecisionMode runtime_precision_mode = RuntimePrecisionMode::RuntimePrecisionMode_FP32);
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_ACTIVATION_TENSORRT_H_

View File

@@ -159,7 +159,9 @@ int NormalizeTensorRT::RunAsTrtOps(TensorRTContext *ctx) {
}
bool NormalizeTensorRT::RunOptPlugin() {
- if (out_tensors_.size() == 1 && in_tensors_.size() == INPUT_SIZE3 && axis_ == in_tensors_[0].Shape().size() - 1 &&
+ auto precision_mode = runtime_->GetRuntimePrecisionMode();
+ if (precision_mode == RuntimePrecisionMode::RuntimePrecisionMode_FP32 && out_tensors_.size() == 1 &&
+ in_tensors_.size() == INPUT_SIZE3 && axis_ == in_tensors_[0].Shape().size() - 1 &&
in_tensors_[0].Shape()[axis_] < GET_THREADS) {
// insufficient shared memory
int dim_sum = std::accumulate(in_tensors_[0].Shape().begin(), in_tensors_[0].Shape().begin() + axis_, 1,
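
The normalize opt plugin gets the same FP32-only gate: when the runtime precision mode is FP16, RunOptPlugin now reports false and the op falls back to RunAsTrtOps, which is built from native TensorRT layers that already support half precision. A standalone sketch of the resulting condition, using a stand-in PrecisionMode enum (an assumption for the sketch; the original additionally checks the accumulated leading dimensions against available shared memory, which is omitted here):

#include <cstddef>
#include <cstdint>
#include <vector>

// Stand-in for RuntimePrecisionMode; an assumption for this sketch only.
enum class PrecisionMode { kFP32, kFP16 };

// Sketch: the opt plugin is only considered when the engine runs in FP32 and the
// normalized (last) axis is small enough to fit a single thread block.
bool ShouldUseNormalizeOptPlugin(PrecisionMode mode, std::size_t out_count, std::size_t in_count,
                                 const std::vector<int64_t> &input_shape, std::size_t axis, int max_threads) {
  return mode == PrecisionMode::kFP32 && out_count == 1 && in_count == 3 &&
         axis == input_shape.size() - 1 && input_shape[axis] < max_threads;
}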

View File

@@ -210,9 +210,9 @@ std::experimental::optional<ActivationParams> TryConvertActivationType(schema::A
{schema::ActivationType_RELU6, ActivationParams{nvinfer1::ActivationType::kCLIP, true, 0, true, 6}},
{schema::ActivationType_RELU1, ActivationParams{nvinfer1::ActivationType::kCLIP, true, 0, true, 1}},
{schema::ActivationType_HARD_TANH, ActivationParams{nvinfer1::ActivationType::kCLIP, true, -1, true, 1}},
+ {schema::ActivationType_SWISH, ActivationParams{nvinfer1::ActivationType::kSIGMOID, false, 0, false, 0}},
// using plugin
- {schema::ActivationType_GELU, ActivationParams{nvinfer1::ActivationType::kTHRESHOLDED_RELU, false, 0, false, 0}},
- {schema::ActivationType_SWISH, ActivationParams{nvinfer1::ActivationType::kTHRESHOLDED_RELU, false, 0, false, 0}}};
+ {schema::ActivationType_GELU, ActivationParams{nvinfer1::ActivationType::kTHRESHOLDED_RELU, false, 0, false, 0}}};
return action_map.find(activation_type) != action_map.end()
? std::experimental::optional<ActivationParams>(action_map[activation_type])
: std::experimental::nullopt;
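
In this lookup table, SWISH previously shared the kTHRESHOLDED_RELU placeholder used for plugin-backed activations; it now maps to the native kSIGMOID, and AddActivation appends the element-wise product shown earlier, leaving only GELU on the plugin placeholder. Below is an approximate sketch of what an ActivationParams entry encodes and how it is applied, with field names inferred from the usage visible in these diffs (they may differ in the actual source):

#include <NvInfer.h>

// Approximate reconstruction of the fields an ActivationParams entry carries,
// inferred from how action_param is used above; real names may differ.
struct ActivationParamsSketch {
  nvinfer1::ActivationType activation_type;
  bool has_alpha;
  float alpha;
  bool has_beta;
  float beta;
};

// RELU6 -> {kCLIP, true, 0, true, 6} clips the input to [0, 6]; SWISH now carries
// plain kSIGMOID, and the multiply with the input is added separately in AddActivation.
void ApplyParams(nvinfer1::IActivationLayer *layer, const ActivationParamsSketch &p) {
  if (p.has_alpha) {
    layer->setAlpha(p.alpha);
  }
  if (p.has_beta) {
    layer->setBeta(p.beta);
  }
}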

View File

@@ -75,12 +75,12 @@ int ActivationTensorRT::AddInnerOp(TensorRTContext *ctx) {
activation_input =
TRTTensorCast(ctx, tensorrt_in_tensors_[0].trt_tensor_, nvinfer1::DataType::kFLOAT, op_name_ + "_cast_in");
}
+ auto runtime_precision_mode = runtime_->GetRuntimePrecisionMode();
auto activation_layer =
ActivationTensorRT::AddActivation(ctx, activation_op->activation_type(), alpha,
std::isfinite(activation_op->min_val()) ? activation_op->min_val() : FLT_MIN,
std::isfinite(activation_op->max_val()) ? activation_op->max_val() : FLT_MAX,
- activation_input, device_id_, quant_type_);
+ activation_input, device_id_, quant_type_, runtime_precision_mode);
if (activation_layer == nullptr) {
MS_LOG(ERROR) << "add activation op failed for TensorRT.";
return RET_ERROR;
@@ -102,10 +102,12 @@ int ActivationTensorRT::AddInnerOp(TensorRTContext *ctx) {
nvinfer1::ILayer *ActivationTensorRT::AddActivation(TensorRTContext *ctx, schema::ActivationType activation_type,
float alpha, float min_value, float max_value,
nvinfer1::ITensor *trt_in_tensor, uint32_t device_id,
- schema::QuantType quant_type) {
+ schema::QuantType quant_type,
+ RuntimePrecisionMode runtime_precision_mode) {
bool has_custom_plugin = HasCustomActivationPlugin(activation_type);
// sigmoid precision is wrong for trt
- if (quant_type == schema::QuantType_QUANT_NONE && has_custom_plugin) {
+ if (runtime_precision_mode == RuntimePrecisionMode::RuntimePrecisionMode_FP32 &&
+ quant_type == schema::QuantType_QUANT_NONE && has_custom_plugin) {
std::string layer_name = std::string(trt_in_tensor->getName()) + "_activation";
auto plugin = std::make_shared<ActivationOptPlugin>(layer_name.c_str(), activation_type, device_id);
MS_LOG(INFO) << "using opt plugin for " << layer_name;
@@ -139,6 +141,18 @@ nvinfer1::ILayer *ActivationTensorRT::AddActivation(TensorRTContext *ctx, schema
return activation_layer;
}
+ if (activation_type == schema::ActivationType_SWISH) {
+ auto sigmoid_tensor = activation_layer->getOutput(0);
+ nvinfer1::ElementWiseOperation element_wise_op_ = nvinfer1::ElementWiseOperation::kPROD;
+ nvinfer1::IElementWiseLayer *swish_layer =
+ ctx->network()->addElementWise(*sigmoid_tensor, *trt_in_tensor, element_wise_op_);
+ if (swish_layer == nullptr) {
+ MS_LOG(ERROR) << "add activation op failed for TensorRT.";
+ return nullptr;
+ }
+ return swish_layer;
+ }
if (action_param.has_alpha) {
activation_layer->setAlpha(alpha);
}

View File

@@ -34,10 +34,11 @@ class ActivationTensorRT : public TensorRTOp {
int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
const std::vector<mindspore::MSTensor> &out_tensors) override;
- static nvinfer1::ILayer *AddActivation(TensorRTContext *ctx, schema::ActivationType activation_type, float alpha,
- float min_value, float max_value, nvinfer1::ITensor *trt_in_tensor,
- uint32_t device_id = 0,
- schema::QuantType quant_type = schema::QuantType_QUANT_NONE);
+ static nvinfer1::ILayer *AddActivation(
+ TensorRTContext *ctx, schema::ActivationType activation_type, float alpha, float min_value, float max_value,
+ nvinfer1::ITensor *trt_in_tensor, uint32_t device_id = 0,
+ schema::QuantType quant_type = schema::QuantType_QUANT_NONE,
+ RuntimePrecisionMode runtime_precision_mode = RuntimePrecisionMode::RuntimePrecisionMode_FP32);
};
} // namespace mindspore::lite
#endif // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_ACTIVATION_TENSORRT_H_

View File

@@ -160,7 +160,9 @@ int NormalizeTensorRT::RunAsTrtOps(TensorRTContext *ctx) {
}
bool NormalizeTensorRT::RunOptPlugin() {
- if (out_tensors_.size() == 1 && in_tensors_.size() == INPUT_SIZE3 && axis_ == in_tensors_[0].Shape().size() - 1 &&
+ auto precision_mode = runtime_->GetRuntimePrecisionMode();
+ if (precision_mode == RuntimePrecisionMode::RuntimePrecisionMode_FP32 && out_tensors_.size() == 1 &&
+ in_tensors_.size() == INPUT_SIZE3 && axis_ == in_tensors_[0].Shape().size() - 1 &&
in_tensors_[0].Shape()[axis_] < GET_THREADS) {
// insufficient shared memory
int dim_sum = std::accumulate(in_tensors_[0].Shape().begin(), in_tensors_[0].Shape().begin() + axis_, 1,

View File

@@ -210,9 +210,9 @@ std::experimental::optional<ActivationParams> TryConvertActivationType(schema::A
{schema::ActivationType_RELU6, ActivationParams{nvinfer1::ActivationType::kCLIP, true, 0, true, 6}},
{schema::ActivationType_RELU1, ActivationParams{nvinfer1::ActivationType::kCLIP, true, 0, true, 1}},
{schema::ActivationType_HARD_TANH, ActivationParams{nvinfer1::ActivationType::kCLIP, true, -1, true, 1}},
+ {schema::ActivationType_SWISH, ActivationParams{nvinfer1::ActivationType::kSIGMOID, false, 0, false, 0}},
// using plugin
- {schema::ActivationType_GELU, ActivationParams{nvinfer1::ActivationType::kTHRESHOLDED_RELU, false, 0, false, 0}},
- {schema::ActivationType_SWISH, ActivationParams{nvinfer1::ActivationType::kTHRESHOLDED_RELU, false, 0, false, 0}}};
+ {schema::ActivationType_GELU, ActivationParams{nvinfer1::ActivationType::kTHRESHOLDED_RELU, false, 0, false, 0}}};
return action_map.find(activation_type) != action_map.end()
? std::experimental::optional<ActivationParams>(action_map[activation_type])
: std::experimental::nullopt;