diff --git a/mindspore/lite/src/extendrt/delegate/tensorrt/op/activation_tensorrt.cc b/mindspore/lite/src/extendrt/delegate/tensorrt/op/activation_tensorrt.cc
index 980d2ebe6b0..22e3e190373 100644
--- a/mindspore/lite/src/extendrt/delegate/tensorrt/op/activation_tensorrt.cc
+++ b/mindspore/lite/src/extendrt/delegate/tensorrt/op/activation_tensorrt.cc
@@ -76,11 +76,12 @@ int ActivationTensorRT::AddInnerOp(TensorRTContext *ctx) {
       TRTTensorCast(ctx, tensorrt_in_tensors_[0].trt_tensor_, nvinfer1::DataType::kFLOAT, op_name_ + "_cast_in");
   }
 
+  auto runtime_precision_mode = runtime_->GetRuntimePrecisionMode();
   auto activation_layer =
     ActivationTensorRT::AddActivation(ctx, activation_op->activation_type(), alpha,
                                       std::isfinite(activation_op->min_val()) ? activation_op->min_val() : FLT_MIN,
                                       std::isfinite(activation_op->max_val()) ? activation_op->max_val() : FLT_MAX,
-                                      activation_input, device_id_, quant_type_);
+                                      activation_input, device_id_, quant_type_, runtime_precision_mode);
   if (activation_layer == nullptr) {
     MS_LOG(ERROR) << "add activation op failed for TensorRT.";
     return RET_ERROR;
@@ -102,10 +103,12 @@ int ActivationTensorRT::AddInnerOp(TensorRTContext *ctx) {
 nvinfer1::ILayer *ActivationTensorRT::AddActivation(TensorRTContext *ctx, schema::ActivationType activation_type,
                                                     float alpha, float min_value, float max_value,
                                                     nvinfer1::ITensor *trt_in_tensor, uint32_t device_id,
-                                                    schema::QuantType quant_type) {
+                                                    schema::QuantType quant_type,
+                                                    RuntimePrecisionMode runtime_precision_mode) {
   bool has_custom_plugin = HasCustomActivationPlugin(activation_type);
   // sigmoid precision is wrong for trt
-  if (quant_type == schema::QuantType_QUANT_NONE && has_custom_plugin) {
+  if (runtime_precision_mode == RuntimePrecisionMode::RuntimePrecisionMode_FP32 &&
+      quant_type == schema::QuantType_QUANT_NONE && has_custom_plugin) {
     std::string layer_name = std::string(trt_in_tensor->getName()) + "_activation";
     auto plugin = std::make_shared<ActivationOptPlugin>(layer_name.c_str(), activation_type, device_id);
     MS_LOG(INFO) << "using opt plugin for " << layer_name;
@@ -139,6 +142,18 @@ nvinfer1::ILayer *ActivationTensorRT::AddActivation(TensorRTContext *ctx, schema
     return activation_layer;
   }
 
+  if (activation_type == schema::ActivationType_SWISH) {
+    auto sigmoid_tensor = activation_layer->getOutput(0);
+    nvinfer1::ElementWiseOperation element_wise_op_ = nvinfer1::ElementWiseOperation::kPROD;
+    nvinfer1::IElementWiseLayer *swish_layer =
+      ctx->network()->addElementWise(*sigmoid_tensor, *trt_in_tensor, element_wise_op_);
+    if (swish_layer == nullptr) {
+      MS_LOG(ERROR) << "add activation op failed for TensorRT.";
+      return nullptr;
+    }
+    return swish_layer;
+  }
+
   if (action_param.has_alpha) {
     activation_layer->setAlpha(alpha);
   }
diff --git a/mindspore/lite/src/extendrt/delegate/tensorrt/op/activation_tensorrt.h b/mindspore/lite/src/extendrt/delegate/tensorrt/op/activation_tensorrt.h
index 8b71d5e91ed..c01a747d454 100644
--- a/mindspore/lite/src/extendrt/delegate/tensorrt/op/activation_tensorrt.h
+++ b/mindspore/lite/src/extendrt/delegate/tensorrt/op/activation_tensorrt.h
@@ -34,10 +34,11 @@ class ActivationTensorRT : public TensorRTOp {
   int IsSupport(const schema::Primitive *primitive, const std::vector &in_tensors,
                 const std::vector &out_tensors) override;
 
-  static nvinfer1::ILayer *AddActivation(TensorRTContext *ctx, schema::ActivationType activation_type, float alpha,
-                                         float min_value, float max_value, nvinfer1::ITensor *trt_in_tensor,
-                                         uint32_t device_id = 0,
-                                         schema::QuantType quant_type = schema::QuantType_QUANT_NONE);
+  static nvinfer1::ILayer *AddActivation(
+    TensorRTContext *ctx, schema::ActivationType activation_type, float alpha, float min_value, float max_value,
+    nvinfer1::ITensor *trt_in_tensor, uint32_t device_id = 0,
+    schema::QuantType quant_type = schema::QuantType_QUANT_NONE,
+    RuntimePrecisionMode runtime_precision_mode = RuntimePrecisionMode::RuntimePrecisionMode_FP32);
 };
 }  // namespace mindspore::lite
 #endif  // MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_ACTIVATION_TENSORRT_H_
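Note: the hunks above lower Swish to native TensorRT layers (sigmoid followed by an element-wise
product with the input) instead of the FP32-only activation plugin. The standalone sketch below
shows the same construction against the TensorRT C++ API; it assumes an existing
INetworkDefinition and input ITensor, omits builder/engine setup, and AddSwish is an illustrative
helper name, not part of the patch.

    #include <NvInfer.h>

    // swish(x) = x * sigmoid(x): add a native kSIGMOID activation, then multiply its output
    // element-wise with the original input, mirroring the SWISH branch added in
    // ActivationTensorRT::AddActivation.
    nvinfer1::ITensor *AddSwish(nvinfer1::INetworkDefinition *network, nvinfer1::ITensor *x) {
      nvinfer1::IActivationLayer *sigmoid = network->addActivation(*x, nvinfer1::ActivationType::kSIGMOID);
      if (sigmoid == nullptr) {
        return nullptr;
      }
      nvinfer1::IElementWiseLayer *prod =
        network->addElementWise(*sigmoid->getOutput(0), *x, nvinfer1::ElementWiseOperation::kPROD);
      return prod == nullptr ? nullptr : prod->getOutput(0);
    }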
diff --git a/mindspore/lite/src/extendrt/delegate/tensorrt/op/normalize_tensorrt.cc b/mindspore/lite/src/extendrt/delegate/tensorrt/op/normalize_tensorrt.cc
index 188346ce71a..88255d40e3d 100644
--- a/mindspore/lite/src/extendrt/delegate/tensorrt/op/normalize_tensorrt.cc
+++ b/mindspore/lite/src/extendrt/delegate/tensorrt/op/normalize_tensorrt.cc
@@ -159,7 +159,9 @@ int NormalizeTensorRT::RunAsTrtOps(TensorRTContext *ctx) {
 }
 
 bool NormalizeTensorRT::RunOptPlugin() {
-  if (out_tensors_.size() == 1 && in_tensors_.size() == INPUT_SIZE3 && axis_ == in_tensors_[0].Shape().size() - 1 &&
+  auto precision_mode = runtime_->GetRuntimePrecisionMode();
+  if (precision_mode == RuntimePrecisionMode::RuntimePrecisionMode_FP32 && out_tensors_.size() == 1 &&
+      in_tensors_.size() == INPUT_SIZE3 && axis_ == in_tensors_[0].Shape().size() - 1 &&
       in_tensors_[0].Shape()[axis_] < GET_THREADS) {
     // insufficient shared memory
     int dim_sum = std::accumulate(in_tensors_[0].Shape().begin(), in_tensors_[0].Shape().begin() + axis_, 1,
diff --git a/mindspore/lite/src/extendrt/delegate/tensorrt/tensorrt_utils.cc b/mindspore/lite/src/extendrt/delegate/tensorrt/tensorrt_utils.cc
index b18d1389a25..fcd5abeff93 100644
--- a/mindspore/lite/src/extendrt/delegate/tensorrt/tensorrt_utils.cc
+++ b/mindspore/lite/src/extendrt/delegate/tensorrt/tensorrt_utils.cc
@@ -210,9 +210,9 @@ std::experimental::optional<ActivationParams> TryConvertActivationType(schema::A
     {schema::ActivationType_RELU6, ActivationParams{nvinfer1::ActivationType::kCLIP, true, 0, true, 6}},
     {schema::ActivationType_RELU1, ActivationParams{nvinfer1::ActivationType::kCLIP, true, 0, true, 1}},
     {schema::ActivationType_HARD_TANH, ActivationParams{nvinfer1::ActivationType::kCLIP, true, -1, true, 1}},
+    {schema::ActivationType_SWISH, ActivationParams{nvinfer1::ActivationType::kSIGMOID, false, 0, false, 0}},
     // using plugin
-    {schema::ActivationType_GELU, ActivationParams{nvinfer1::ActivationType::kTHRESHOLDED_RELU, false, 0, false, 0}},
-    {schema::ActivationType_SWISH, ActivationParams{nvinfer1::ActivationType::kTHRESHOLDED_RELU, false, 0, false, 0}}};
+    {schema::ActivationType_GELU, ActivationParams{nvinfer1::ActivationType::kTHRESHOLDED_RELU, false, 0, false, 0}}};
   return action_map.find(activation_type) != action_map.end() ?
            std::experimental::optional<ActivationParams>(action_map[activation_type]) :
            std::experimental::nullopt;
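Note: NormalizeTensorRT::RunOptPlugin and the activation path above now take the opt-plugin route
only for non-quantized models when the runtime precision mode is FP32; otherwise the native
TensorRT layers are used. A plausible reading is that the hand-written plugin kernels target FP32
only, though the patch does not state the motivation. The self-contained sketch below illustrates
the gate; the enums and the UseOptPlugin helper are stand-ins for illustration, not the MindSpore
definitions.

    #include <iostream>

    // Stand-ins for RuntimePrecisionMode and schema::QuantType (illustration only).
    enum class PrecisionMode { kFP32, kFP16 };
    enum class QuantType { kNone, kQuantAll };

    // Mirrors the condition added in this patch: the plugin path is chosen only for FP32 engines,
    // non-quantized models, and ops that actually have a custom plugin.
    bool UseOptPlugin(PrecisionMode precision, QuantType quant, bool has_custom_plugin) {
      return precision == PrecisionMode::kFP32 && quant == QuantType::kNone && has_custom_plugin;
    }

    int main() {
      std::cout << std::boolalpha
                << UseOptPlugin(PrecisionMode::kFP32, QuantType::kNone, true) << "\n"   // true: plugin path
                << UseOptPlugin(PrecisionMode::kFP16, QuantType::kNone, true) << "\n";  // false: native TRT layers
      return 0;
    }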
diff --git a/mindspore/lite/src/runtime/delegate/tensorrt/op/activation_tensorrt.cc b/mindspore/lite/src/runtime/delegate/tensorrt/op/activation_tensorrt.cc
index e78ec89dddc..a091983c2f1 100644
--- a/mindspore/lite/src/runtime/delegate/tensorrt/op/activation_tensorrt.cc
+++ b/mindspore/lite/src/runtime/delegate/tensorrt/op/activation_tensorrt.cc
@@ -75,12 +75,12 @@ int ActivationTensorRT::AddInnerOp(TensorRTContext *ctx) {
     activation_input =
       TRTTensorCast(ctx, tensorrt_in_tensors_[0].trt_tensor_, nvinfer1::DataType::kFLOAT, op_name_ + "_cast_in");
   }
-
+  auto runtime_precision_mode = runtime_->GetRuntimePrecisionMode();
   auto activation_layer =
     ActivationTensorRT::AddActivation(ctx, activation_op->activation_type(), alpha,
                                       std::isfinite(activation_op->min_val()) ? activation_op->min_val() : FLT_MIN,
                                       std::isfinite(activation_op->max_val()) ? activation_op->max_val() : FLT_MAX,
-                                      activation_input, device_id_, quant_type_);
+                                      activation_input, device_id_, quant_type_, runtime_precision_mode);
   if (activation_layer == nullptr) {
     MS_LOG(ERROR) << "add activation op failed for TensorRT.";
     return RET_ERROR;
@@ -102,10 +102,12 @@ int ActivationTensorRT::AddInnerOp(TensorRTContext *ctx) {
 nvinfer1::ILayer *ActivationTensorRT::AddActivation(TensorRTContext *ctx, schema::ActivationType activation_type,
                                                     float alpha, float min_value, float max_value,
                                                     nvinfer1::ITensor *trt_in_tensor, uint32_t device_id,
-                                                    schema::QuantType quant_type) {
+                                                    schema::QuantType quant_type,
+                                                    RuntimePrecisionMode runtime_precision_mode) {
   bool has_custom_plugin = HasCustomActivationPlugin(activation_type);
   // sigmoid precision is wrong for trt
-  if (quant_type == schema::QuantType_QUANT_NONE && has_custom_plugin) {
+  if (runtime_precision_mode == RuntimePrecisionMode::RuntimePrecisionMode_FP32 &&
+      quant_type == schema::QuantType_QUANT_NONE && has_custom_plugin) {
     std::string layer_name = std::string(trt_in_tensor->getName()) + "_activation";
     auto plugin = std::make_shared<ActivationOptPlugin>(layer_name.c_str(), activation_type, device_id);
     MS_LOG(INFO) << "using opt plugin for " << layer_name;
@@ -139,6 +141,18 @@ nvinfer1::ILayer *ActivationTensorRT::AddActivation(TensorRTContext *ctx, schema
     return activation_layer;
   }
 
+  if (activation_type == schema::ActivationType_SWISH) {
+    auto sigmoid_tensor = activation_layer->getOutput(0);
+    nvinfer1::ElementWiseOperation element_wise_op_ = nvinfer1::ElementWiseOperation::kPROD;
+    nvinfer1::IElementWiseLayer *swish_layer =
+      ctx->network()->addElementWise(*sigmoid_tensor, *trt_in_tensor, element_wise_op_);
+    if (swish_layer == nullptr) {
+      MS_LOG(ERROR) << "add activation op failed for TensorRT.";
+      return nullptr;
+    }
+    return swish_layer;
+  }
+
   if (action_param.has_alpha) {
     activation_layer->setAlpha(alpha);
   }
diff --git a/mindspore/lite/src/runtime/delegate/tensorrt/op/activation_tensorrt.h b/mindspore/lite/src/runtime/delegate/tensorrt/op/activation_tensorrt.h
index 81292b520c5..b2aa4834788 100644
--- a/mindspore/lite/src/runtime/delegate/tensorrt/op/activation_tensorrt.h
+++ b/mindspore/lite/src/runtime/delegate/tensorrt/op/activation_tensorrt.h
@@ -34,10 +34,11 @@ class ActivationTensorRT : public TensorRTOp {
   int IsSupport(const schema::Primitive *primitive, const std::vector &in_tensors,
                 const std::vector &out_tensors) override;
 
-  static nvinfer1::ILayer *AddActivation(TensorRTContext *ctx, schema::ActivationType activation_type, float alpha,
-                                         float min_value, float max_value, nvinfer1::ITensor *trt_in_tensor,
-                                         uint32_t device_id = 0,
-                                         schema::QuantType quant_type = schema::QuantType_QUANT_NONE);
+  static nvinfer1::ILayer *AddActivation(
+    TensorRTContext *ctx, schema::ActivationType activation_type, float alpha, float min_value, float max_value,
+    nvinfer1::ITensor *trt_in_tensor, uint32_t device_id = 0,
+    schema::QuantType quant_type = schema::QuantType_QUANT_NONE,
+    RuntimePrecisionMode runtime_precision_mode = RuntimePrecisionMode::RuntimePrecisionMode_FP32);
 };
 }  // namespace mindspore::lite
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_ACTIVATION_TENSORRT_H_
diff --git a/mindspore/lite/src/runtime/delegate/tensorrt/op/normalize_tensorrt.cc b/mindspore/lite/src/runtime/delegate/tensorrt/op/normalize_tensorrt.cc
index ec5a5ab4007..a431438536d 100644
--- a/mindspore/lite/src/runtime/delegate/tensorrt/op/normalize_tensorrt.cc
+++ b/mindspore/lite/src/runtime/delegate/tensorrt/op/normalize_tensorrt.cc
@@ -160,7 +160,9 @@ int NormalizeTensorRT::RunAsTrtOps(TensorRTContext *ctx) {
 }
 
 bool NormalizeTensorRT::RunOptPlugin() {
-  if (out_tensors_.size() == 1 && in_tensors_.size() == INPUT_SIZE3 && axis_ == in_tensors_[0].Shape().size() - 1 &&
+  auto precision_mode = runtime_->GetRuntimePrecisionMode();
+  if (precision_mode == RuntimePrecisionMode::RuntimePrecisionMode_FP32 && out_tensors_.size() == 1 &&
+      in_tensors_.size() == INPUT_SIZE3 && axis_ == in_tensors_[0].Shape().size() - 1 &&
       in_tensors_[0].Shape()[axis_] < GET_THREADS) {
     // insufficient shared memory
     int dim_sum = std::accumulate(in_tensors_[0].Shape().begin(), in_tensors_[0].Shape().begin() + axis_, 1,
diff --git a/mindspore/lite/src/runtime/delegate/tensorrt/tensorrt_utils.cc b/mindspore/lite/src/runtime/delegate/tensorrt/tensorrt_utils.cc
index 1e43b4b5fd0..4f22c1909e5 100644
--- a/mindspore/lite/src/runtime/delegate/tensorrt/tensorrt_utils.cc
+++ b/mindspore/lite/src/runtime/delegate/tensorrt/tensorrt_utils.cc
@@ -210,9 +210,9 @@ std::experimental::optional<ActivationParams> TryConvertActivationType(schema::A
     {schema::ActivationType_RELU6, ActivationParams{nvinfer1::ActivationType::kCLIP, true, 0, true, 6}},
     {schema::ActivationType_RELU1, ActivationParams{nvinfer1::ActivationType::kCLIP, true, 0, true, 1}},
     {schema::ActivationType_HARD_TANH, ActivationParams{nvinfer1::ActivationType::kCLIP, true, -1, true, 1}},
+    {schema::ActivationType_SWISH, ActivationParams{nvinfer1::ActivationType::kSIGMOID, false, 0, false, 0}},
     // using plugin
-    {schema::ActivationType_GELU, ActivationParams{nvinfer1::ActivationType::kTHRESHOLDED_RELU, false, 0, false, 0}},
-    {schema::ActivationType_SWISH, ActivationParams{nvinfer1::ActivationType::kTHRESHOLDED_RELU, false, 0, false, 0}}};
+    {schema::ActivationType_GELU, ActivationParams{nvinfer1::ActivationType::kTHRESHOLDED_RELU, false, 0, false, 0}}};
   return action_map.find(activation_type) != action_map.end() ?
            std::experimental::optional<ActivationParams>(action_map[activation_type]) :
            std::experimental::nullopt;
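Note: the runtime-side hunks mirror the extendrt changes above. As a quick sanity check,
independent of TensorRT and not part of the patch, the composed sigmoid-then-multiply graph
matches the usual Swish/SiLU definition swish(x) = x * sigmoid(x):

    #include <cmath>
    #include <cstdio>
    #include <initializer_list>

    int main() {
      for (double x : {-4.0, -1.0, 0.0, 0.5, 3.0}) {
        double sigmoid = 1.0 / (1.0 + std::exp(-x));  // what the kSIGMOID layer computes
        double swish = x * sigmoid;                   // what the following kPROD layer computes
        std::printf("swish(%+.2f) = %+.6f\n", x, swish);
      }
      return 0;
    }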