forked from mindspore-Ecosystem/mindspore

make sigmoid and normalize support FP16

This commit is contained in:
parent 88ca0b91e2
commit d543a5c7f0
@@ -76,11 +76,12 @@ int ActivationTensorRT::AddInnerOp(TensorRTContext *ctx) {
       TRTTensorCast(ctx, tensorrt_in_tensors_[0].trt_tensor_, nvinfer1::DataType::kFLOAT, op_name_ + "_cast_in");
   }
 
+  auto runtime_precision_mode = runtime_->GetRuntimePrecisionMode();
   auto activation_layer =
     ActivationTensorRT::AddActivation(ctx, activation_op->activation_type(), alpha,
                                       std::isfinite(activation_op->min_val()) ? activation_op->min_val() : FLT_MIN,
                                       std::isfinite(activation_op->max_val()) ? activation_op->max_val() : FLT_MAX,
-                                      activation_input, device_id_, quant_type_);
+                                      activation_input, device_id_, quant_type_, runtime_precision_mode);
   if (activation_layer == nullptr) {
     MS_LOG(ERROR) << "add activation op failed for TensorRT.";
     return RET_ERROR;
@@ -102,10 +103,12 @@ int ActivationTensorRT::AddInnerOp(TensorRTContext *ctx) {
 nvinfer1::ILayer *ActivationTensorRT::AddActivation(TensorRTContext *ctx, schema::ActivationType activation_type,
                                                     float alpha, float min_value, float max_value,
                                                     nvinfer1::ITensor *trt_in_tensor, uint32_t device_id,
-                                                    schema::QuantType quant_type) {
+                                                    schema::QuantType quant_type,
+                                                    RuntimePrecisionMode runtime_precision_mode) {
   bool has_custom_plugin = HasCustomActivationPlugin(activation_type);
   // sigmoid precision is wrong for trt
-  if (quant_type == schema::QuantType_QUANT_NONE && has_custom_plugin) {
+  if (runtime_precision_mode == RuntimePrecisionMode::RuntimePrecisionMode_FP32 &&
+      quant_type == schema::QuantType_QUANT_NONE && has_custom_plugin) {
     std::string layer_name = std::string(trt_in_tensor->getName()) + "_activation";
     auto plugin = std::make_shared<ActivationOptPlugin>(layer_name.c_str(), activation_type, device_id);
     MS_LOG(INFO) << "using opt plugin for " << layer_name;
@@ -139,6 +142,18 @@ nvinfer1::ILayer *ActivationTensorRT::AddActivation(TensorRTContext *ctx, schema
     return activation_layer;
   }
 
+  if (activation_type == schema::ActivationType_SWISH) {
+    auto sigmoid_tensor = activation_layer->getOutput(0);
+    nvinfer1::ElementWiseOperation element_wise_op_ = nvinfer1::ElementWiseOperation::kPROD;
+    nvinfer1::IElementWiseLayer *swish_layer =
+      ctx->network()->addElementWise(*sigmoid_tensor, *trt_in_tensor, element_wise_op_);
+    if (swish_layer == nullptr) {
+      MS_LOG(ERROR) << "add activation op failed for TensorRT.";
+      return nullptr;
+    }
+    return swish_layer;
+  }
+
   if (action_param.has_alpha) {
     activation_layer->setAlpha(alpha);
   }
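Note on this hunk: Swish(x) = x * sigmoid(x). Since SWISH is now lowered to TensorRT's built-in kSIGMOID activation (see the TryConvertActivationType hunk below), the remaining multiply by the input is appended here as an element-wise kPROD layer, so the whole pattern runs on native TensorRT kernels, including their FP16 implementations, instead of the custom plugin. A minimal sketch of the same construction against the raw TensorRT API, for reference only; BuildSwish is a hypothetical helper, not part of this patch:

#include <NvInfer.h>

// Sketch: swish(x) = x * sigmoid(x) built from native TensorRT layers.
nvinfer1::ITensor *BuildSwish(nvinfer1::INetworkDefinition *network, nvinfer1::ITensor *x) {
  // Built-in sigmoid activation; TensorRT provides an FP16 kernel for it.
  nvinfer1::IActivationLayer *sigmoid = network->addActivation(*x, nvinfer1::ActivationType::kSIGMOID);
  if (sigmoid == nullptr) {
    return nullptr;
  }
  // Multiply the sigmoid output back with the original input.
  nvinfer1::IElementWiseLayer *prod =
    network->addElementWise(*x, *sigmoid->getOutput(0), nvinfer1::ElementWiseOperation::kPROD);
  return prod == nullptr ? nullptr : prod->getOutput(0);
}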
@@ -34,10 +34,11 @@ class ActivationTensorRT : public TensorRTOp {
   int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                 const std::vector<mindspore::MSTensor> &out_tensors) override;
 
-  static nvinfer1::ILayer *AddActivation(TensorRTContext *ctx, schema::ActivationType activation_type, float alpha,
-                                         float min_value, float max_value, nvinfer1::ITensor *trt_in_tensor,
-                                         uint32_t device_id = 0,
-                                         schema::QuantType quant_type = schema::QuantType_QUANT_NONE);
+  static nvinfer1::ILayer *AddActivation(
+    TensorRTContext *ctx, schema::ActivationType activation_type, float alpha, float min_value, float max_value,
+    nvinfer1::ITensor *trt_in_tensor, uint32_t device_id = 0,
+    schema::QuantType quant_type = schema::QuantType_QUANT_NONE,
+    RuntimePrecisionMode runtime_precision_mode = RuntimePrecisionMode::RuntimePrecisionMode_FP32);
 };
 }  // namespace mindspore::lite
 #endif  // MINDSPORE_LITE_SRC_EXTENDRT_DELEGATE_TENSORRT_OP_ACTIVATION_TENSORRT_H_
@@ -159,7 +159,9 @@ int NormalizeTensorRT::RunAsTrtOps(TensorRTContext *ctx) {
 }
 
 bool NormalizeTensorRT::RunOptPlugin() {
-  if (out_tensors_.size() == 1 && in_tensors_.size() == INPUT_SIZE3 && axis_ == in_tensors_[0].Shape().size() - 1 &&
+  auto precision_mode = runtime_->GetRuntimePrecisionMode();
+  if (precision_mode == RuntimePrecisionMode::RuntimePrecisionMode_FP32 && out_tensors_.size() == 1 &&
+      in_tensors_.size() == INPUT_SIZE3 && axis_ == in_tensors_[0].Shape().size() - 1 &&
       in_tensors_[0].Shape()[axis_] < GET_THREADS) {
     // insufficient shared memory
     int dim_sum = std::accumulate(in_tensors_[0].Shape().begin(), in_tensors_[0].Shape().begin() + axis_, 1,
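Note on this hunk: RunOptPlugin() now also requires FP32 runtime precision. Under FP16 it returns false and the op takes the RunAsTrtOps() path, which assembles the normalization from native TensorRT layers that have FP16 kernels. A rough sketch of how the predicate is likely consumed; the body below is illustrative, not the actual implementation, and RunAsPlugin is a hypothetical name for the plugin path:

// Illustrative only: plugin path for FP32 engines, native layers otherwise.
int NormalizeTensorRT::AddInnerOp(TensorRTContext *ctx) {
  if (RunOptPlugin()) {
    // FP32 engine and the last-axis size fits the plugin's shared-memory budget.
    return RunAsPlugin(ctx);   // hypothetical name for the plugin path
  }
  return RunAsTrtOps(ctx);     // native TensorRT layers, usable under FP16
}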
@@ -210,9 +210,9 @@ std::experimental::optional<ActivationParams> TryConvertActivationType(schema::A
     {schema::ActivationType_RELU6, ActivationParams{nvinfer1::ActivationType::kCLIP, true, 0, true, 6}},
     {schema::ActivationType_RELU1, ActivationParams{nvinfer1::ActivationType::kCLIP, true, 0, true, 1}},
     {schema::ActivationType_HARD_TANH, ActivationParams{nvinfer1::ActivationType::kCLIP, true, -1, true, 1}},
+    {schema::ActivationType_SWISH, ActivationParams{nvinfer1::ActivationType::kSIGMOID, false, 0, false, 0}},
     // using plugin
-    {schema::ActivationType_GELU, ActivationParams{nvinfer1::ActivationType::kTHRESHOLDED_RELU, false, 0, false, 0}},
-    {schema::ActivationType_SWISH, ActivationParams{nvinfer1::ActivationType::kTHRESHOLDED_RELU, false, 0, false, 0}}};
+    {schema::ActivationType_GELU, ActivationParams{nvinfer1::ActivationType::kTHRESHOLDED_RELU, false, 0, false, 0}}};
   return action_map.find(activation_type) != action_map.end()
          ? std::experimental::optional<ActivationParams>(action_map[activation_type])
          : std::experimental::nullopt;
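Note on this hunk: SWISH moves out of the "using plugin" group. It previously shared the kTHRESHOLDED_RELU placeholder that marks plugin-backed activations; it now maps to the native kSIGMOID activation, with the multiply by the input appended in AddActivation (see the SWISH hunk above). GELU keeps the plugin path. Illustrative lookup only:

// After this change, a SWISH lookup yields params carrying
// nvinfer1::ActivationType::kSIGMOID rather than the kTHRESHOLDED_RELU plugin marker.
auto params = TryConvertActivationType(schema::ActivationType_SWISH);

The hunks that follow apply the same edits to the second copy of the TensorRT delegate sources; its header's include guard reads MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_ACTIVATION_TENSORRT_H_ instead of the SRC_EXTENDRT variant above.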
@@ -75,12 +75,12 @@ int ActivationTensorRT::AddInnerOp(TensorRTContext *ctx) {
     activation_input =
       TRTTensorCast(ctx, tensorrt_in_tensors_[0].trt_tensor_, nvinfer1::DataType::kFLOAT, op_name_ + "_cast_in");
   }
-
+  auto runtime_precision_mode = runtime_->GetRuntimePrecisionMode();
   auto activation_layer =
     ActivationTensorRT::AddActivation(ctx, activation_op->activation_type(), alpha,
                                       std::isfinite(activation_op->min_val()) ? activation_op->min_val() : FLT_MIN,
                                       std::isfinite(activation_op->max_val()) ? activation_op->max_val() : FLT_MAX,
-                                      activation_input, device_id_, quant_type_);
+                                      activation_input, device_id_, quant_type_, runtime_precision_mode);
   if (activation_layer == nullptr) {
     MS_LOG(ERROR) << "add activation op failed for TensorRT.";
     return RET_ERROR;
@@ -102,10 +102,12 @@ int ActivationTensorRT::AddInnerOp(TensorRTContext *ctx) {
 nvinfer1::ILayer *ActivationTensorRT::AddActivation(TensorRTContext *ctx, schema::ActivationType activation_type,
                                                     float alpha, float min_value, float max_value,
                                                     nvinfer1::ITensor *trt_in_tensor, uint32_t device_id,
-                                                    schema::QuantType quant_type) {
+                                                    schema::QuantType quant_type,
+                                                    RuntimePrecisionMode runtime_precision_mode) {
   bool has_custom_plugin = HasCustomActivationPlugin(activation_type);
   // sigmoid precision is wrong for trt
-  if (quant_type == schema::QuantType_QUANT_NONE && has_custom_plugin) {
+  if (runtime_precision_mode == RuntimePrecisionMode::RuntimePrecisionMode_FP32 &&
+      quant_type == schema::QuantType_QUANT_NONE && has_custom_plugin) {
     std::string layer_name = std::string(trt_in_tensor->getName()) + "_activation";
     auto plugin = std::make_shared<ActivationOptPlugin>(layer_name.c_str(), activation_type, device_id);
     MS_LOG(INFO) << "using opt plugin for " << layer_name;
@@ -139,6 +141,18 @@ nvinfer1::ILayer *ActivationTensorRT::AddActivation(TensorRTContext *ctx, schema
     return activation_layer;
   }
 
+  if (activation_type == schema::ActivationType_SWISH) {
+    auto sigmoid_tensor = activation_layer->getOutput(0);
+    nvinfer1::ElementWiseOperation element_wise_op_ = nvinfer1::ElementWiseOperation::kPROD;
+    nvinfer1::IElementWiseLayer *swish_layer =
+      ctx->network()->addElementWise(*sigmoid_tensor, *trt_in_tensor, element_wise_op_);
+    if (swish_layer == nullptr) {
+      MS_LOG(ERROR) << "add activation op failed for TensorRT.";
+      return nullptr;
+    }
+    return swish_layer;
+  }
+
   if (action_param.has_alpha) {
     activation_layer->setAlpha(alpha);
   }
@@ -34,10 +34,11 @@ class ActivationTensorRT : public TensorRTOp {
   int IsSupport(const schema::Primitive *primitive, const std::vector<mindspore::MSTensor> &in_tensors,
                 const std::vector<mindspore::MSTensor> &out_tensors) override;
 
-  static nvinfer1::ILayer *AddActivation(TensorRTContext *ctx, schema::ActivationType activation_type, float alpha,
-                                         float min_value, float max_value, nvinfer1::ITensor *trt_in_tensor,
-                                         uint32_t device_id = 0,
-                                         schema::QuantType quant_type = schema::QuantType_QUANT_NONE);
+  static nvinfer1::ILayer *AddActivation(
+    TensorRTContext *ctx, schema::ActivationType activation_type, float alpha, float min_value, float max_value,
+    nvinfer1::ITensor *trt_in_tensor, uint32_t device_id = 0,
+    schema::QuantType quant_type = schema::QuantType_QUANT_NONE,
+    RuntimePrecisionMode runtime_precision_mode = RuntimePrecisionMode::RuntimePrecisionMode_FP32);
 };
 }  // namespace mindspore::lite
 #endif  // MINDSPORE_LITE_SRC_RUNTIME_DELEGATE_TENSORRT_OP_ACTIVATION_TENSORRT_H_
@@ -160,7 +160,9 @@ int NormalizeTensorRT::RunAsTrtOps(TensorRTContext *ctx) {
 }
 
 bool NormalizeTensorRT::RunOptPlugin() {
-  if (out_tensors_.size() == 1 && in_tensors_.size() == INPUT_SIZE3 && axis_ == in_tensors_[0].Shape().size() - 1 &&
+  auto precision_mode = runtime_->GetRuntimePrecisionMode();
+  if (precision_mode == RuntimePrecisionMode::RuntimePrecisionMode_FP32 && out_tensors_.size() == 1 &&
+      in_tensors_.size() == INPUT_SIZE3 && axis_ == in_tensors_[0].Shape().size() - 1 &&
       in_tensors_[0].Shape()[axis_] < GET_THREADS) {
     // insufficient shared memory
     int dim_sum = std::accumulate(in_tensors_[0].Shape().begin(), in_tensors_[0].Shape().begin() + axis_, 1,
@@ -210,9 +210,9 @@ std::experimental::optional<ActivationParams> TryConvertActivationType(schema::A
     {schema::ActivationType_RELU6, ActivationParams{nvinfer1::ActivationType::kCLIP, true, 0, true, 6}},
     {schema::ActivationType_RELU1, ActivationParams{nvinfer1::ActivationType::kCLIP, true, 0, true, 1}},
     {schema::ActivationType_HARD_TANH, ActivationParams{nvinfer1::ActivationType::kCLIP, true, -1, true, 1}},
+    {schema::ActivationType_SWISH, ActivationParams{nvinfer1::ActivationType::kSIGMOID, false, 0, false, 0}},
     // using plugin
-    {schema::ActivationType_GELU, ActivationParams{nvinfer1::ActivationType::kTHRESHOLDED_RELU, false, 0, false, 0}},
-    {schema::ActivationType_SWISH, ActivationParams{nvinfer1::ActivationType::kTHRESHOLDED_RELU, false, 0, false, 0}}};
+    {schema::ActivationType_GELU, ActivationParams{nvinfer1::ActivationType::kTHRESHOLDED_RELU, false, 0, false, 0}}};
   return action_map.find(activation_type) != action_map.end()
          ? std::experimental::optional<ActivationParams>(action_map[activation_type])
          : std::experimental::nullopt;