From ba9bbfadf8e17eeffdb7b749197b92dd7f79b191 Mon Sep 17 00:00:00 2001
From: wilfChen
Date: Sun, 25 Apr 2021 09:53:47 +0800
Subject: [PATCH] gpu inference mixed precision

---
 include/api/context.h                               | 12 ++++++++++++
 .../optimizer/trt_pass/trt_converter_context.cc     |  9 +++++++++
 mindspore/ccsrc/cxx_api/context.cc                  | 11 +++++++++++
 mindspore/ccsrc/cxx_api/graph/gpu/gpu_graph_impl.cc |  4 ++--
 mindspore/ccsrc/runtime/device/gpu/trt_loader.h     |  2 +-
 mindspore/core/utils/ms_context.h                   |  1 +
 6 files changed, 36 insertions(+), 3 deletions(-)

diff --git a/include/api/context.h b/include/api/context.h
index ef4664b8c0a..154b67c86b3 100644
--- a/include/api/context.h
+++ b/include/api/context.h
@@ -116,8 +116,20 @@ class MS_API NvidiaGPUDeviceInfo : public DeviceInfoContext {
 
   void SetGpuTrtInferMode(bool gpu_trt_infer_mode);
   bool GetGpuTrtInferMode() const;
+
+  inline void SetPrecisionMode(const std::string &precision_mode);
+  inline std::string GetPrecisionMode() const;
+
+ private:
+  void SetPrecisionMode(const std::vector<char> &precision_mode);
+  std::vector<char> GetPrecisionModeChar() const;
 };
 
+void NvidiaGPUDeviceInfo::SetPrecisionMode(const std::string &precision_mode) {
+  SetPrecisionMode(StringToChar(precision_mode));
+}
+std::string NvidiaGPUDeviceInfo::GetPrecisionMode() const { return CharToString(GetPrecisionModeChar()); }
+
 class MS_API Ascend910DeviceInfo : public DeviceInfoContext {
  public:
   enum DeviceType GetDeviceType() const override { return DeviceType::kAscend910; };
diff --git a/mindspore/ccsrc/backend/optimizer/trt_pass/trt_converter_context.cc b/mindspore/ccsrc/backend/optimizer/trt_pass/trt_converter_context.cc
index 1e9403ca1af..bff99837e68 100644
--- a/mindspore/ccsrc/backend/optimizer/trt_pass/trt_converter_context.cc
+++ b/mindspore/ccsrc/backend/optimizer/trt_pass/trt_converter_context.cc
@@ -198,6 +198,15 @@ bool TrtConverterContext::Serialize(std::string *model) {
   MS_EXCEPTION_IF_NULL(model);
   builder_->setMaxBatchSize(batch_size_);
   config_->setMaxWorkspaceSize(workspace_size_);
+
+  // Set precision mode
+  const auto &context = MsContext::GetInstance();
+  const auto &precision_mode = context->get_param<std::string>(MS_CTX_INFER_PRECISION_MODE);
+  if (precision_mode == "fp16") {
+    MS_LOG(WARNING) << "Inference with mixed precision mode. It will take a few minutes for operator selection.";
+    config_->setFlag(nvinfer1::BuilderFlag::kFP16);
+  }
+
   engine_ = TrtPtr(builder_->buildEngineWithConfig(*network_, *config_));
   MS_EXCEPTION_IF_NULL(engine_);
 
diff --git a/mindspore/ccsrc/cxx_api/context.cc b/mindspore/ccsrc/cxx_api/context.cc
index b0c11792162..efdb44974fe 100644
--- a/mindspore/ccsrc/cxx_api/context.cc
+++ b/mindspore/ccsrc/cxx_api/context.cc
@@ -27,6 +27,7 @@ constexpr auto kModelOptionKirinNpuFrequency = "mindspore.option.kirin_npu.frequ
 constexpr auto kModelOptionDeviceID = "mindspore.option.device_id";
 constexpr auto kModelOptionNvidiaGpuDeviceID = kModelOptionDeviceID;
 constexpr auto kModelOptionNvidiaGpuTrtInferMode = "mindspore.option.nvidia_gpu.trt_infer_mode";
+constexpr auto kModelOptionNvidiaGpuPrecisionMode = "mindspore.option.nvidia_gpu.precision_mode";
 constexpr auto kModelOptionAscend910DeviceID = kModelOptionDeviceID;
 constexpr auto kModelOptionAscend310DeviceID = kModelOptionDeviceID;
 constexpr auto kModelOptionAscend310DumpCfgPath = "mindspore.option.ascend310.dump_config_file_path";
@@ -153,6 +154,16 @@ bool NvidiaGPUDeviceInfo::GetGpuTrtInferMode() const {
   return GetValue<bool>(data_, kModelOptionNvidiaGpuTrtInferMode);
 }
 
+void NvidiaGPUDeviceInfo::SetPrecisionMode(const std::vector<char> &precision_mode) {
+  MS_EXCEPTION_IF_NULL(data_);
+  data_->params[kModelOptionNvidiaGpuPrecisionMode] = CharToString(precision_mode);
+}
+std::vector<char> NvidiaGPUDeviceInfo::GetPrecisionModeChar() const {
+  MS_EXCEPTION_IF_NULL(data_);
+  const std::string &ref = GetValue<std::string>(data_, kModelOptionNvidiaGpuPrecisionMode);
+  return StringToChar(ref);
+}
+
 void Ascend910DeviceInfo::SetDeviceID(uint32_t device_id) {
   MS_EXCEPTION_IF_NULL(data_);
   data_->params[kModelOptionAscend910DeviceID] = device_id;
diff --git a/mindspore/ccsrc/cxx_api/graph/gpu/gpu_graph_impl.cc b/mindspore/ccsrc/cxx_api/graph/gpu/gpu_graph_impl.cc
index 2891c3cdbb8..26601bd6136 100644
--- a/mindspore/ccsrc/cxx_api/graph/gpu/gpu_graph_impl.cc
+++ b/mindspore/ccsrc/cxx_api/graph/gpu/gpu_graph_impl.cc
@@ -63,8 +63,8 @@ Status GPUGraphImpl::InitEnv() {
   if (gpu_info == nullptr) {
     return kMCDeviceError;
   }
-  auto enable_trt = gpu_info->GetGpuTrtInferMode();
-  ms_context->set_param<bool>(MS_CTX_ENABLE_INFER_OPT, enable_trt);
+  ms_context->set_param<bool>(MS_CTX_ENABLE_INFER_OPT, gpu_info->GetGpuTrtInferMode());
+  ms_context->set_param<std::string>(MS_CTX_INFER_PRECISION_MODE, gpu_info->GetPrecisionMode());
 
   session_impl_ = session::SessionFactory::Get().Create(kGpuInferenceDevice);
   if (session_impl_ == nullptr) {
diff --git a/mindspore/ccsrc/runtime/device/gpu/trt_loader.h b/mindspore/ccsrc/runtime/device/gpu/trt_loader.h
index d5035748e75..251a26906a5 100644
--- a/mindspore/ccsrc/runtime/device/gpu/trt_loader.h
+++ b/mindspore/ccsrc/runtime/device/gpu/trt_loader.h
@@ -31,7 +31,7 @@ class TrtLoader {
   std::shared_ptr<nvinfer1::IBuilder> CreateInferBuilder(nvinfer1::ILogger *logger);
   std::shared_ptr<nvinfer1::IRuntime> CreateInferRuntime(nvinfer1::ILogger *logger);
 
-  bool nvinfer_loaded() { return nvinfer_loaded_; }
+  bool nvinfer_loaded() const { return nvinfer_loaded_; }
 
  private:
   bool nvinfer_loaded_;
diff --git a/mindspore/core/utils/ms_context.h b/mindspore/core/utils/ms_context.h
index e7c95d68a4f..4be6a3629e2 100644
--- a/mindspore/core/utils/ms_context.h
+++ b/mindspore/core/utils/ms_context.h
@@ -118,6 +118,7 @@ enum MsCtxParam : unsigned {
   MS_CTX_ENV_CONFIG_PATH,
   MS_CTX_TUNE_MODE,
   MS_CTX_GRAPH_KERNEL_FLAGS,
+  MS_CTX_INFER_PRECISION_MODE,  // GPU inference precision mode configured by Serving or Unify API.
   MS_CTX_TYPE_STRING_END,  // parameter numbers of each type
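
Usage note: the sketch below is not part of the patch. It shows how a caller might enable FP16 TensorRT inference through the new precision_mode option, assuming the unified C++ API declared in include/api/context.h of the same tree (Context, MutableDeviceInfo, SetDeviceID); the model loading and Model::Build steps are omitted.

  // Sketch only: configure GPU inference with the new precision_mode option.
  // Any value other than "fp16" leaves the TensorRT builder in its default FP32 mode.
  #include <memory>
  #include "include/api/context.h"

  std::shared_ptr<mindspore::Context> BuildGpuFp16Context() {
    auto context = std::make_shared<mindspore::Context>();
    auto gpu_info = std::make_shared<mindspore::NvidiaGPUDeviceInfo>();
    gpu_info->SetDeviceID(0);
    gpu_info->SetGpuTrtInferMode(true);  // routes the graph through the TensorRT pass (MS_CTX_ENABLE_INFER_OPT)
    gpu_info->SetPrecisionMode("fp16");  // new option; copied into MS_CTX_INFER_PRECISION_MODE by GPUGraphImpl::InitEnv
    context->MutableDeviceInfo().push_back(gpu_info);
    return context;  // pass to Model::Build(); TrtConverterContext::Serialize then sets BuilderFlag::kFP16
  }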