From 52c58735fc6711169cfba4992578abe2e960e4b4 Mon Sep 17 00:00:00 2001 From: dinghao Date: Sat, 27 Jun 2020 10:01:06 +0800 Subject: [PATCH] fix serving bugs --- mindspore/ccsrc/CMakeLists.txt | 13 +- mindspore/ccsrc/session/session.cc | 112 ++++++++++++------ mindspore/ccsrc/utils/log_adapter.cc | 32 +++-- serving/core/server.cc | 36 ++++-- serving/core/util/file_system_operation.cc | 5 +- serving/core/util/option_parser.cc | 40 ++++--- serving/core/util/option_parser.h | 3 +- serving/core/version_control/model.cc | 1 - .../version_control/version_controller.cc | 14 +-- .../core/version_control/version_controller.h | 1 - serving/cpp_example/ms_client.cc | 2 +- 11 files changed, 157 insertions(+), 102 deletions(-) diff --git a/mindspore/ccsrc/CMakeLists.txt b/mindspore/ccsrc/CMakeLists.txt index 8109e608c5c..cc5845cbf15 100644 --- a/mindspore/ccsrc/CMakeLists.txt +++ b/mindspore/ccsrc/CMakeLists.txt @@ -277,10 +277,11 @@ endif () if (USE_GLOG) target_link_libraries(inference PRIVATE mindspore::glog) -else() - if (CMAKE_SYSTEM_NAME MATCHES "Linux") - target_link_options(inference PRIVATE -Wl,-init,mindspore_log_init) - elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin") - set_target_properties(inference PROPERTIES MACOSX_RPATH ON) - endif () endif() + +if (CMAKE_SYSTEM_NAME MATCHES "Linux") + target_link_options(inference PRIVATE -Wl,-init,common_log_init) +elseif (CMAKE_SYSTEM_NAME MATCHES "Darwin") + set_target_properties(inference PROPERTIES MACOSX_RPATH ON) +endif () + diff --git a/mindspore/ccsrc/session/session.cc b/mindspore/ccsrc/session/session.cc index 90e02b37ff1..ae70fc77aa5 100644 --- a/mindspore/ccsrc/session/session.cc +++ b/mindspore/ccsrc/session/session.cc @@ -33,9 +33,14 @@ namespace py = pybind11; namespace mindspore::inference { std::shared_ptr LoadModel(const char *model_buf, size_t size, const std::string &device) { - inference::Session::RegAllOp(); - auto anf_graph = lite::AnfConverter::RunAnfConverter(model_buf, size); - return anf_graph; + try { + inference::Session::RegAllOp(); + auto anf_graph = lite::AnfConverter::RunAnfConverter(model_buf, size); + return anf_graph; + } catch (std::exception &e) { + MS_LOG(ERROR) << "Inference LoadModel failed"; + return nullptr; + } } void ExitInference() { @@ -51,12 +56,17 @@ void ExitInference() { } std::shared_ptr MSSession::CreateSession(const std::string &device, uint32_t device_id) { - auto session = std::make_shared(); - auto ret = session->Init(device, device_id); - if (ret != 0) { + try { + auto session = std::make_shared(); + auto ret = session->Init(device, device_id); + if (ret != 0) { + return nullptr; + } + return session; + } catch (std::exception &e) { + MS_LOG(ERROR) << "Inference CreatSession failed"; return nullptr; } - return session; } void Session::RegAllOp() { @@ -113,47 +123,71 @@ void Session::RegAllOp() { uint32_t Session::CompileGraph(std::shared_ptr funcGraphPtr) { MS_ASSERT(session_impl_ != nullptr); - auto graph_id = session_impl_->CompileGraph(NOT_NULL(funcGraphPtr)); - py::gil_scoped_release gil_release; - return graph_id; + try { + auto graph_id = session_impl_->CompileGraph(NOT_NULL(funcGraphPtr)); + py::gil_scoped_release gil_release; + return graph_id; + } catch (std::exception &e) { + MS_LOG(ERROR) << "Inference CompileGraph failed"; + return static_cast(-1); + } } MultiTensor Session::RunGraph(uint32_t graph_id, const std::vector> &inputs) { - std::vector inTensors; - inTensors.resize(inputs.size()); - bool has_error = false; - std::transform(inputs.begin(), inputs.end(), inTensors.begin(), - [&has_error](const std::shared_ptr &tensor_ptr) -> tensor::TensorPtr { - if (tensor_ptr == nullptr) { - MS_LOG(WARNING) << "input MSTensor is nullptr, return nullptr"; - has_error = true; - return nullptr; - } - auto tensor = static_cast(tensor_ptr.get()); - if (tensor == nullptr) { - MS_LOG(ERROR) << "Can not cast input MSTensor to tensor"; - has_error = true; - return nullptr; - } - return tensor->tensor(); - }); - if (has_error) { - MS_LOG(ERROR) << "Init Tensor failed, returning empty result"; - std::vector> multiTensor; - return multiTensor; + try { + std::vector inTensors; + inTensors.resize(inputs.size()); + bool has_error = false; + std::transform(inputs.begin(), inputs.end(), inTensors.begin(), + [&has_error](const std::shared_ptr &tensor_ptr) -> tensor::TensorPtr { + if (tensor_ptr == nullptr) { + MS_LOG(WARNING) << "input MSTensor is nullptr, return nullptr"; + has_error = true; + return nullptr; + } + auto tensor = static_cast(tensor_ptr.get()); + if (tensor == nullptr) { + MS_LOG(ERROR) << "Can not cast input MSTensor to tensor"; + has_error = true; + return nullptr; + } + return tensor->tensor(); + }); + if (has_error) { + MS_LOG(ERROR) << "Init Tensor failed, returning empty result"; + std::vector> multiTensor; + return multiTensor; + } + VectorRef outputs; + session_impl_->RunGraph(graph_id, inTensors, &outputs); + + return TransformVectorRefToMultiTensor(outputs); + } catch (std::exception &e) { + MS_LOG(ERROR) << "Inference Rungraph failed"; + return MultiTensor(); } - VectorRef outputs; - session_impl_->RunGraph(graph_id, inTensors, &outputs); - - return TransformVectorRefToMultiTensor(outputs); } - +namespace { +string AjustTargetName(const std::string &device) { + if (device == kAscendDevice) { + return std::string(kAscendDevice) + "Inference"; + } else { + MS_LOG(ERROR) << "Only support device Ascend right now"; + return ""; + } +} +} // namespace int Session::Init(const std::string &device, uint32_t device_id) { RegAllOp(); auto ms_context = MsContext::GetInstance(); ms_context->set_execution_mode(kGraphMode); - ms_context->set_device_target(kAscendDevice); - session_impl_ = session::SessionFactory::Get().Create(device); + ms_context->set_device_id(device_id); + auto ajust_device = AjustTargetName(device); + if (ajust_device == "") { + return -1; + } + ms_context->set_device_target(device); + session_impl_ = session::SessionFactory::Get().Create(ajust_device); if (session_impl_ == nullptr) { MS_LOG(ERROR) << "Session create failed!, please make sure target device:" << device << " is available."; return -1; diff --git a/mindspore/ccsrc/utils/log_adapter.cc b/mindspore/ccsrc/utils/log_adapter.cc index d16fbead9bc..3588754dae1 100644 --- a/mindspore/ccsrc/utils/log_adapter.cc +++ b/mindspore/ccsrc/utils/log_adapter.cc @@ -463,7 +463,7 @@ void InitSubModulesLogLevel() { // set submodule's log level auto submodule = GetEnv("MS_SUBMODULE_LOG_v"); - MS_LOG(INFO) << "MS_SUBMODULE_LOG_v=`" << submodule << "`"; + MS_LOG(DEBUG) << "MS_SUBMODULE_LOG_v=`" << submodule << "`"; LogConfigParser parser(submodule); auto configs = parser.Parse(); for (const auto &cfg : configs) { @@ -489,22 +489,14 @@ void InitSubModulesLogLevel() { } // namespace mindspore extern "C" { -// shared lib init hook #if defined(_WIN32) || defined(_WIN64) -__attribute__((constructor)) void mindspore_log_init(void) { +__attribute__((constructor)) void common_log_init(void) { #else -void mindspore_log_init(void) { +void common_log_init(void) { #endif #ifdef USE_GLOG // do not use glog predefined log prefix FLAGS_log_prefix = false; - static bool is_glog_initialzed = false; - if (!is_glog_initialzed) { -#if !defined(_WIN32) && !defined(_WIN64) - google::InitGoogleLogging("mindspore"); -#endif - is_glog_initialzed = true; - } // set default log level to WARNING if (mindspore::GetEnv("GLOG_v").empty()) { FLAGS_v = mindspore::WARNING; @@ -525,4 +517,22 @@ void mindspore_log_init(void) { #endif mindspore::InitSubModulesLogLevel(); } + +// shared lib init hook +#if defined(_WIN32) || defined(_WIN64) +__attribute__((constructor)) void mindspore_log_init(void) { +#else +void mindspore_log_init(void) { +#endif +#ifdef USE_GLOG + static bool is_glog_initialzed = false; + if (!is_glog_initialzed) { +#if !defined(_WIN32) && !defined(_WIN64) + google::InitGoogleLogging("mindspore"); +#endif + is_glog_initialzed = true; + } +#endif + common_log_init(); +} } diff --git a/serving/core/server.cc b/serving/core/server.cc index add9d16bee5..4a3a3b59eb5 100644 --- a/serving/core/server.cc +++ b/serving/core/server.cc @@ -22,6 +22,7 @@ #include #include #include +#include #include "mindspore/ccsrc/utils/log_adapter.h" #include "serving/ms_service.grpc.pb.h" @@ -40,7 +41,7 @@ namespace serving { using MSTensorPtr = std::shared_ptr; Status Session::CreatDeviceSession(const std::string &device, uint32_t device_id) { - session_ = inference::MSSession::CreateSession(device + "Inference", device_id); + session_ = inference::MSSession::CreateSession(device, device_id); if (session_ == nullptr) { MS_LOG(ERROR) << "Creat Session Failed"; return FAILED; @@ -67,6 +68,7 @@ Status Session::Predict(const std::vector &inputs, inference::Multi MS_LOG(INFO) << "run Predict"; *outputs = session_->RunGraph(graph_id_, inputs); + MS_LOG(INFO) << "run Predict finished"; return SUCCESS; } @@ -80,12 +82,16 @@ Status Session::Warmup(const MindSporeModelPtr model) { std::string file_name = model->GetModelPath() + '/' + model->GetModelName(); char *graphBuf = ReadFile(file_name.c_str(), &size); if (graphBuf == nullptr) { - MS_LOG(ERROR) << "Load graph model failed, file name is " << file_name.c_str(); + MS_LOG(ERROR) << "Read model file failed, file name is " << file_name.c_str(); return FAILED; } last_graph_ = inference::LoadModel(graphBuf, size, device_type_); + if (last_graph_ == nullptr) { + MS_LOG(ERROR) << "Load graph model failed, file name is " << file_name.c_str(); + return FAILED; + } graph_id_ = session_->CompileGraph(last_graph_); - MS_LOG(INFO) << "Session Warmup"; + MS_LOG(INFO) << "Session Warmup finished"; return SUCCESS; } @@ -95,6 +101,9 @@ Status Session::Clear() { } namespace { +static const uint32_t uint32max = 0x7FFFFFFF; +std::promise exit_requested; + const std::map type2id_map{ {ms_serving::MS_UNKNOWN, TypeId::kNumberTypeBegin}, {ms_serving::MS_BOOL, TypeId::kNumberTypeBool}, {ms_serving::MS_INT8, TypeId::kNumberTypeInt8}, {ms_serving::MS_UINT8, TypeId::kNumberTypeUInt8}, @@ -141,7 +150,7 @@ MSTensorPtr ServingTensor2MSTensor(const ms_serving::Tensor &tensor) { } TypeId type = iter->second; auto ms_tensor = std::shared_ptr(inference::MSTensor::CreateTensor(type, shape)); - memcpy_s(ms_tensor->MutableData(), tensor.data().size(), tensor.data().data(), tensor.data().size()); + memcpy_s(ms_tensor->MutableData(), ms_tensor->Size(), tensor.data().data(), tensor.data().size()); return ms_tensor; } @@ -166,10 +175,7 @@ void ClearEnv() { Session::Instance().Clear(); inference::ExitInference(); } -void HandleSignal(int sig) { - ClearEnv(); - exit(0); -} +void HandleSignal(int sig) { exit_requested.set_value(); } #ifdef ENABLE_D static rtContext_t g_ctx = nullptr; @@ -247,6 +253,7 @@ Status Server::BuildAndStart() { rtError_t rt_ret = rtCtxGetCurrent(&ctx); if (rt_ret != RT_ERROR_NONE || ctx == nullptr) { MS_LOG(ERROR) << "the ascend device context is null"; + ClearEnv(); return FAILED; } g_ctx = ctx; @@ -258,6 +265,7 @@ Status Server::BuildAndStart() { auto option = grpc::MakeChannelArgumentOption(GRPC_ARG_ALLOW_REUSEPORT, 0); grpc::ServerBuilder builder; builder.SetOption(std::move(option)); + builder.SetMaxMessageSize(uint32max); // Listen on the given address without any authentication mechanism. builder.AddListeningPort(server_address, grpc::InsecureServerCredentials()); // Register "service" as the instance through which we'll communicate with @@ -265,13 +273,15 @@ Status Server::BuildAndStart() { builder.RegisterService(&service); // Finally assemble the server. std::unique_ptr server(builder.BuildAndStart()); + auto grpc_server_run = [&server]() { server->Wait(); }; + std::thread serving_thread(grpc_server_run); MS_LOG(INFO) << "Server listening on " << server_address << std::endl; - - // Wait for the server to shutdown. Note that some other thread must be - // responsible for shutting down the server for this call to ever return. - server->Wait(); + auto exit_future = exit_requested.get_future(); + exit_future.wait(); + ClearEnv(); + server->Shutdown(); + serving_thread.join(); return SUCCESS; } - } // namespace serving } // namespace mindspore diff --git a/serving/core/util/file_system_operation.cc b/serving/core/util/file_system_operation.cc index a5143995dec..1af512a54c0 100644 --- a/serving/core/util/file_system_operation.cc +++ b/serving/core/util/file_system_operation.cc @@ -29,7 +29,6 @@ namespace mindspore { namespace serving { - char *ReadFile(const char *file, size_t *size) { if (file == nullptr) { MS_LOG(ERROR) << "file is nullptr"; @@ -70,8 +69,8 @@ bool DirOrFileExist(const std::string &file_path) { } std::vector GetAllSubDirs(const std::string &dir_path) { - DIR *dir; - struct dirent *ptr; + DIR *dir = nullptr; + struct dirent *ptr = nullptr; std::vector SubDirs; if ((dir = opendir(dir_path.c_str())) == NULL) { diff --git a/serving/core/util/option_parser.cc b/serving/core/util/option_parser.cc index 9cbd7eaee8f..c7f00e37338 100644 --- a/serving/core/util/option_parser.cc +++ b/serving/core/util/option_parser.cc @@ -36,17 +36,16 @@ bool RemovePrefix(std::string *str, const std::string &prefix) { bool Option::ParseInt32(std::string *arg) { if (RemovePrefix(arg, "--") && RemovePrefix(arg, name_) && RemovePrefix(arg, "=")) { - char extra; int32_t parsed_value; - if (sscanf(arg->data(), "%d%c", &parsed_value, &extra) != 1) { - std::cout << "Parse " << name_ << "Error for option " << *arg << std::endl; + try { + parsed_value = std::stoi(arg->data()); + } catch (std::invalid_argument) { + std::cout << "Parse " << name_ << " Error for option " << *arg << std::endl; return false; - } else { - *int32_default_ = parsed_value; } + *int32_default_ = parsed_value; return true; } - return false; } @@ -76,17 +75,16 @@ bool Option::ParseString(std::string *arg) { bool Option::ParseFloat(std::string *arg) { if (RemovePrefix(arg, "--") && RemovePrefix(arg, name_) && RemovePrefix(arg, "=")) { - char extra; float parsed_value; - if (sscanf(arg->data(), "%f%c", &parsed_value, &extra) != 1) { - std::cout << "Parse " << name_ << "Error for option " << *arg << std::endl; + try { + parsed_value = std::stof(arg->data()); + } catch (std::invalid_argument) { + std::cout << "Parse " << name_ << " Error for option " << *arg << std::endl; return false; - } else { - *float_default_ = parsed_value; } + *float_default_ = parsed_value; return true; } - return false; } @@ -159,10 +157,11 @@ Options::Options() : args_(nullptr) { CreateOptions(); } void Options::CreateOptions() { args_ = std::make_shared(); std::vector