forked from mindspore-Ecosystem/mindspore
serving http init
This commit is contained in:
parent
cde696477c
commit
75d116b5db
|
@ -0,0 +1,11 @@
|
|||
mindspore_add_pkg(libevent
|
||||
VER 2.1.12
|
||||
LIBS event event_pthreads
|
||||
URL https://github.com/libevent/libevent/releases/download/release-2.1.12-stable/libevent-2.1.12-stable.tar.gz
|
||||
MD5 b5333f021f880fe76490d8a799cd79f4
|
||||
CMAKE_OPTION -DCMAKE_BUILD_TYPE:STRING=Release -DBUILD_TESTING=OFF)
|
||||
|
||||
include_directories(${libevent_INC})
|
||||
|
||||
add_library(mindspore::event ALIAS libevent::event)
|
||||
add_library(mindspore::event_pthreads ALIAS libevent::event_pthreads)
|
|
@ -22,6 +22,8 @@ if (ENABLE_DEBUGGER OR ENABLE_SERVING OR ENABLE_TESTCASES)
|
|||
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/zlib.cmake)
|
||||
# build gRPC
|
||||
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/grpc.cmake)
|
||||
# build event
|
||||
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/libevent.cmake)
|
||||
endif()
|
||||
|
||||
include(${CMAKE_SOURCE_DIR}/cmake/external_libs/pybind11.cmake)
|
||||
|
|
|
@ -163,6 +163,13 @@ if (ENABLE_GPU)
|
|||
)
|
||||
endif ()
|
||||
|
||||
if (ENABLE_SERVING OR ENABLE_TESTCASES)
|
||||
file(GLOB_RECURSE LIBEVENT_LIB_LIST
|
||||
${libevent_LIBPATH}/libevent*
|
||||
${libevent_LIBPATH}/libevent_pthreads*
|
||||
)
|
||||
endif ()
|
||||
|
||||
if (NOT ENABLE_GE)
|
||||
if (ENABLE_D)
|
||||
if (DEFINED ENV{ASCEND_CUSTOM_PATH})
|
||||
|
@ -191,6 +198,7 @@ if (NOT ENABLE_GE)
|
|||
${CMAKE_SOURCE_DIR}/graphengine/third_party/prebuild/${CMAKE_HOST_SYSTEM_PROCESSOR}/libslog.so
|
||||
${CMAKE_SOURCE_DIR}/graphengine/third_party/prebuild/${CMAKE_HOST_SYSTEM_PROCESSOR}/liberror_manager.so
|
||||
${CMAKE_SOURCE_DIR}/build/graphengine/libc_sec.so
|
||||
${LIBEVENT_LIB_LIST}
|
||||
DESTINATION ${INSTALL_LIB_DIR}
|
||||
COMPONENT mindspore
|
||||
)
|
||||
|
@ -273,4 +281,10 @@ if (ENABLE_SERVING)
|
|||
DESTINATION ${INSTALL_LIB_DIR}
|
||||
COMPONENT mindspore
|
||||
)
|
||||
|
||||
install(
|
||||
FILES ${LIBEVENT_LIB_LIST}
|
||||
DESTINATION ${INSTALL_LIB_DIR}
|
||||
COMPONENT mindspore
|
||||
)
|
||||
endif ()
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include <sstream>
|
||||
#include <memory>
|
||||
#include <iostream>
|
||||
#include <chrono>
|
||||
|
||||
#ifndef ENABLE_ACL
|
||||
#include "mindspore/core/utils/log_adapter.h"
|
||||
|
@ -101,7 +102,18 @@ class LogWriter {
|
|||
|
||||
#endif // ENABLE_ACL
|
||||
|
||||
#define MSI_TIME_STAMP_START(name) auto time_start_##name = std::chrono::steady_clock::now();
|
||||
#define MSI_TIME_STAMP_END(name) \
|
||||
{ \
|
||||
auto time_end_##name = std::chrono::steady_clock::now(); \
|
||||
auto time_cost = std::chrono::duration<double, std::milli>(time_end_##name - time_start_##name).count(); \
|
||||
MSI_LOG_INFO << #name " Time Cost # " << time_cost << " ms ---------------------"; \
|
||||
}
|
||||
|
||||
#define INFER_STATUS(code) inference::Status(code) < inference::LogStream()
|
||||
#define ERROR_INFER_STATUS(status, type, msg) \
|
||||
MSI_LOG_ERROR << msg; \
|
||||
status = inference::Status(type, msg)
|
||||
|
||||
} // namespace mindspore::inference
|
||||
|
||||
|
|
|
@ -74,6 +74,10 @@ class MS_API InferSession {
|
|||
const RequestBase & /*request*/, ReplyBase & /*reply*/) {
|
||||
return FAILED;
|
||||
}
|
||||
virtual Status GetModelInputsInfo(uint32_t graph_id, std::vector<inference::InferTensor> *tensor_list) const {
|
||||
Status status(SUCCESS);
|
||||
return status;
|
||||
}
|
||||
static std::shared_ptr<InferSession> CreateSession(const std::string &device, uint32_t device_id);
|
||||
};
|
||||
|
||||
|
|
|
@ -212,5 +212,31 @@ std::string AscendInferenceSession::InputsInfo(const std::vector<ParameterPtr> &
|
|||
return graph + " " + actual;
|
||||
}
|
||||
|
||||
void AscendInferenceSession::GetModelInputsInfo(uint32_t graph_id, std::vector<tensor::TensorPtr> *inputs) const {
|
||||
MS_LOG(INFO) << "Start get model inputs, graph id : " << graph_id;
|
||||
auto kernel_graph = GetGraph(graph_id);
|
||||
MS_EXCEPTION_IF_NULL(kernel_graph);
|
||||
auto kernel_graph_inputs = kernel_graph->inputs();
|
||||
vector<ParameterPtr> paras;
|
||||
// find parameters of graph inputs
|
||||
for (size_t i = 0; i < kernel_graph_inputs.size(); ++i) {
|
||||
if (!kernel_graph_inputs[i]->isa<Parameter>()) {
|
||||
MS_LOG(ERROR) << "Kernel graph inputs have anfnode which is not Parameter.";
|
||||
continue;
|
||||
}
|
||||
auto parameter = kernel_graph_inputs[i]->cast<ParameterPtr>();
|
||||
if (!AnfAlgo::IsParameterWeight(parameter)) {
|
||||
vector<int> input_shape;
|
||||
auto parameter_shape = AnfAlgo::GetOutputDeviceShape(parameter, 0);
|
||||
(void)std::transform(parameter_shape.begin(), parameter_shape.end(), std::back_inserter(input_shape),
|
||||
[](const size_t dim) { return static_cast<int>(dim); });
|
||||
auto kernel_build_info = AnfAlgo::GetSelectKernelBuildInfo(parameter);
|
||||
auto data_type = kernel_build_info->GetOutputDeviceType(0);
|
||||
auto ms_tensor = std::make_shared<tensor::Tensor>(data_type, input_shape);
|
||||
inputs->push_back(ms_tensor);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace session
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -45,6 +45,7 @@ class AscendInferenceSession : public AscendSession {
|
|||
template <typename T>
|
||||
std::string PrintInputShape(std::vector<T> shape) const;
|
||||
std::string InputsInfo(const std::vector<ParameterPtr> ¶s, const std::vector<tensor::TensorPtr> &inputs) const;
|
||||
void GetModelInputsInfo(uint32_t graph_id, std::vector<tensor::TensorPtr> *inputs) const override;
|
||||
};
|
||||
MS_REG_SESSION(kDavinciInferenceDevice, AscendInferenceSession);
|
||||
} // namespace session
|
||||
|
|
|
@ -220,7 +220,7 @@ Status MSInferSession::ExecuteModel(uint32_t model_id, const RequestBase &reques
|
|||
for (const auto &tensor : outputs) {
|
||||
auto out_tensor = reply.add();
|
||||
if (out_tensor == nullptr) {
|
||||
MS_LOG(ERROR) << "Execute Model " << model_id << " Failed, add output tensor failed";
|
||||
MS_LOG(ERROR) << "Execute Model " << model_id << " Failed add output tensor failed";
|
||||
return FAILED;
|
||||
}
|
||||
MSTensor2ServingTensor(tensor, *out_tensor);
|
||||
|
@ -374,4 +374,18 @@ Status MSInferSession::CheckModelInputs(uint32_t graph_id, const std::vector<ten
|
|||
return SUCCESS;
|
||||
}
|
||||
|
||||
Status MSInferSession::GetModelInputsInfo(uint32_t model_id, std::vector<inference::InferTensor> *tensor_list) const {
|
||||
vector<tensor::TensorPtr> inputs;
|
||||
session_impl_->GetModelInputsInfo(model_id, &inputs);
|
||||
if (inputs.size() == 0) {
|
||||
MS_LOG(ERROR) << "The model inputs is NULL";
|
||||
return FAILED;
|
||||
}
|
||||
for (const auto &tensor : inputs) {
|
||||
InferTensor infer_tensor = InferTensor();
|
||||
MSTensor2ServingTensor(tensor, infer_tensor);
|
||||
tensor_list->push_back(infer_tensor);
|
||||
}
|
||||
return SUCCESS;
|
||||
}
|
||||
} // namespace mindspore::inference
|
||||
|
|
|
@ -43,6 +43,7 @@ class MSInferSession : public InferSession {
|
|||
Status LoadModelFromFile(const std::string &file_name, uint32_t &model_id) override;
|
||||
Status UnloadModel(uint32_t model_id) override;
|
||||
Status ExecuteModel(uint32_t model_id, const RequestBase &inputs, ReplyBase &outputs) override;
|
||||
Status GetModelInputsInfo(uint32_t graph_id, std::vector<inference::InferTensor> *tensor_list) const override;
|
||||
|
||||
private:
|
||||
std::shared_ptr<session::SessionBasic> session_impl_ = nullptr;
|
||||
|
|
|
@ -97,6 +97,7 @@ class SessionBasic {
|
|||
std::string *error_msg) const {
|
||||
return true;
|
||||
}
|
||||
virtual void GetModelInputsInfo(uint32_t graph_id, std::vector<tensor::TensorPtr> *inputs) const {}
|
||||
|
||||
#ifdef ENABLE_DEBUGGER
|
||||
// set debugger
|
||||
|
|
|
@ -93,7 +93,10 @@ if (ENABLE_ACL)
|
|||
endif ()
|
||||
|
||||
include_directories(${CMAKE_BINARY_DIR})
|
||||
|
||||
add_executable(ms_serving ${SERVING_SRC})
|
||||
#libevent
|
||||
target_link_libraries(ms_serving mindspore::event mindspore::event_pthreads)
|
||||
|
||||
target_link_libraries(ms_serving ${_REFLECTION} ${_GRPC_GRPCPP} ${_PROTOBUF_LIBPROTOBUF} pthread)
|
||||
if (ENABLE_D)
|
||||
|
|
|
@ -0,0 +1,423 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include "serving/ms_service.pb.h"
|
||||
#include "util/status.h"
|
||||
#include "core/session.h"
|
||||
#include "core/http_process.h"
|
||||
|
||||
using ms_serving::MSService;
|
||||
using ms_serving::PredictReply;
|
||||
using ms_serving::PredictRequest;
|
||||
using nlohmann::json;
|
||||
|
||||
namespace mindspore {
|
||||
namespace serving {
|
||||
|
||||
const int BUF_MAX = 0x1FFFFF;
|
||||
static constexpr char HTTP_DATA[] = "data";
|
||||
static constexpr char HTTP_TENSOR[] = "tensor";
|
||||
enum HTTP_TYPE { TYPE_DATA = 0, TYPE_TENSOR };
|
||||
enum HTTP_DATA_TYPE { HTTP_DATA_NONE, HTTP_DATA_INT, HTTP_DATA_FLOAT };
|
||||
static const std::map<HTTP_DATA_TYPE, ms_serving::DataType> http_to_infer_map{
|
||||
{HTTP_DATA_NONE, ms_serving::MS_UNKNOWN},
|
||||
{HTTP_DATA_INT, ms_serving::MS_INT32},
|
||||
{HTTP_DATA_FLOAT, ms_serving::MS_FLOAT32}};
|
||||
|
||||
Status GetPostMessage(struct evhttp_request *req, std::string *buf) {
|
||||
Status status(SUCCESS);
|
||||
size_t post_size = evbuffer_get_length(req->input_buffer);
|
||||
if (post_size == 0) {
|
||||
ERROR_INFER_STATUS(status, INVALID_INPUTS, "http message invalid");
|
||||
return status;
|
||||
} else {
|
||||
size_t copy_len = post_size > BUF_MAX ? BUF_MAX : post_size;
|
||||
buf->resize(copy_len);
|
||||
memcpy(buf->data(), evbuffer_pullup(req->input_buffer, -1), copy_len);
|
||||
return status;
|
||||
}
|
||||
}
|
||||
Status CheckRequestValid(struct evhttp_request *http_request) {
|
||||
Status status(SUCCESS);
|
||||
switch (evhttp_request_get_command(http_request)) {
|
||||
case EVHTTP_REQ_POST:
|
||||
return status;
|
||||
default:
|
||||
ERROR_INFER_STATUS(status, INVALID_INPUTS, "http message only support POST right now");
|
||||
return status;
|
||||
}
|
||||
}
|
||||
|
||||
void ErrorMessage(struct evhttp_request *req, Status status) {
|
||||
json error_json = {{"error_message", status.StatusMessage()}};
|
||||
std::string out_error_str = error_json.dump();
|
||||
struct evbuffer *retbuff = evbuffer_new();
|
||||
evbuffer_add(retbuff, out_error_str.data(), out_error_str.size());
|
||||
evhttp_send_reply(req, HTTP_OK, "Client", retbuff);
|
||||
evbuffer_free(retbuff);
|
||||
}
|
||||
|
||||
Status CheckMessageValid(const json &message_info, HTTP_TYPE *type) {
|
||||
Status status(SUCCESS);
|
||||
int count = 0;
|
||||
if (message_info.find(HTTP_DATA) != message_info.end()) {
|
||||
*type = TYPE_DATA;
|
||||
count++;
|
||||
}
|
||||
if (message_info.find(HTTP_TENSOR) != message_info.end()) {
|
||||
*type = TYPE_TENSOR;
|
||||
count++;
|
||||
}
|
||||
if (count != 1) {
|
||||
ERROR_INFER_STATUS(status, INVALID_INPUTS, "http message must have only one type of (data, tensor, text)");
|
||||
return status;
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
Status GetDataFromJson(const json &json_data, std::string *data, HTTP_DATA_TYPE *type) {
|
||||
Status status(SUCCESS);
|
||||
if (json_data.is_number_integer()) {
|
||||
if (*type == HTTP_DATA_NONE) {
|
||||
*type = HTTP_DATA_INT;
|
||||
} else if (*type != HTTP_DATA_INT) {
|
||||
ERROR_INFER_STATUS(status, INVALID_INPUTS, "the input data type should be consistent");
|
||||
return status;
|
||||
}
|
||||
auto s_data = json_data.get<int32_t>();
|
||||
data->append(reinterpret_cast<char *>(&s_data), sizeof(int32_t));
|
||||
MSI_LOG(INFO) << "data size " << data->size();
|
||||
} else if (json_data.is_number_float()) {
|
||||
if (*type == HTTP_DATA_NONE) {
|
||||
*type = HTTP_DATA_FLOAT;
|
||||
} else if (*type != HTTP_DATA_FLOAT) {
|
||||
ERROR_INFER_STATUS(status, INVALID_INPUTS, "the input data type should be consistent");
|
||||
return status;
|
||||
}
|
||||
auto s_data = json_data.get<float>();
|
||||
data->append(reinterpret_cast<char *>(&s_data), sizeof(float));
|
||||
MSI_LOG(INFO) << "data size " << data->size();
|
||||
} else {
|
||||
ERROR_INFER_STATUS(status, INVALID_INPUTS, "the input data type should be int or float");
|
||||
return status;
|
||||
}
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
Status RecusiveGetTensor(const json &json_data, size_t depth, std::vector<int> *shape, std::string *data,
|
||||
HTTP_DATA_TYPE *type) {
|
||||
Status status(SUCCESS);
|
||||
if (depth >= 10) {
|
||||
ERROR_INFER_STATUS(status, INVALID_INPUTS, "the tensor shape dims is larger than 10");
|
||||
return status;
|
||||
}
|
||||
if (!json_data.is_array()) {
|
||||
ERROR_INFER_STATUS(status, INVALID_INPUTS, "the tensor is constructed illegally");
|
||||
return status;
|
||||
}
|
||||
int cur_dim = json_data.size();
|
||||
if (shape->size() <= depth) {
|
||||
shape->push_back(cur_dim);
|
||||
} else if ((*shape)[depth] != cur_dim) {
|
||||
return INFER_STATUS(INVALID_INPUTS) << "the tensor shape is constructed illegally";
|
||||
}
|
||||
if (json_data.at(0).is_array()) {
|
||||
for (const auto &item : json_data) {
|
||||
status = RecusiveGetTensor(item, depth + 1, shape, data, type);
|
||||
if (status != SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// last dim, read the data
|
||||
for (auto item : json_data) {
|
||||
status = GetDataFromJson(item, data, type);
|
||||
if (status != SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
}
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
Status TransDataToPredictRequest(const json &message_info, PredictRequest *request) {
|
||||
Status status = SUCCESS;
|
||||
auto tensors = message_info.find(HTTP_DATA);
|
||||
if (tensors == message_info.end()) {
|
||||
ERROR_INFER_STATUS(status, INVALID_INPUTS, "http message do not have data type");
|
||||
return status;
|
||||
}
|
||||
|
||||
if (tensors->size() == 0) {
|
||||
ERROR_INFER_STATUS(status, INVALID_INPUTS, "the input tensor list is null");
|
||||
return status;
|
||||
}
|
||||
for (const auto &tensor : *tensors) {
|
||||
std::string msg_data;
|
||||
HTTP_DATA_TYPE type{HTTP_DATA_NONE};
|
||||
if (!tensor.is_array()) {
|
||||
ERROR_INFER_STATUS(status, INVALID_INPUTS, "the tensor is constructed illegally");
|
||||
return status;
|
||||
}
|
||||
if (tensor.size() == 0) {
|
||||
ERROR_INFER_STATUS(status, INVALID_INPUTS, "the input tensor is null");
|
||||
return status;
|
||||
}
|
||||
for (const auto &tensor_data : tensor) {
|
||||
status = GetDataFromJson(tensor_data, &msg_data, &type);
|
||||
if (status != SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
}
|
||||
auto iter = http_to_infer_map.find(type);
|
||||
if (iter == http_to_infer_map.end()) {
|
||||
ERROR_INFER_STATUS(status, INVALID_INPUTS, "the input type is not supported right now");
|
||||
return status;
|
||||
}
|
||||
|
||||
auto infer_tensor = request->add_data();
|
||||
infer_tensor->set_tensor_type(iter->second);
|
||||
infer_tensor->set_data(msg_data.data(), msg_data.size());
|
||||
}
|
||||
// get model required shape
|
||||
std::vector<inference::InferTensor> tensor_list;
|
||||
status = Session::Instance().GetModelInputsInfo(tensor_list);
|
||||
if (status != SUCCESS) {
|
||||
ERROR_INFER_STATUS(status, FAILED, "get model inputs info failed");
|
||||
return status;
|
||||
}
|
||||
if (request->data_size() != static_cast<int64_t>(tensor_list.size())) {
|
||||
ERROR_INFER_STATUS(status, INVALID_INPUTS, "the inputs number is not equal to model required");
|
||||
return status;
|
||||
}
|
||||
for (int i = 0; i < request->data_size(); i++) {
|
||||
for (size_t j = 0; j < tensor_list[i].shape().size(); ++j) {
|
||||
request->mutable_data(i)->mutable_tensor_shape()->add_dims(tensor_list[i].shape()[i]);
|
||||
}
|
||||
}
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
Status TransTensorToPredictRequest(const json &message_info, PredictRequest *request) {
|
||||
Status status(SUCCESS);
|
||||
auto tensors = message_info.find(HTTP_TENSOR);
|
||||
if (tensors == message_info.end()) {
|
||||
ERROR_INFER_STATUS(status, INVALID_INPUTS, "http message do not have tensor type");
|
||||
return status;
|
||||
}
|
||||
|
||||
for (const auto &tensor : *tensors) {
|
||||
std::vector<int> shape;
|
||||
std::string msg_data;
|
||||
HTTP_DATA_TYPE type{HTTP_DATA_NONE};
|
||||
RecusiveGetTensor(tensor, 0, &shape, &msg_data, &type);
|
||||
MSI_LOG(INFO) << shape << ", data = " << msg_data.size();
|
||||
auto iter = http_to_infer_map.find(type);
|
||||
if (iter == http_to_infer_map.end()) {
|
||||
ERROR_INFER_STATUS(status, INVALID_INPUTS, "the input type is not supported right now");
|
||||
return status;
|
||||
}
|
||||
auto infer_tensor = request->add_data();
|
||||
infer_tensor->set_tensor_type(iter->second);
|
||||
infer_tensor->set_data(msg_data.data(), msg_data.size());
|
||||
for (const auto dim : shape) {
|
||||
infer_tensor->mutable_tensor_shape()->add_dims(dim);
|
||||
}
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
Status TransHTTPMsgToPredictRequest(struct evhttp_request *http_request, PredictRequest *request, HTTP_TYPE *type) {
|
||||
Status status = CheckRequestValid(http_request);
|
||||
if (status != SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
std::string post_message;
|
||||
status = GetPostMessage(http_request, &post_message);
|
||||
if (status != SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
|
||||
json message_info;
|
||||
try {
|
||||
message_info = nlohmann::json::parse(post_message);
|
||||
} catch (nlohmann::json::exception &e) {
|
||||
std::string json_exception = e.what();
|
||||
std::string error_message = "Illegal JSON format." + json_exception;
|
||||
ERROR_INFER_STATUS(status, INVALID_INPUTS, error_message);
|
||||
return status;
|
||||
}
|
||||
|
||||
status = CheckMessageValid(message_info, type);
|
||||
if (status != SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
switch (*type) {
|
||||
case TYPE_DATA:
|
||||
status = TransDataToPredictRequest(message_info, request);
|
||||
break;
|
||||
case TYPE_TENSOR:
|
||||
status = TransTensorToPredictRequest(message_info, request);
|
||||
break;
|
||||
default:
|
||||
ERROR_INFER_STATUS(status, INVALID_INPUTS, "http message must have only one type of (data, tensor)");
|
||||
return status;
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
Status GetJsonFromTensor(const ms_serving::Tensor &tensor, int len, int *pos, json *out_json) {
|
||||
Status status(SUCCESS);
|
||||
switch (tensor.tensor_type()) {
|
||||
case ms_serving::MS_INT32: {
|
||||
std::vector<int> result_tensor;
|
||||
for (int j = 0; j < len; j++) {
|
||||
int val;
|
||||
memcpy(&val, reinterpret_cast<const int *>(tensor.data().data()) + *pos + j, sizeof(int));
|
||||
result_tensor.push_back(val);
|
||||
}
|
||||
*out_json = result_tensor;
|
||||
*pos += len;
|
||||
break;
|
||||
}
|
||||
case ms_serving::MS_FLOAT32: {
|
||||
std::vector<float> result_tensor;
|
||||
for (int j = 0; j < len; j++) {
|
||||
float val;
|
||||
memcpy(&val, reinterpret_cast<const float *>(tensor.data().data()) + *pos + j, sizeof(float));
|
||||
result_tensor.push_back(val);
|
||||
}
|
||||
*out_json = result_tensor;
|
||||
*pos += len;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
MSI_LOG(ERROR) << "the result type is not supported in restful api, type is " << tensor.tensor_type();
|
||||
ERROR_INFER_STATUS(status, FAILED, "reply have unsupported type");
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
Status TransPredictReplyToData(const PredictReply &reply, json *out_json) {
|
||||
Status status(SUCCESS);
|
||||
for (int i = 0; i < reply.result_size(); i++) {
|
||||
json tensor_json;
|
||||
int num = 1;
|
||||
for (auto j = 0; j < reply.result(i).tensor_shape().dims_size(); j++) {
|
||||
num *= reply.result(i).tensor_shape().dims(j);
|
||||
}
|
||||
int pos = 0;
|
||||
status = GetJsonFromTensor(reply.result(i), num, &pos, &tensor_json);
|
||||
if (status != SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
(*out_json)["data"].push_back(tensor_json);
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
Status RecusiveGetJson(const ms_serving::Tensor &tensor, int depth, int *pos, json *out_json) {
|
||||
Status status(SUCCESS);
|
||||
if (depth >= 10) {
|
||||
ERROR_INFER_STATUS(status, FAILED, "result tensor shape dims is larger than 10");
|
||||
return status;
|
||||
}
|
||||
if (depth == tensor.tensor_shape().dims_size() - 1) {
|
||||
status = GetJsonFromTensor(tensor, tensor.tensor_shape().dims(depth), pos, out_json);
|
||||
if (status != SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
} else {
|
||||
for (int i = 0; i < tensor.tensor_shape().dims(depth); i++) {
|
||||
json tensor_json;
|
||||
status = RecusiveGetJson(tensor, depth + 1, pos, &tensor_json);
|
||||
if (status != SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
out_json->push_back(tensor_json);
|
||||
}
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
Status TransPredictReplyToTensor(const PredictReply &reply, json *out_json) {
|
||||
Status status(SUCCESS);
|
||||
for (int i = 0; i < reply.result_size(); i++) {
|
||||
json tensor_json;
|
||||
int pos = 0;
|
||||
status = RecusiveGetJson(reply.result(i), 0, &pos, &tensor_json);
|
||||
if (status != SUCCESS) {
|
||||
return status;
|
||||
}
|
||||
(*out_json)["tensor"].push_back(tensor_json);
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
Status TransPredictReplyToHTTPMsg(const PredictReply &reply, const HTTP_TYPE &type, struct evbuffer *buf) {
|
||||
Status status(SUCCESS);
|
||||
json out_json;
|
||||
switch (type) {
|
||||
case TYPE_DATA:
|
||||
status = TransPredictReplyToData(reply, &out_json);
|
||||
break;
|
||||
case TYPE_TENSOR:
|
||||
status = TransPredictReplyToTensor(reply, &out_json);
|
||||
break;
|
||||
default:
|
||||
ERROR_INFER_STATUS(status, FAILED, "http message must have only one type of (data, tensor)");
|
||||
return status;
|
||||
}
|
||||
|
||||
std::string out_str = out_json.dump();
|
||||
evbuffer_add(buf, out_str.data(), out_str.size());
|
||||
return status;
|
||||
}
|
||||
|
||||
void http_handler_msg(struct evhttp_request *req, void *arg) {
|
||||
std::cout << "in handle" << std::endl;
|
||||
PredictRequest request;
|
||||
PredictReply reply;
|
||||
HTTP_TYPE type;
|
||||
auto status = TransHTTPMsgToPredictRequest(req, &request, &type);
|
||||
if (status != SUCCESS) {
|
||||
ErrorMessage(req, status);
|
||||
MSI_LOG(ERROR) << "restful trans to request failed";
|
||||
return;
|
||||
}
|
||||
MSI_TIME_STAMP_START(Predict)
|
||||
status = Session::Instance().Predict(request, reply);
|
||||
if (status != SUCCESS) {
|
||||
ErrorMessage(req, status);
|
||||
MSI_LOG(ERROR) << "restful predict failed";
|
||||
}
|
||||
MSI_TIME_STAMP_END(Predict)
|
||||
struct evbuffer *retbuff = evbuffer_new();
|
||||
status = TransPredictReplyToHTTPMsg(reply, type, retbuff);
|
||||
if (status != SUCCESS) {
|
||||
ErrorMessage(req, status);
|
||||
MSI_LOG(ERROR) << "restful trans to reply failed";
|
||||
return;
|
||||
}
|
||||
evhttp_send_reply(req, HTTP_OK, "Client", retbuff);
|
||||
evbuffer_free(retbuff);
|
||||
}
|
||||
|
||||
} // namespace serving
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,29 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_SERVING_HTTP_PROCESS_H
|
||||
#define MINDSPORE_SERVING_HTTP_PROCESS_H
|
||||
|
||||
#include <evhttp.h>
|
||||
#include <event.h>
|
||||
#include <event2/http.h>
|
||||
#include <event2/http_struct.h>
|
||||
|
||||
namespace mindspore {
|
||||
namespace serving {
|
||||
void http_handler_msg(struct evhttp_request *req, void *arg);
|
||||
} // namespace serving
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_SERVER_H
|
|
@ -14,23 +14,25 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
#include "core/server.h"
|
||||
#include <evhttp.h>
|
||||
#include <event.h>
|
||||
#include <event2/thread.h>
|
||||
#include <grpcpp/grpcpp.h>
|
||||
#include <grpcpp/health_check_service_interface.h>
|
||||
#include <grpcpp/ext/proto_server_reflection_plugin.h>
|
||||
#include <future>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include <memory>
|
||||
#include <future>
|
||||
#include <chrono>
|
||||
|
||||
#include "include/infer_log.h"
|
||||
#include "serving/ms_service.grpc.pb.h"
|
||||
#include "core/util/option_parser.h"
|
||||
#include "core/version_control/version_controller.h"
|
||||
#include "core/util/file_system_operation.h"
|
||||
#include "core/session.h"
|
||||
#include "core/serving_tensor.h"
|
||||
#include "core/http_process.h"
|
||||
|
||||
|
||||
using ms_serving::MSService;
|
||||
using ms_serving::PredictReply;
|
||||
|
@ -39,93 +41,6 @@ using ms_serving::PredictRequest;
|
|||
namespace mindspore {
|
||||
namespace serving {
|
||||
|
||||
#define MSI_TIME_STAMP_START(name) auto time_start_##name = std::chrono::steady_clock::now();
|
||||
#define MSI_TIME_STAMP_END(name) \
|
||||
{ \
|
||||
auto time_end_##name = std::chrono::steady_clock::now(); \
|
||||
auto time_cost = std::chrono::duration<double, std::milli>(time_end_##name - time_start_##name).count(); \
|
||||
MSI_LOG_INFO << #name " Time Cost # " << time_cost << " ms ---------------------"; \
|
||||
}
|
||||
|
||||
Status Session::CreatDeviceSession(const std::string &device, uint32_t device_id) {
|
||||
session_ = inference::InferSession::CreateSession(device, device_id);
|
||||
if (session_ == nullptr) {
|
||||
MSI_LOG(ERROR) << "Creat Session Failed";
|
||||
return FAILED;
|
||||
}
|
||||
device_type_ = device;
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
Session &Session::Instance() {
|
||||
static Session instance;
|
||||
return instance;
|
||||
}
|
||||
|
||||
Status Session::Predict(const PredictRequest &request, PredictReply &reply) {
|
||||
if (!model_loaded_) {
|
||||
MSI_LOG(ERROR) << "the model has not loaded";
|
||||
return FAILED;
|
||||
}
|
||||
if (session_ == nullptr) {
|
||||
MSI_LOG(ERROR) << "the inference session has not be initialized";
|
||||
return FAILED;
|
||||
}
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
MSI_LOG(INFO) << "run Predict";
|
||||
|
||||
if (request.images_size() > 0) {
|
||||
ServingImagesRequest serving_images(request);
|
||||
ServingRequest serving_request(request);
|
||||
ServingReply serving_reply(reply);
|
||||
Status ret = session_->ExecuteModel(graph_id_, serving_images, serving_request, serving_reply);
|
||||
if (ret != SUCCESS) {
|
||||
MSI_LOG(ERROR) << "execute model with images return failed";
|
||||
return ret;
|
||||
}
|
||||
} else if (request.data_size() > 0) {
|
||||
ServingRequest serving_request(request);
|
||||
ServingReply serving_reply(reply);
|
||||
Status ret = session_->ExecuteModel(graph_id_, serving_request, serving_reply);
|
||||
if (ret != SUCCESS) {
|
||||
MSI_LOG(ERROR) << "execute model with datas return failed";
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
MSI_LOG(INFO) << "run Predict finished";
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
Status Session::Warmup(const MindSporeModelPtr model) {
|
||||
if (session_ == nullptr) {
|
||||
MSI_LOG(ERROR) << "The CreatDeviceSession should be called, before warmup";
|
||||
return FAILED;
|
||||
}
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
std::string file_name = model->GetModelPath() + '/' + model->GetModelName();
|
||||
model_loaded_ = false;
|
||||
MSI_TIME_STAMP_START(LoadModelFromFile)
|
||||
auto ret = session_->LoadModelFromFile(file_name, graph_id_);
|
||||
MSI_TIME_STAMP_END(LoadModelFromFile)
|
||||
if (ret != SUCCESS) {
|
||||
MSI_LOG(ERROR) << "Load graph model failed, file name is " << file_name.c_str();
|
||||
return ret;
|
||||
}
|
||||
model_loaded_ = true;
|
||||
MSI_LOG(INFO) << "Session Warmup finished";
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
Status Session::Clear() {
|
||||
if (session_ != nullptr) {
|
||||
session_->UnloadModel(graph_id_);
|
||||
session_->FinalizeEnv();
|
||||
session_ = nullptr;
|
||||
}
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
namespace {
|
||||
static const uint32_t uint32max = 0x7FFFFFFF;
|
||||
std::promise<void> exit_requested;
|
||||
|
@ -179,6 +94,7 @@ Status Server::BuildAndStart() {
|
|||
signal(SIGINT, HandleSignal);
|
||||
signal(SIGTERM, HandleSignal);
|
||||
Status res;
|
||||
|
||||
auto option_args = Options::Instance().GetArgs();
|
||||
std::string server_address = "0.0.0.0:" + std::to_string(option_args->grpc_port);
|
||||
std::string model_path = option_args->model_path;
|
||||
|
@ -198,6 +114,26 @@ Status Server::BuildAndStart() {
|
|||
ClearEnv();
|
||||
return res;
|
||||
}
|
||||
|
||||
// init http server
|
||||
struct evhttp *http_server = NULL;
|
||||
struct event_base *eb = NULL;
|
||||
int32_t http_port = option_args->rest_api_port;
|
||||
std::string http_addr = "0.0.0.0";
|
||||
event_init();
|
||||
evthread_use_pthreads();
|
||||
eb = event_base_new();
|
||||
http_server = evhttp_new(eb);
|
||||
evhttp_bind_socket_with_handle(http_server, http_addr.c_str(), http_port);
|
||||
// http_server = evhttp_start(http_addr.c_str(), http_port);
|
||||
if (http_server == NULL) {
|
||||
MSI_LOG(ERROR) << "http server start failed.";
|
||||
return res;
|
||||
}
|
||||
evhttp_set_timeout(http_server, 5);
|
||||
evhttp_set_gencb(http_server, http_handler_msg, NULL);
|
||||
|
||||
// grpc server
|
||||
MSServiceImpl ms_service;
|
||||
grpc::EnableDefaultHealthCheckService(true);
|
||||
grpc::reflection::InitProtoReflectionServerBuilderPlugin();
|
||||
|
@ -214,14 +150,23 @@ Status Server::BuildAndStart() {
|
|||
ClearEnv();
|
||||
return FAILED;
|
||||
}
|
||||
auto grpc_server_run = [&server]() { server->Wait(); };
|
||||
std::thread serving_thread(grpc_server_run);
|
||||
MSI_LOG(INFO) << "MS Serving listening on " << server_address;
|
||||
auto grpc_server_run = [&server, &server_address]() {
|
||||
MSI_LOG(INFO) << "MS Serving grpc listening on " << server_address;
|
||||
server->Wait();
|
||||
};
|
||||
auto http_server_run = [&eb, &http_addr, &http_port]() {
|
||||
MSI_LOG(INFO) << "MS Serving restful listening on " << http_addr << ":" << http_port;
|
||||
event_base_dispatch(eb);
|
||||
};
|
||||
std::thread grpc_thread(grpc_server_run);
|
||||
std::thread restful_thread(http_server_run);
|
||||
auto exit_future = exit_requested.get_future();
|
||||
exit_future.wait();
|
||||
ClearEnv();
|
||||
server->Shutdown();
|
||||
serving_thread.join();
|
||||
event_base_loopexit(eb, NULL);
|
||||
grpc_thread.join();
|
||||
restful_thread.join();
|
||||
return SUCCESS;
|
||||
}
|
||||
} // namespace serving
|
||||
|
|
|
@ -16,46 +16,10 @@
|
|||
#ifndef MINDSPORE_SERVER_H
|
||||
#define MINDSPORE_SERVER_H
|
||||
|
||||
#include <string>
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "util/status.h"
|
||||
#include "version_control/model.h"
|
||||
#include "include/inference.h"
|
||||
#include "serving/ms_service.pb.h"
|
||||
#include "serving/ms_service.grpc.pb.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace serving {
|
||||
|
||||
using ms_serving::PredictReply;
|
||||
using ms_serving::PredictRequest;
|
||||
using inference::Status;
|
||||
using inference::SUCCESS;
|
||||
using inference::FAILED;
|
||||
using inference::INVALID_INPUTS;
|
||||
|
||||
class Session {
|
||||
public:
|
||||
static Session &Instance();
|
||||
Status CreatDeviceSession(const std::string &device, uint32_t device_id);
|
||||
// Status Predict(const inference::MultiTensor &inputs, inference::MultiTensor &output);
|
||||
Status Predict(const PredictRequest &request, PredictReply &reply);
|
||||
Status Warmup(const MindSporeModelPtr model);
|
||||
Status Clear();
|
||||
|
||||
private:
|
||||
Session() = default;
|
||||
~Session() = default;
|
||||
int sesseion_id_{0};
|
||||
std::shared_ptr<inference::InferSession> session_{nullptr};
|
||||
bool model_loaded_ = false;
|
||||
uint32_t graph_id_{0};
|
||||
std::mutex mutex_;
|
||||
std::string device_type_;
|
||||
};
|
||||
|
||||
class Server {
|
||||
public:
|
||||
Server() = default;
|
||||
|
|
|
@ -0,0 +1,136 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "core/session.h"
|
||||
#include <grpcpp/grpcpp.h>
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include <memory>
|
||||
#include <chrono>
|
||||
|
||||
#include "include/infer_log.h"
|
||||
#include "serving/ms_service.grpc.pb.h"
|
||||
#include "core/util/option_parser.h"
|
||||
#include "core/version_control/version_controller.h"
|
||||
#include "core/util/file_system_operation.h"
|
||||
#include "core/serving_tensor.h"
|
||||
|
||||
using ms_serving::MSService;
|
||||
using ms_serving::PredictReply;
|
||||
using ms_serving::PredictRequest;
|
||||
|
||||
namespace mindspore {
|
||||
namespace serving {
|
||||
|
||||
Status Session::CreatDeviceSession(const std::string &device, uint32_t device_id) {
|
||||
session_ = inference::InferSession::CreateSession(device, device_id);
|
||||
if (session_ == nullptr) {
|
||||
MSI_LOG(ERROR) << "Creat Session Failed";
|
||||
return FAILED;
|
||||
}
|
||||
device_type_ = device;
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
Session &Session::Instance() {
|
||||
static Session instance;
|
||||
return instance;
|
||||
}
|
||||
|
||||
Status Session::Predict(const PredictRequest &request, PredictReply &reply) {
|
||||
if (!model_loaded_) {
|
||||
MSI_LOG(ERROR) << "the model has not loaded";
|
||||
return FAILED;
|
||||
}
|
||||
if (session_ == nullptr) {
|
||||
MSI_LOG(ERROR) << "the inference session has not be initialized";
|
||||
return FAILED;
|
||||
}
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
MSI_LOG(INFO) << "run Predict";
|
||||
|
||||
if (request.images_size() > 0) {
|
||||
ServingImagesRequest serving_images(request);
|
||||
ServingRequest serving_request(request);
|
||||
ServingReply serving_reply(reply);
|
||||
Status ret = session_->ExecuteModel(graph_id_, serving_images, serving_request, serving_reply);
|
||||
if (ret != SUCCESS) {
|
||||
MSI_LOG(ERROR) << "execute model with images return failed";
|
||||
return ret;
|
||||
}
|
||||
} else if (request.data_size() > 0) {
|
||||
ServingRequest serving_request(request);
|
||||
ServingReply serving_reply(reply);
|
||||
Status ret = session_->ExecuteModel(graph_id_, serving_request, serving_reply);
|
||||
if (ret != SUCCESS) {
|
||||
MSI_LOG(ERROR) << "execute model with datas return failed";
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
MSI_LOG(INFO) << "run Predict finished";
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
Status Session::Warmup(const MindSporeModelPtr model) {
|
||||
if (session_ == nullptr) {
|
||||
MSI_LOG(ERROR) << "The CreatDeviceSession should be called, before warmup";
|
||||
return FAILED;
|
||||
}
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
std::string file_name = model->GetModelPath() + '/' + model->GetModelName();
|
||||
model_loaded_ = false;
|
||||
MSI_TIME_STAMP_START(LoadModelFromFile)
|
||||
auto ret = session_->LoadModelFromFile(file_name, graph_id_);
|
||||
MSI_TIME_STAMP_END(LoadModelFromFile)
|
||||
if (ret != SUCCESS) {
|
||||
MSI_LOG(ERROR) << "Load graph model failed, file name is " << file_name.c_str();
|
||||
return ret;
|
||||
}
|
||||
model_loaded_ = true;
|
||||
MSI_LOG(INFO) << "Session Warmup finished";
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
Status Session::Clear() {
|
||||
if (session_ != nullptr) {
|
||||
session_->UnloadModel(graph_id_);
|
||||
session_->FinalizeEnv();
|
||||
session_ = nullptr;
|
||||
}
|
||||
return SUCCESS;
|
||||
}
|
||||
|
||||
Status Session::GetModelInputsInfo(std::vector<inference::InferTensor> &tensor_list) {
|
||||
if (!model_loaded_) {
|
||||
MSI_LOG(ERROR) << "the model has not loaded";
|
||||
return FAILED;
|
||||
}
|
||||
if (session_ == nullptr) {
|
||||
MSI_LOG(ERROR) << "the inference session has not be initialized";
|
||||
return FAILED;
|
||||
}
|
||||
std::lock_guard<std::mutex> lock(mutex_);
|
||||
Status ret = session_->GetModelInputsInfo(graph_id_, &tensor_list);
|
||||
if (ret != SUCCESS) {
|
||||
MSI_LOG(ERROR) << "get model inputs info failed";
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
} // namespace serving
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,62 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_SERVING_SESSION_H
|
||||
#define MINDSPORE_SERVING_SESSION_H
|
||||
|
||||
#include <string>
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "util/status.h"
|
||||
#include "version_control/model.h"
|
||||
#include "include/inference.h"
|
||||
#include "serving/ms_service.pb.h"
|
||||
#include "serving/ms_service.grpc.pb.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace serving {
|
||||
|
||||
using inference::FAILED;
|
||||
using inference::INVALID_INPUTS;
|
||||
using inference::Status;
|
||||
using inference::SUCCESS;
|
||||
using ms_serving::PredictReply;
|
||||
using ms_serving::PredictRequest;
|
||||
|
||||
class Session {
|
||||
public:
|
||||
static Session &Instance();
|
||||
Status CreatDeviceSession(const std::string &device, uint32_t device_id);
|
||||
// Status Predict(const inference::MultiTensor &inputs, inference::MultiTensor &output);
|
||||
Status Predict(const PredictRequest &request, PredictReply &reply);
|
||||
Status Warmup(const MindSporeModelPtr model);
|
||||
Status Clear();
|
||||
Status GetModelInputsInfo(std::vector<inference::InferTensor> &tensor_list);
|
||||
|
||||
private:
|
||||
Session() = default;
|
||||
~Session() = default;
|
||||
int sesseion_id_{0};
|
||||
std::shared_ptr<inference::InferSession> session_{nullptr};
|
||||
bool model_loaded_ = false;
|
||||
uint32_t graph_id_{0};
|
||||
std::mutex mutex_;
|
||||
std::string device_type_;
|
||||
};
|
||||
|
||||
} // namespace serving
|
||||
} // namespace mindspore
|
||||
#endif // MINDSPORE_SERVER_H
|
|
@ -160,6 +160,8 @@ void Options::CreateOptions() {
|
|||
std::vector<Option> options = {
|
||||
Option("port", &args_->grpc_port,
|
||||
"[Optional] Port to listen on for gRPC API, default is 5500, range from 1 to 65535"),
|
||||
Option("rest_api_port", &args_->rest_api_port,
|
||||
"[Optional] Port to listen on for RESTful API, default is 5501, range from 1 to 65535"),
|
||||
Option("model_name", &args_->model_name, "[Required] model name "),
|
||||
Option("model_path", &args_->model_path, "[Required] the path of the model files"),
|
||||
Option("device_id", &args_->device_id, "[Optional] the device id, default is 0, range from 0 to 7"),
|
||||
|
@ -184,6 +186,14 @@ bool Options::CheckOptions() {
|
|||
std::cout << "the port should be in [1~65535]" << std::endl;
|
||||
return false;
|
||||
}
|
||||
if (args_->rest_api_port < 1 || args_->rest_api_port > 65535) {
|
||||
std::cout << "the rest_api_port should be in [1~65535]" << std::endl;
|
||||
return false;
|
||||
}
|
||||
if (args_->rest_api_port == args_->grpc_port) {
|
||||
std::cout << "the rest_api_port and grpc port should not be same" << std::endl;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -24,6 +24,7 @@ namespace mindspore {
|
|||
namespace serving {
|
||||
struct Arguments {
|
||||
int32_t grpc_port = 5500;
|
||||
int32_t rest_api_port = 5501;
|
||||
std::string grpc_socket_path;
|
||||
std::string ssl_config_file;
|
||||
int32_t poll_model_wait_seconds = 1;
|
||||
|
|
|
@ -21,7 +21,7 @@
|
|||
#include <memory>
|
||||
#include "util/file_system_operation.h"
|
||||
#include "include/infer_log.h"
|
||||
#include "core/server.h"
|
||||
#include "core/session.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace serving {
|
||||
|
|
|
@ -30,10 +30,9 @@ class Net(nn.Cell):
|
|||
def construct(self, x_, y_):
|
||||
return self.add(x_, y_)
|
||||
|
||||
x = np.ones(4).astype(np.float32)
|
||||
y = np.ones(4).astype(np.float32)
|
||||
|
||||
def export_net():
|
||||
x = np.ones([2, 2]).astype(np.float32)
|
||||
y = np.ones([2, 2]).astype(np.float32)
|
||||
add = Net()
|
||||
output = add(Tensor(x), Tensor(y))
|
||||
export(add, Tensor(x), Tensor(y), file_name='tensor_add.mindir', file_format='MINDIR')
|
||||
|
|
|
@ -37,14 +37,14 @@ def run():
|
|||
request = ms_service_pb2.PredictRequest()
|
||||
|
||||
x = request.data.add()
|
||||
x.tensor_shape.dims.extend([4])
|
||||
x.tensor_shape.dims.extend([2, 2])
|
||||
x.tensor_type = ms_service_pb2.MS_FLOAT32
|
||||
x.data = (np.ones([4]).astype(np.float32)).tobytes()
|
||||
x.data = (np.ones([2, 2]).astype(np.float32)).tobytes()
|
||||
|
||||
y = request.data.add()
|
||||
y.tensor_shape.dims.extend([4])
|
||||
y.tensor_shape.dims.extend([2, 2])
|
||||
y.tensor_type = ms_service_pb2.MS_FLOAT32
|
||||
y.data = (np.ones([4]).astype(np.float32)).tobytes()
|
||||
y.data = (np.ones([2, 2]).astype(np.float32)).tobytes()
|
||||
|
||||
try:
|
||||
result = stub.Predict(request)
|
||||
|
|
|
@ -61,13 +61,13 @@ start_service()
|
|||
echo "$2 faile to start."
|
||||
fi
|
||||
|
||||
result=`grep -E 'MS Serving listening on 0.0.0.0:5500|MS Serving listening on 0.0.0.0:5501' $2_service.log | wc -l`
|
||||
result=`grep -E 'MS Serving grpc listening on 0.0.0.0:5500|MS Serving listening on 0.0.0.0:5501' $2_service.log | wc -l`
|
||||
count=0
|
||||
while [[ ${result} -ne 1 && ${count} -lt 150 ]]
|
||||
do
|
||||
sleep 1
|
||||
count=$(($count+1))
|
||||
result=`grep -E 'MS Serving listening on 0.0.0.0:5500|MS Serving listening on 0.0.0.0:5501' $2_service.log | wc -l`
|
||||
result=`grep -E 'MS Serving grpc listening on 0.0.0.0:5500|MS Serving listening on 0.0.0.0:5501' $2_service.log | wc -l`
|
||||
done
|
||||
|
||||
if [ ${count} -eq 150 ]
|
||||
|
|
|
@ -184,7 +184,7 @@ if (ENABLE_GE)
|
|||
endif()
|
||||
|
||||
if (CMAKE_SYSTEM_NAME MATCHES "Linux")
|
||||
target_link_libraries(ut_tests PRIVATE mindspore::gtest mindspore_gvar ${PYTHON_LIBRARIES} pthread util dl)
|
||||
target_link_libraries(ut_tests PRIVATE mindspore::gtest mindspore::event mindspore::event_pthreads mindspore_gvar ${PYTHON_LIBRARIES} pthread util dl)
|
||||
if (ENABLE_MINDDATA)
|
||||
target_link_libraries(ut_tests PRIVATE _c_dataengine _c_mindrecord)
|
||||
endif()
|
||||
|
|
|
@ -87,4 +87,3 @@ include_directories(${CMAKE_CURRENT_BINARY_DIR})
|
|||
include_directories(${CMAKE_CURRENT_BINARY_DIR}/../)
|
||||
add_library(ut_serving_obj OBJECT ${SERVING_SRC_TEST})
|
||||
|
||||
|
||||
|
|
|
@ -19,6 +19,7 @@
|
|||
|
||||
#include "common/common_test.h"
|
||||
#include "serving/core/server.h"
|
||||
#include "serving/core/session.h"
|
||||
#include "include/inference.h"
|
||||
#include "include/infer_tensor.h"
|
||||
#include "serving/core/serving_tensor.h"
|
||||
|
|
Loading…
Reference in New Issue