!31505 [MS][LITE][DEV]support env macro for different feature

Merge pull request !31505 from chenjianping/master_dev1
This commit is contained in:
i-robot 2022-03-19 03:10:46 +00:00 committed by Gitee
commit ff7df8e00f
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
17 changed files with 111 additions and 44 deletions

View File

@ -41,6 +41,10 @@ option(MSLITE_ENABLE_RUNTIME_GLOG "enable runtime glog" off)
option(MSLITE_ENABLE_COVERAGE "enable code coverage" off) option(MSLITE_ENABLE_COVERAGE "enable code coverage" off)
option(MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL "enable sharing memory with OpenGL" off) option(MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL "enable sharing memory with OpenGL" off)
option(MSLITE_ENABLE_SERVER_INFERENCE "enable inference on server" off) option(MSLITE_ENABLE_SERVER_INFERENCE "enable inference on server" off)
option(MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE "enable distribute thread dynamically" off)
option(MSLITE_ENABLE_BFC_MEMORY "enable distribute BFC memory" off)
option(MSLITE_ENABLE_PARALLEL_INFERENCE "enable parallel inference interface" off)
option(MSLITE_ENABLE_SHARING_MODEL_WEIGHT "enable sharing model weight" off)
#Option that can be configured through manually #Option that can be configured through manually
option(ENABLE_VERBOSE "" off) option(ENABLE_VERBOSE "" off)
@ -148,11 +152,46 @@ endif()
if(DEFINED ENV{MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL}) if(DEFINED ENV{MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL})
set(MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL $ENV{MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL}) set(MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL $ENV{MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL})
endif() endif()
if(DEFINED ENV{MSLITE_ENABLE_SERVING})
set(MSLITE_ENABLE_SERVING $ENV{MSLITE_ENABLE_SERVING}) option(MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE "enable distribute thread dynamically" off)
endif() option(MSLITE_ENABLE_BFC_MEMORY "enable distribute BFC memory" off)
option(MSLITE_ENABLE_PARALLEL_INFERENCE "enable parallel inference interface" off)
option(MSLITE_ENABLE_SHARING_MODEL_WEIGHT "enable sharing model weight" off)
if(DEFINED ENV{MSLITE_ENABLE_SERVER_INFERENCE}) if(DEFINED ENV{MSLITE_ENABLE_SERVER_INFERENCE})
set(MSLITE_ENABLE_SERVER_INFERENCE $ENV{MSLITE_ENABLE_SERVER_INFERENCE}) set(MSLITE_ENABLE_SERVER_INFERENCE $ENV{MSLITE_ENABLE_SERVER_INFERENCE})
set(MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE on)
set(MSLITE_ENABLE_BFC_MEMORY on)
set(MSLITE_ENABLE_PARALLEL_INFERENCE on)
set(MSLITE_ENABLE_SHARING_MODEL_WEIGHT on)
endif()
if(DEFINED ENV{MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE})
set(MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE $ENV{MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE})
endif()
if(MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE)
add_compile_definitions(DYNAMIC_THREAD_DISTRIBUTE)
endif()
if(DEFINED ENV{MSLITE_ENABLE_BFC_MEMORY})
set(MSLITE_ENABLE_BFC_MEMORY $ENV{MSLITE_ENABLE_BFC_MEMORY})
endif()
if(MSLITE_ENABLE_BFC_MEMORY)
add_compile_definitions(BFC_MEMORY)
endif()
if(DEFINED ENV{MSLITE_ENABLE_PARALLEL_INFERENCE})
set(MSLITE_ENABLE_PARALLEL_INFERENCE $ENV{MSLITE_ENABLE_PARALLEL_INFERENCE})
endif()
if(MSLITE_ENABLE_PARALLEL_INFERENCE)
add_compile_definitions(PARALLEL_INFERENCE)
endif()
if(DEFINED ENV{MSLITE_ENABLE_SHARING_MODEL_WEIGHT})
set(MSLITE_ENABLE_SHARING_MODEL_WEIGHT $ENV{MSLITE_ENABLE_SHARING_MODEL_WEIGHT})
endif()
if(MSLITE_ENABLE_SHARING_MODEL_WEIGHT)
add_compile_definitions(SHARING_MODEL_WEIGHT)
endif() endif()
if(MACHINE_LINUX_ARM64) if(MACHINE_LINUX_ARM64)
@ -320,7 +359,11 @@ message(STATUS "\tMSLITE_ENABLE_RUNTIME_CONVERT = \t${MSLITE_ENABLE_RUNTIME_
message(STATUS "\tMSLITE_ENABLE_RUNTIME_GLOG = \t${MSLITE_ENABLE_RUNTIME_GLOG}") message(STATUS "\tMSLITE_ENABLE_RUNTIME_GLOG = \t${MSLITE_ENABLE_RUNTIME_GLOG}")
message(STATUS "\tMSLITE_ENABLE_COVERAGE = \t${MSLITE_ENABLE_COVERAGE}") message(STATUS "\tMSLITE_ENABLE_COVERAGE = \t${MSLITE_ENABLE_COVERAGE}")
message(STATUS "\tMSLITE_ENABLE_SHARING_MEM_WITH_OPENGL = \t${MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL}") message(STATUS "\tMSLITE_ENABLE_SHARING_MEM_WITH_OPENGL = \t${MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL}")
message(STATUS "\tMSLITE_ENABLE_SERVER_INFERENCE = \t${MSLITE_ENABLE_SERVER_INFERENCE}") message(STATUS "\tMSLITE_ENABLE_SERVER_INFERENCE = \t${MSLITE_ENABLE_SERVER_INFERENCE}")
message(STATUS "\tMSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE = \t${MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE}")
message(STATUS "\tMSLITE_ENABLE_BFC_MEMORY = \t${MSLITE_ENABLE_BFC_MEMORY}")
message(STATUS "\tMSLITE_ENABLE_PARALLEL_INFERENCE = \t${MSLITE_ENABLE_PARALLEL_INFERENCE}")
message(STATUS "\tMSLITE_ENABLE_SHARING_MODEL_WEIGHT = \t${MSLITE_ENABLE_SHARING_MODEL_WEIGHT}")
if((MSLITE_ENABLE_CONVERTER OR MSLITE_ENABLE_TESTCASES) AND ( if((MSLITE_ENABLE_CONVERTER OR MSLITE_ENABLE_TESTCASES) AND (
NOT MSLITE_ENABLE_MINDRT NOT MSLITE_ENABLE_MINDRT

View File

@ -88,7 +88,7 @@ set(JNI_SRC
${NEW_NATIVE_DIR}/version.cpp ${NEW_NATIVE_DIR}/version.cpp
) )
if(MSLITE_ENABLE_SERVER_INFERENCE) if(MSLITE_ENABLE_PARALLEL_INFERENCE)
set(JNI_SRC set(JNI_SRC
${JNI_SRC} ${JNI_SRC}
${NEW_NATIVE_DIR}/runner_config.cpp ${NEW_NATIVE_DIR}/runner_config.cpp

View File

@ -65,7 +65,7 @@ file(GLOB CXX_API_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/cxx_api/graph/*.cc ${CMAKE_CURRENT_SOURCE_DIR}/cxx_api/graph/*.cc
${CMAKE_CURRENT_SOURCE_DIR}/cxx_api/tensor/*.cc ${CMAKE_CURRENT_SOURCE_DIR}/cxx_api/tensor/*.cc
) )
if(MSLITE_ENABLE_SERVER_INFERENCE) if(MSLITE_ENABLE_PARALLEL_INFERENCE)
set(CXX_API_SRCS set(CXX_API_SRCS
${CXX_API_SRCS} ${CXX_API_SRCS}
${CMAKE_CURRENT_SOURCE_DIR}/cxx_api/model_pool/predict_task_queue.cc ${CMAKE_CURRENT_SOURCE_DIR}/cxx_api/model_pool/predict_task_queue.cc
@ -140,17 +140,30 @@ if(MSLITE_ENABLE_MODEL_ENCRYPTION)
) )
endif() endif()
if(MSLITE_ENABLE_SERVER_INFERENCE) if(MSLITE_ENABLE_BFC_MEMORY)
set(LITE_SRC set(LITE_SRC
${LITE_SRC} ${LITE_SRC}
${CMAKE_CURRENT_SOURCE_DIR}/runtime/dynamic_mem_allocator.cc ${CMAKE_CURRENT_SOURCE_DIR}/runtime/dynamic_mem_allocator.cc
${CMAKE_CURRENT_SOURCE_DIR}/runtime/dynamic_mem_manager.cc ${CMAKE_CURRENT_SOURCE_DIR}/runtime/dynamic_mem_manager.cc
${CMAKE_CURRENT_SOURCE_DIR}/runtime/numa_adapter.cc ${CMAKE_CURRENT_SOURCE_DIR}/runtime/numa_adapter.cc
)
endif()
if(MSLITE_ENABLE_SHARING_MODEL_WEIGHT)
set(LITE_SRC
${LITE_SRC}
${CMAKE_CURRENT_SOURCE_DIR}/pack_weight_manager.cc ${CMAKE_CURRENT_SOURCE_DIR}/pack_weight_manager.cc
${CMAKE_CURRENT_SOURCE_DIR}/thread_cost_model.cc ${CMAKE_CURRENT_SOURCE_DIR}/thread_cost_model.cc
) )
endif() endif()
if(MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE)
set(LITE_SRC
${LITE_SRC}
${CMAKE_CURRENT_SOURCE_DIR}/thread_cost_model.cc
)
endif()
if(MSLITE_ENABLE_CONTROLFLOW) if(MSLITE_ENABLE_CONTROLFLOW)
file(GLOB CONTROL_FLOW_SRC file(GLOB CONTROL_FLOW_SRC
${CMAKE_CURRENT_SOURCE_DIR}/control_flow/*.cc ${CMAKE_CURRENT_SOURCE_DIR}/control_flow/*.cc

View File

@ -66,7 +66,7 @@ static const char *const kMSCacheVocabSize = "vocab_size";
static const char *const kMSCacheDeviceSize = "device_cache_size"; static const char *const kMSCacheDeviceSize = "device_cache_size";
static const char *const kMSCacheSerializePath = "serialize_path"; static const char *const kMSCacheSerializePath = "serialize_path";
// config // config
#ifdef SERVER_INFERENCE #ifdef BFC_MEMORY
static const char *const kConfigServerInference = "server_inference"; static const char *const kConfigServerInference = "server_inference";
static const char *const kConfigNUMANodeId = "numa_node_id"; static const char *const kConfigNUMANodeId = "numa_node_id";
#endif #endif

View File

@ -27,7 +27,7 @@
#include <sys/types.h> #include <sys/types.h>
#include <sys/param.h> #include <sys/param.h>
#endif #endif
#ifdef SERVER_INFERENCE #ifdef BFC_MEMORY
#include <sys/sysinfo.h> #include <sys/sysinfo.h>
#endif #endif
@ -182,7 +182,7 @@ size_t GetMaxMallocSize() {
return max_malloc_size; return max_malloc_size;
} }
#ifdef SERVER_INFERENCE #ifdef BFC_MEMORY
int64_t GetFreeMemory() { int64_t GetFreeMemory() {
struct sysinfo info; struct sysinfo info;
auto ret = sysinfo(&info); auto ret = sysinfo(&info);

View File

@ -43,7 +43,7 @@ uint64_t GetTimeUs();
bool IsSupportSDot(); bool IsSupportSDot();
size_t GetMaxMallocSize(); size_t GetMaxMallocSize();
#ifdef SERVER_INFERENCE #ifdef BFC_MEMORY
int64_t GetFreeMemory(); int64_t GetFreeMemory();
#endif #endif

View File

@ -689,7 +689,7 @@ lite::LiteSession *ModelImpl::CreateLiteSession(lite::InnerContext *context) {
delete context; delete context;
return nullptr; return nullptr;
} }
#ifdef SERVER_INFERENCE #ifdef BFC_MEMORY
auto iter = config_info_.find(lite::kConfigServerInference); auto iter = config_info_.find(lite::kConfigServerInference);
if (iter != config_info_.end()) { if (iter != config_info_.end()) {
auto numa_iter = iter->second.find(lite::kConfigNUMANodeId); auto numa_iter = iter->second.find(lite::kConfigNUMANodeId);

View File

@ -63,9 +63,11 @@ Status ModelWorker::Init(const char *model_buf, size_t size, const std::shared_p
int node_id) { int node_id) {
model_ = std::make_shared<Model>(); model_ = std::make_shared<Model>();
mindspore::ModelType model_type = kMindIR_Lite; mindspore::ModelType model_type = kMindIR_Lite;
#ifdef BFC_MEMORY
if (node_id != -1) { if (node_id != -1) {
model_->UpdateConfig(lite::kConfigServerInference, {lite::kConfigNUMANodeId, std::to_string(node_id)}); model_->UpdateConfig(lite::kConfigServerInference, {lite::kConfigNUMANodeId, std::to_string(node_id)});
} }
#endif
auto status = model_->Build(model_buf, size, model_type, model_context); auto status = model_->Build(model_buf, size, model_type, model_context);
if (status != kSuccess) { if (status != kSuccess) {
MS_LOG(ERROR) << "model build failed in ModelPool Init"; MS_LOG(ERROR) << "model build failed in ModelPool Init";

View File

@ -148,7 +148,7 @@ int InnerContext::Init() {
} }
if (this->allocator == nullptr) { if (this->allocator == nullptr) {
#ifdef SERVER_INFERENCE #ifdef BFC_MEMORY
this->allocator = std::make_shared<DynamicMemAllocator>(node_id_); this->allocator = std::make_shared<DynamicMemAllocator>(node_id_);
#else #else
this->allocator = mindspore::Allocator::Create(); this->allocator = mindspore::Allocator::Create();

View File

@ -20,7 +20,7 @@
#include <string> #include <string>
#include <unordered_map> #include <unordered_map>
#include "include/context.h" #include "include/context.h"
#ifdef SERVER_INFERENCE #ifdef BFC_MEMORY
#include "src/runtime/dynamic_mem_allocator.h" #include "src/runtime/dynamic_mem_allocator.h"
#else #else
#include "src/runtime/inner_allocator.h" #include "src/runtime/inner_allocator.h"
@ -91,7 +91,7 @@ struct InnerContext : public Context {
void ReplaceLinkInfoSenderWithNewOne(void *new_sender, void *old_sender); void ReplaceLinkInfoSenderWithNewOne(void *new_sender, void *old_sender);
#ifdef SERVER_INFERENCE #ifdef BFC_MEMORY
/// \brief Set NUMA node id. /// \brief Set NUMA node id.
/// ///
/// \param[in] node Define the NUMA node id. /// \param[in] node Define the NUMA node id.
@ -117,7 +117,7 @@ struct InnerContext : public Context {
bool device_and_pkg_support_fp16_ = false; bool device_and_pkg_support_fp16_ = false;
#ifdef SERVER_INFERENCE #ifdef BFC_MEMORY
int node_id_ = -1; int node_id_ = -1;
#endif #endif

View File

@ -28,7 +28,7 @@
#include "src/common/graph_util.h" #include "src/common/graph_util.h"
#include "src/common/file_utils.h" #include "src/common/file_utils.h"
#include "src/tensor.h" #include "src/tensor.h"
#ifdef SERVER_INFERENCE #ifdef SHARING_MODEL_WEIGHT
#include "src/pack_weight_manager.h" #include "src/pack_weight_manager.h"
#endif #endif
#ifdef ENABLE_V0 #ifdef ENABLE_V0
@ -108,7 +108,7 @@ int LiteModel::ConvertAttrToTensors() {
#endif #endif
void LiteModel::Free() { void LiteModel::Free() {
#ifdef SERVER_INFERENCE #ifdef SHARING_MODEL_WEIGHT
lite::PackWeightManager::GetInstance()->DeleteSavedModelPtr(this); lite::PackWeightManager::GetInstance()->DeleteSavedModelPtr(this);
#endif #endif
if (this->buf != nullptr) { if (this->buf != nullptr) {
@ -603,7 +603,7 @@ Model *ImportFromBuffer(const char *model_buf, size_t size, bool take_buf) {
MS_LOG(ERROR) << "new model fail!"; MS_LOG(ERROR) << "new model fail!";
return nullptr; return nullptr;
} }
#ifdef SERVER_INFERENCE #ifdef SHARING_MODEL_WEIGHT
lite::PackWeightManager::GetInstance()->StoreLiteModel(model_buf, model); lite::PackWeightManager::GetInstance()->StoreLiteModel(model_buf, model);
#endif #endif
auto status = model->ConstructModel(model_buf, size, take_buf); auto status = model->ConstructModel(model_buf, size, take_buf);

View File

@ -16,7 +16,7 @@
#include "src/lite_session.h" #include "src/lite_session.h"
#include <set> #include <set>
#ifdef SERVER_INFERENCE #ifdef SHARING_MODEL_WEIGHT
#include "src/pack_weight_manager.h" #include "src/pack_weight_manager.h"
#endif #endif
#ifndef RUNTIME_PASS_CLIP #ifndef RUNTIME_PASS_CLIP
@ -40,9 +40,6 @@
#include "src/lite_model.h" #include "src/lite_model.h"
#include "src/weight_decoder.h" #include "src/weight_decoder.h"
#include "src/runtime/runtime_allocator.h" #include "src/runtime/runtime_allocator.h"
#ifdef SERVER_INFERENCE
#include "src/runtime/dynamic_mem_allocator.h"
#endif
#include "src/lite_kernel_util.h" #include "src/lite_kernel_util.h"
#ifndef CUSTOM_KERNEL_REGISTRY_CLIP #ifndef CUSTOM_KERNEL_REGISTRY_CLIP
#include "src/registry/register_kernel_impl.h" #include "src/registry/register_kernel_impl.h"
@ -669,7 +666,7 @@ void LiteSession::FreePackOpWeight(const std::vector<kernel::LiteKernel *> &kern
} }
} }
} }
#ifdef SERVER_INFERENCE #ifdef SHARING_MODEL_WEIGHT
int LiteSession::IniPackWeightData(Model *model) { int LiteSession::IniPackWeightData(Model *model) {
auto lite_model = reinterpret_cast<LiteModel *>(model); auto lite_model = reinterpret_cast<LiteModel *>(model);
auto kernel_num = model->all_nodes_.size(); auto kernel_num = model->all_nodes_.size();
@ -712,7 +709,7 @@ int LiteSession::CompileGraph(Model *model) {
is_running_.store(false); is_running_.store(false);
return ret; return ret;
} }
#ifdef SERVER_INFERENCE #ifdef SHARING_MODEL_WEIGHT
ret = IniPackWeightData(model); ret = IniPackWeightData(model);
if (ret != RET_OK) { if (ret != RET_OK) {
MS_LOG(ERROR) << "IniPackWeightData failed."; MS_LOG(ERROR) << "IniPackWeightData failed.";
@ -1831,7 +1828,7 @@ const char *lite::LiteSession::LoadModelByPath(const std::string &file, mindspor
delete[] model_buf; delete[] model_buf;
model_buf = nullptr; model_buf = nullptr;
} }
#ifdef SERVER_INFERENCE #ifdef SHARING_MODEL_WEIGHT
lite::PackWeightManager::GetInstance()->InitWeightManagerByPath(file, model_buf); lite::PackWeightManager::GetInstance()->InitWeightManagerByPath(file, model_buf);
#endif #endif
return lite_buf; return lite_buf;
@ -1855,7 +1852,7 @@ const char *lite::LiteSession::LoadModelByPath(const std::string &file, mindspor
delete[] model_buf; delete[] model_buf;
model_buf = nullptr; model_buf = nullptr;
} }
#ifdef SERVER_INFERENCE #ifdef SHARING_MODEL_WEIGHT
lite::PackWeightManager::GetInstance()->InitWeightManagerByPath(file, model_buf); lite::PackWeightManager::GetInstance()->InitWeightManagerByPath(file, model_buf);
#endif #endif
return lite_buf; return lite_buf;

View File

@ -119,7 +119,7 @@ class LiteSession : public session::LiteSession {
const std::vector<kernel::LiteKernel *> &kernels, const std::vector<kernel::LiteKernel *> &kernels,
const std::unordered_map<Tensor *, Tensor *> &isolate_input_map = std::unordered_map<Tensor *, Tensor *>()); const std::unordered_map<Tensor *, Tensor *> &isolate_input_map = std::unordered_map<Tensor *, Tensor *>());
static void FreePackOpWeight(const std::vector<kernel::LiteKernel *> &kernels); static void FreePackOpWeight(const std::vector<kernel::LiteKernel *> &kernels);
#ifdef SERVER_INFERENCE #ifdef SHARING_MODEL_WEIGHT
int IniPackWeightData(Model *model); int IniPackWeightData(Model *model);
#endif #endif

View File

@ -13,7 +13,7 @@
* See the License for the specific language governing permissions and * See the License for the specific language governing permissions and
* limitations under the License. * limitations under the License.
*/ */
#ifdef SERVER_INFERENCE #ifdef SHARING_MODEL_WEIGHT
#include "src/pack_weight_manager.h" #include "src/pack_weight_manager.h"
namespace mindspore::lite { namespace mindspore::lite {
namespace { namespace {

View File

@ -41,7 +41,7 @@
#include "include/mpi_sys.h" #include "include/mpi_sys.h"
#include "include/mpi_vb.h" #include "include/mpi_vb.h"
#endif #endif
#ifdef SERVER_INFERENCE #ifdef PARALLEL_INFERENCE
#include <thread> #include <thread>
#endif #endif
namespace mindspore { namespace mindspore {
@ -51,7 +51,7 @@ constexpr int kFrequencyDefault = 3;
constexpr int kPercentageDivisor = 100; constexpr int kPercentageDivisor = 100;
constexpr int kDumpInputsAndOutputs = 0; constexpr int kDumpInputsAndOutputs = 0;
constexpr int kDumpOutputs = 2; constexpr int kDumpOutputs = 2;
#ifdef SERVER_INFERENCE #ifdef PARALLEL_INFERENCE
constexpr int kMaxRequestNum = 200; constexpr int kMaxRequestNum = 200;
#endif #endif
namespace lite { namespace lite {
@ -221,7 +221,7 @@ int BenchmarkUnifiedApi::LoadInput() {
} }
int BenchmarkUnifiedApi::GenerateInputData() { int BenchmarkUnifiedApi::GenerateInputData() {
#ifdef SERVER_INFERENCE #ifdef PARALLEL_INFERENCE
if (flags_->enable_parallel_predict_) { if (flags_->enable_parallel_predict_) {
std::vector<MSTensor> inputs; std::vector<MSTensor> inputs;
for (size_t i = 0; i < ms_inputs_for_api_.size(); i++) { for (size_t i = 0; i < ms_inputs_for_api_.size(); i++) {
@ -298,7 +298,7 @@ void BenchmarkUnifiedApi::UpdateConfigInfo() {
} }
int BenchmarkUnifiedApi::ReadInputFile() { int BenchmarkUnifiedApi::ReadInputFile() {
#ifdef SERVER_INFERENCE #ifdef PARALLEL_INFERENCE
if (flags_->enable_parallel_predict_) { if (flags_->enable_parallel_predict_) {
std::vector<MSTensor> inputs; std::vector<MSTensor> inputs;
for (size_t i = 0; i < ms_inputs_for_api_.size(); i++) { for (size_t i = 0; i < ms_inputs_for_api_.size(); i++) {
@ -486,7 +486,7 @@ int BenchmarkUnifiedApi::InitMSContext(const std::shared_ptr<mindspore::Context>
return RET_OK; return RET_OK;
} }
#ifdef SERVER_INFERENCE #ifdef PARALLEL_INFERENCE
int BenchmarkUnifiedApi::CompareOutputForModelPool(std::vector<mindspore::MSTensor> *outputs) { int BenchmarkUnifiedApi::CompareOutputForModelPool(std::vector<mindspore::MSTensor> *outputs) {
if (outputs->empty()) { if (outputs->empty()) {
MS_LOG(ERROR) << "outputs is empty."; MS_LOG(ERROR) << "outputs is empty.";
@ -897,7 +897,7 @@ int BenchmarkUnifiedApi::MarkAccuracy() {
int BenchmarkUnifiedApi::PrintInputData() { int BenchmarkUnifiedApi::PrintInputData() {
for (size_t i = 0; i < ms_inputs_for_api_.size(); i++) { for (size_t i = 0; i < ms_inputs_for_api_.size(); i++) {
mindspore::MSTensor input; mindspore::MSTensor input;
#ifdef SERVER_INFERENCE #ifdef PARALLEL_INFERENCE
if (flags_->enable_parallel_predict_) { if (flags_->enable_parallel_predict_) {
input = all_inputs_[0][i]; input = all_inputs_[0][i];
} else { } else {
@ -947,7 +947,7 @@ int BenchmarkUnifiedApi::PrintInputData() {
} }
return RET_OK; return RET_OK;
} }
#ifdef SERVER_INFERENCE #ifdef PARALLEL_INFERENCE
int BenchmarkUnifiedApi::RunModelPool(std::shared_ptr<mindspore::Context> context) { int BenchmarkUnifiedApi::RunModelPool(std::shared_ptr<mindspore::Context> context) {
if (flags_->warm_up_loop_count_ > kMaxRequestNum) { if (flags_->warm_up_loop_count_ > kMaxRequestNum) {
MS_LOG(WARNING) << "in parallel predict warm up loop count should less than" << kMaxRequestNum; MS_LOG(WARNING) << "in parallel predict warm up loop count should less than" << kMaxRequestNum;
@ -1113,7 +1113,7 @@ int BenchmarkUnifiedApi::RunBenchmark() {
} }
UpdateConfigInfo(); UpdateConfigInfo();
#ifdef SERVER_INFERENCE #ifdef PARALLEL_INFERENCE
if (flags_->enable_parallel_predict_) { if (flags_->enable_parallel_predict_) {
status = RunModelPool(context); status = RunModelPool(context);
if (status != RET_OK) { if (status != RET_OK) {

View File

@ -42,7 +42,7 @@
#ifdef ENABLE_OPENGL_TEXTURE #ifdef ENABLE_OPENGL_TEXTURE
#include "tools/common/opengl_util.h" #include "tools/common/opengl_util.h"
#endif #endif
#ifdef SERVER_INFERENCE #ifdef PARALLEL_INFERENCE
#include "include/api/model_parallel_runner.h" #include "include/api/model_parallel_runner.h"
#endif #endif
@ -86,7 +86,7 @@ class MS_API BenchmarkUnifiedApi : public BenchmarkBase {
int GetDataTypeByTensorName(const std::string &tensor_name) override; int GetDataTypeByTensorName(const std::string &tensor_name) override;
int CompareOutput() override; int CompareOutput() override;
#ifdef SERVER_INFERENCE #ifdef PARALLEL_INFERENCE
int CompareOutputForModelPool(std::vector<mindspore::MSTensor> *outputs); int CompareOutputForModelPool(std::vector<mindspore::MSTensor> *outputs);
#endif #endif
int CompareOutputByCosineDistance(float cosine_distance_threshold); int CompareOutputByCosineDistance(float cosine_distance_threshold);
@ -100,7 +100,7 @@ class MS_API BenchmarkUnifiedApi : public BenchmarkBase {
int InitPrintTensorDataCallbackParameter() override; int InitPrintTensorDataCallbackParameter() override;
int PrintInputData(); int PrintInputData();
#ifdef SERVER_INFERENCE #ifdef PARALLEL_INFERENCE
int RunModelPool(std::shared_ptr<mindspore::Context> context); int RunModelPool(std::shared_ptr<mindspore::Context> context);
#endif #endif

View File

@ -125,13 +125,25 @@ set(LITE_SRC ${API_SRC}
${SRC_DIR}/huffman_decode.cc ${SRC_DIR}/huffman_decode.cc
${SRC_DIR}/delegate/tensorrt/distribution/distribution_base.cc ${SRC_DIR}/delegate/tensorrt/distribution/distribution_base.cc
) )
if(MSLITE_ENABLE_SERVER_INFERENCE) if(MSLITE_ENABLE_BFC_MEMORY)
set(LITE_SRC set(LITE_SRC
${LITE_SRC} ${LITE_SRC}
${SRC_DIR}/pack_weight_manager.cc
${SRC_DIR}/runtime/dynamic_mem_allocator.cc ${SRC_DIR}/runtime/dynamic_mem_allocator.cc
${SRC_DIR}/runtime/dynamic_mem_manager.cc ${SRC_DIR}/runtime/dynamic_mem_manager.cc
${SRC_DIR}/runtime/numa_adapter.cc ${SRC_DIR}/runtime/numa_adapter.cc
)
endif()
if(MSLITE_ENABLE_SHARING_MODEL_WEIGHT)
set(LITE_SRC
${LITE_SRC}
${SRC_DIR}/pack_weight_manager.cc
)
endif()
if(MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE)
set(LITE_SRC
${LITE_SRC}
${SRC_DIR}/thread_cost_model.cc ${SRC_DIR}/thread_cost_model.cc
) )
endif() endif()