forked from mindspore-Ecosystem/mindspore
!31505 [MS][LITE][DEV]support env marco for different feature
Merge pull request !31505 from chenjianping/master_dev1
This commit is contained in:
commit
ff7df8e00f
|
@ -41,6 +41,10 @@ option(MSLITE_ENABLE_RUNTIME_GLOG "enable runtime glog" off)
|
||||||
option(MSLITE_ENABLE_COVERAGE "enable code coverage" off)
|
option(MSLITE_ENABLE_COVERAGE "enable code coverage" off)
|
||||||
option(MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL "enable sharing memory with OpenGL" off)
|
option(MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL "enable sharing memory with OpenGL" off)
|
||||||
option(MSLITE_ENABLE_SERVER_INFERENCE "enable inference on server" off)
|
option(MSLITE_ENABLE_SERVER_INFERENCE "enable inference on server" off)
|
||||||
|
option(MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE "enable distribute thread dynamically" off)
|
||||||
|
option(MSLITE_ENABLE_BFC_MEMORY "enable distribute BFC memory" off)
|
||||||
|
option(MSLITE_ENABLE_PARALLEL_INFERENCE "enable parallel inference interface" off)
|
||||||
|
option(MSLITE_ENABLE_SHARING_MODEL_WEIGHT "enable sharing model weight" off)
|
||||||
|
|
||||||
#Option that can be configured through manually
|
#Option that can be configured through manually
|
||||||
option(ENABLE_VERBOSE "" off)
|
option(ENABLE_VERBOSE "" off)
|
||||||
|
@ -148,11 +152,46 @@ endif()
|
||||||
if(DEFINED ENV{MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL})
|
if(DEFINED ENV{MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL})
|
||||||
set(MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL $ENV{MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL})
|
set(MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL $ENV{MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL})
|
||||||
endif()
|
endif()
|
||||||
if(DEFINED ENV{MSLITE_ENABLE_SERVING})
|
|
||||||
set(MSLITE_ENABLE_SERVING $ENV{MSLITE_ENABLE_SERVING})
|
option(MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE "enable distribute thread dynamically" off)
|
||||||
endif()
|
option(MSLITE_ENABLE_BFC_MEMORY "enable distribute BFC memory" off)
|
||||||
|
option(MSLITE_ENABLE_PARALLEL_INFERENCE "enable parallel inference interface" off)
|
||||||
|
option(MSLITE_ENABLE_SHARING_MODEL_WEIGHT "enable sharing model weight" off)
|
||||||
|
|
||||||
if(DEFINED ENV{MSLITE_ENABLE_SERVER_INFERENCE})
|
if(DEFINED ENV{MSLITE_ENABLE_SERVER_INFERENCE})
|
||||||
set(MSLITE_ENABLE_SERVER_INFERENCE $ENV{MSLITE_ENABLE_SERVER_INFERENCE})
|
set(MSLITE_ENABLE_SERVER_INFERENCE $ENV{MSLITE_ENABLE_SERVER_INFERENCE})
|
||||||
|
set(MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE on)
|
||||||
|
set(MSLITE_ENABLE_BFC_MEMORY on)
|
||||||
|
set(MSLITE_ENABLE_PARALLEL_INFERENCE on)
|
||||||
|
set(MSLITE_ENABLE_SHARING_MODEL_WEIGHT on)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(DEFINED ENV{MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE})
|
||||||
|
set(MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE $ENV{MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE})
|
||||||
|
endif()
|
||||||
|
if(MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE)
|
||||||
|
add_compile_definitions(DYNAMIC_THREAD_DISTRIBUTE)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(DEFINED ENV{MSLITE_ENABLE_BFC_MEMORY})
|
||||||
|
set(MSLITE_ENABLE_BFC_MEMORY $ENV{MSLITE_ENABLE_BFC_MEMORY})
|
||||||
|
endif()
|
||||||
|
if(MSLITE_ENABLE_BFC_MEMORY)
|
||||||
|
add_compile_definitions(BFC_MEMORY)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(DEFINED ENV{MSLITE_ENABLE_PARALLEL_INFERENCE})
|
||||||
|
set(MSLITE_ENABLE_PARALLEL_INFERENCE $ENV{MSLITE_ENABLE_PARALLEL_INFERENCE})
|
||||||
|
endif()
|
||||||
|
if(MSLITE_ENABLE_PARALLEL_INFERENCE)
|
||||||
|
add_compile_definitions(PARALLEL_INFERENCE)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(DEFINED ENV{MSLITE_ENABLE_SHARING_MODEL_WEIGHT})
|
||||||
|
set(MSLITE_ENABLE_SHARING_MODEL_WEIGHT $ENV{MSLITE_ENABLE_SHARING_MODEL_WEIGHT})
|
||||||
|
endif()
|
||||||
|
if(MSLITE_ENABLE_SHARING_MODEL_WEIGHT)
|
||||||
|
add_compile_definitions(SHARING_MODEL_WEIGHT)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(MACHINE_LINUX_ARM64)
|
if(MACHINE_LINUX_ARM64)
|
||||||
|
@ -321,6 +360,10 @@ message(STATUS "\tMSLITE_ENABLE_RUNTIME_GLOG = \t${MSLITE_ENABLE_RUNTIME_
|
||||||
message(STATUS "\tMSLITE_ENABLE_COVERAGE = \t${MSLITE_ENABLE_COVERAGE}")
|
message(STATUS "\tMSLITE_ENABLE_COVERAGE = \t${MSLITE_ENABLE_COVERAGE}")
|
||||||
message(STATUS "\tMSLITE_ENABLE_SHARING_MEM_WITH_OPENGL = \t${MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL}")
|
message(STATUS "\tMSLITE_ENABLE_SHARING_MEM_WITH_OPENGL = \t${MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL}")
|
||||||
message(STATUS "\tMSLITE_ENABLE_SERVER_INFERENCE = \t${MSLITE_ENABLE_SERVER_INFERENCE}")
|
message(STATUS "\tMSLITE_ENABLE_SERVER_INFERENCE = \t${MSLITE_ENABLE_SERVER_INFERENCE}")
|
||||||
|
message(STATUS "\tMSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE = \t${MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE}")
|
||||||
|
message(STATUS "\tMSLITE_ENABLE_BFC_MEMORY = \t${MSLITE_ENABLE_BFC_MEMORY}")
|
||||||
|
message(STATUS "\tMSLITE_ENABLE_PARALLEL_INFERENCE = \t${MSLITE_ENABLE_PARALLEL_INFERENCE}")
|
||||||
|
message(STATUS "\tMSLITE_ENABLE_SHARING_MODEL_WEIGHT = \t${MSLITE_ENABLE_SHARING_MODEL_WEIGHT}")
|
||||||
|
|
||||||
if((MSLITE_ENABLE_CONVERTER OR MSLITE_ENABLE_TESTCASES) AND (
|
if((MSLITE_ENABLE_CONVERTER OR MSLITE_ENABLE_TESTCASES) AND (
|
||||||
NOT MSLITE_ENABLE_MINDRT
|
NOT MSLITE_ENABLE_MINDRT
|
||||||
|
|
|
@ -88,7 +88,7 @@ set(JNI_SRC
|
||||||
${NEW_NATIVE_DIR}/version.cpp
|
${NEW_NATIVE_DIR}/version.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
if(MSLITE_ENABLE_SERVER_INFERENCE)
|
if(MSLITE_ENABLE_PARALLEL_INFERENCE)
|
||||||
set(JNI_SRC
|
set(JNI_SRC
|
||||||
${JNI_SRC}
|
${JNI_SRC}
|
||||||
${NEW_NATIVE_DIR}/runner_config.cpp
|
${NEW_NATIVE_DIR}/runner_config.cpp
|
||||||
|
|
|
@ -65,7 +65,7 @@ file(GLOB CXX_API_SRCS
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/cxx_api/graph/*.cc
|
${CMAKE_CURRENT_SOURCE_DIR}/cxx_api/graph/*.cc
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/cxx_api/tensor/*.cc
|
${CMAKE_CURRENT_SOURCE_DIR}/cxx_api/tensor/*.cc
|
||||||
)
|
)
|
||||||
if(MSLITE_ENABLE_SERVER_INFERENCE)
|
if(MSLITE_ENABLE_PARALLEL_INFERENCE)
|
||||||
set(CXX_API_SRCS
|
set(CXX_API_SRCS
|
||||||
${CXX_API_SRCS}
|
${CXX_API_SRCS}
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/cxx_api/model_pool/predict_task_queue.cc
|
${CMAKE_CURRENT_SOURCE_DIR}/cxx_api/model_pool/predict_task_queue.cc
|
||||||
|
@ -140,17 +140,30 @@ if(MSLITE_ENABLE_MODEL_ENCRYPTION)
|
||||||
)
|
)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
if(MSLITE_ENABLE_SERVER_INFERENCE)
|
if(MSLITE_ENABLE_BFC_MEMORY)
|
||||||
set(LITE_SRC
|
set(LITE_SRC
|
||||||
${LITE_SRC}
|
${LITE_SRC}
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/runtime/dynamic_mem_allocator.cc
|
${CMAKE_CURRENT_SOURCE_DIR}/runtime/dynamic_mem_allocator.cc
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/runtime/dynamic_mem_manager.cc
|
${CMAKE_CURRENT_SOURCE_DIR}/runtime/dynamic_mem_manager.cc
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/runtime/numa_adapter.cc
|
${CMAKE_CURRENT_SOURCE_DIR}/runtime/numa_adapter.cc
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(MSLITE_ENABLE_SHARING_MODEL_WEIGHT)
|
||||||
|
set(LITE_SRC
|
||||||
|
${LITE_SRC}
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/pack_weight_manager.cc
|
${CMAKE_CURRENT_SOURCE_DIR}/pack_weight_manager.cc
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/thread_cost_model.cc
|
${CMAKE_CURRENT_SOURCE_DIR}/thread_cost_model.cc
|
||||||
)
|
)
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
|
if(MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE)
|
||||||
|
set(LITE_SRC
|
||||||
|
${LITE_SRC}
|
||||||
|
${CMAKE_CURRENT_SOURCE_DIR}/thread_cost_model.cc
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
|
||||||
if(MSLITE_ENABLE_CONTROLFLOW)
|
if(MSLITE_ENABLE_CONTROLFLOW)
|
||||||
file(GLOB CONTROL_FLOW_SRC
|
file(GLOB CONTROL_FLOW_SRC
|
||||||
${CMAKE_CURRENT_SOURCE_DIR}/control_flow/*.cc
|
${CMAKE_CURRENT_SOURCE_DIR}/control_flow/*.cc
|
||||||
|
|
|
@ -66,7 +66,7 @@ static const char *const kMSCacheVocabSize = "vocab_size";
|
||||||
static const char *const kMSCacheDeviceSize = "device_cache_size";
|
static const char *const kMSCacheDeviceSize = "device_cache_size";
|
||||||
static const char *const kMSCacheSerializePath = "serialize_path";
|
static const char *const kMSCacheSerializePath = "serialize_path";
|
||||||
// config
|
// config
|
||||||
#ifdef SERVER_INFERENCE
|
#ifdef BFC_MEMORY
|
||||||
static const char *const kConfigServerInference = "server_inference";
|
static const char *const kConfigServerInference = "server_inference";
|
||||||
static const char *const kConfigNUMANodeId = "numa_node_id";
|
static const char *const kConfigNUMANodeId = "numa_node_id";
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -27,7 +27,7 @@
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <sys/param.h>
|
#include <sys/param.h>
|
||||||
#endif
|
#endif
|
||||||
#ifdef SERVER_INFERENCE
|
#ifdef BFC_MEMORY
|
||||||
#include <sys/sysinfo.h>
|
#include <sys/sysinfo.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -182,7 +182,7 @@ size_t GetMaxMallocSize() {
|
||||||
return max_malloc_size;
|
return max_malloc_size;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef SERVER_INFERENCE
|
#ifdef BFC_MEMORY
|
||||||
int64_t GetFreeMemory() {
|
int64_t GetFreeMemory() {
|
||||||
struct sysinfo info;
|
struct sysinfo info;
|
||||||
auto ret = sysinfo(&info);
|
auto ret = sysinfo(&info);
|
||||||
|
|
|
@ -43,7 +43,7 @@ uint64_t GetTimeUs();
|
||||||
bool IsSupportSDot();
|
bool IsSupportSDot();
|
||||||
|
|
||||||
size_t GetMaxMallocSize();
|
size_t GetMaxMallocSize();
|
||||||
#ifdef SERVER_INFERENCE
|
#ifdef BFC_MEMORY
|
||||||
int64_t GetFreeMemory();
|
int64_t GetFreeMemory();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -689,7 +689,7 @@ lite::LiteSession *ModelImpl::CreateLiteSession(lite::InnerContext *context) {
|
||||||
delete context;
|
delete context;
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
#ifdef SERVER_INFERENCE
|
#ifdef BFC_MEMORY
|
||||||
auto iter = config_info_.find(lite::kConfigServerInference);
|
auto iter = config_info_.find(lite::kConfigServerInference);
|
||||||
if (iter != config_info_.end()) {
|
if (iter != config_info_.end()) {
|
||||||
auto numa_iter = iter->second.find(lite::kConfigNUMANodeId);
|
auto numa_iter = iter->second.find(lite::kConfigNUMANodeId);
|
||||||
|
|
|
@ -63,9 +63,11 @@ Status ModelWorker::Init(const char *model_buf, size_t size, const std::shared_p
|
||||||
int node_id) {
|
int node_id) {
|
||||||
model_ = std::make_shared<Model>();
|
model_ = std::make_shared<Model>();
|
||||||
mindspore::ModelType model_type = kMindIR_Lite;
|
mindspore::ModelType model_type = kMindIR_Lite;
|
||||||
|
#ifdef BFC_MEMORY
|
||||||
if (node_id != -1) {
|
if (node_id != -1) {
|
||||||
model_->UpdateConfig(lite::kConfigServerInference, {lite::kConfigNUMANodeId, std::to_string(node_id)});
|
model_->UpdateConfig(lite::kConfigServerInference, {lite::kConfigNUMANodeId, std::to_string(node_id)});
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
auto status = model_->Build(model_buf, size, model_type, model_context);
|
auto status = model_->Build(model_buf, size, model_type, model_context);
|
||||||
if (status != kSuccess) {
|
if (status != kSuccess) {
|
||||||
MS_LOG(ERROR) << "model build failed in ModelPool Init";
|
MS_LOG(ERROR) << "model build failed in ModelPool Init";
|
||||||
|
|
|
@ -148,7 +148,7 @@ int InnerContext::Init() {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this->allocator == nullptr) {
|
if (this->allocator == nullptr) {
|
||||||
#ifdef SERVER_INFERENCE
|
#ifdef BFC_MEMORY
|
||||||
this->allocator = std::make_shared<DynamicMemAllocator>(node_id_);
|
this->allocator = std::make_shared<DynamicMemAllocator>(node_id_);
|
||||||
#else
|
#else
|
||||||
this->allocator = mindspore::Allocator::Create();
|
this->allocator = mindspore::Allocator::Create();
|
||||||
|
|
|
@ -20,7 +20,7 @@
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include "include/context.h"
|
#include "include/context.h"
|
||||||
#ifdef SERVER_INFERENCE
|
#ifdef BFC_MEMORY
|
||||||
#include "src/runtime/dynamic_mem_allocator.h"
|
#include "src/runtime/dynamic_mem_allocator.h"
|
||||||
#else
|
#else
|
||||||
#include "src/runtime/inner_allocator.h"
|
#include "src/runtime/inner_allocator.h"
|
||||||
|
@ -91,7 +91,7 @@ struct InnerContext : public Context {
|
||||||
|
|
||||||
void ReplaceLinkInfoSenderWithNewOne(void *new_sender, void *old_sender);
|
void ReplaceLinkInfoSenderWithNewOne(void *new_sender, void *old_sender);
|
||||||
|
|
||||||
#ifdef SERVER_INFERENCE
|
#ifdef BFC_MEMORY
|
||||||
/// \brief Set NUMA node id.
|
/// \brief Set NUMA node id.
|
||||||
///
|
///
|
||||||
/// \param[in] node Define the NUMA node id.
|
/// \param[in] node Define the NUMA node id.
|
||||||
|
@ -117,7 +117,7 @@ struct InnerContext : public Context {
|
||||||
|
|
||||||
bool device_and_pkg_support_fp16_ = false;
|
bool device_and_pkg_support_fp16_ = false;
|
||||||
|
|
||||||
#ifdef SERVER_INFERENCE
|
#ifdef BFC_MEMORY
|
||||||
int node_id_ = -1;
|
int node_id_ = -1;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -28,7 +28,7 @@
|
||||||
#include "src/common/graph_util.h"
|
#include "src/common/graph_util.h"
|
||||||
#include "src/common/file_utils.h"
|
#include "src/common/file_utils.h"
|
||||||
#include "src/tensor.h"
|
#include "src/tensor.h"
|
||||||
#ifdef SERVER_INFERENCE
|
#ifdef SHARING_MODEL_WEIGHT
|
||||||
#include "src/pack_weight_manager.h"
|
#include "src/pack_weight_manager.h"
|
||||||
#endif
|
#endif
|
||||||
#ifdef ENABLE_V0
|
#ifdef ENABLE_V0
|
||||||
|
@ -108,7 +108,7 @@ int LiteModel::ConvertAttrToTensors() {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void LiteModel::Free() {
|
void LiteModel::Free() {
|
||||||
#ifdef SERVER_INFERENCE
|
#ifdef SHARING_MODEL_WEIGHT
|
||||||
lite::PackWeightManager::GetInstance()->DeleteSavedModelPtr(this);
|
lite::PackWeightManager::GetInstance()->DeleteSavedModelPtr(this);
|
||||||
#endif
|
#endif
|
||||||
if (this->buf != nullptr) {
|
if (this->buf != nullptr) {
|
||||||
|
@ -603,7 +603,7 @@ Model *ImportFromBuffer(const char *model_buf, size_t size, bool take_buf) {
|
||||||
MS_LOG(ERROR) << "new model fail!";
|
MS_LOG(ERROR) << "new model fail!";
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
#ifdef SERVER_INFERENCE
|
#ifdef SHARING_MODEL_WEIGHT
|
||||||
lite::PackWeightManager::GetInstance()->StoreLiteModel(model_buf, model);
|
lite::PackWeightManager::GetInstance()->StoreLiteModel(model_buf, model);
|
||||||
#endif
|
#endif
|
||||||
auto status = model->ConstructModel(model_buf, size, take_buf);
|
auto status = model->ConstructModel(model_buf, size, take_buf);
|
||||||
|
|
|
@ -16,7 +16,7 @@
|
||||||
|
|
||||||
#include "src/lite_session.h"
|
#include "src/lite_session.h"
|
||||||
#include <set>
|
#include <set>
|
||||||
#ifdef SERVER_INFERENCE
|
#ifdef SHARING_MODEL_WEIGHT
|
||||||
#include "src/pack_weight_manager.h"
|
#include "src/pack_weight_manager.h"
|
||||||
#endif
|
#endif
|
||||||
#ifndef RUNTIME_PASS_CLIP
|
#ifndef RUNTIME_PASS_CLIP
|
||||||
|
@ -40,9 +40,6 @@
|
||||||
#include "src/lite_model.h"
|
#include "src/lite_model.h"
|
||||||
#include "src/weight_decoder.h"
|
#include "src/weight_decoder.h"
|
||||||
#include "src/runtime/runtime_allocator.h"
|
#include "src/runtime/runtime_allocator.h"
|
||||||
#ifdef SERVER_INFERENCE
|
|
||||||
#include "src/runtime/dynamic_mem_allocator.h"
|
|
||||||
#endif
|
|
||||||
#include "src/lite_kernel_util.h"
|
#include "src/lite_kernel_util.h"
|
||||||
#ifndef CUSTOM_KERNEL_REGISTRY_CLIP
|
#ifndef CUSTOM_KERNEL_REGISTRY_CLIP
|
||||||
#include "src/registry/register_kernel_impl.h"
|
#include "src/registry/register_kernel_impl.h"
|
||||||
|
@ -669,7 +666,7 @@ void LiteSession::FreePackOpWeight(const std::vector<kernel::LiteKernel *> &kern
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#ifdef SERVER_INFERENCE
|
#ifdef SHARING_MODEL_WEIGHT
|
||||||
int LiteSession::IniPackWeightData(Model *model) {
|
int LiteSession::IniPackWeightData(Model *model) {
|
||||||
auto lite_model = reinterpret_cast<LiteModel *>(model);
|
auto lite_model = reinterpret_cast<LiteModel *>(model);
|
||||||
auto kernel_num = model->all_nodes_.size();
|
auto kernel_num = model->all_nodes_.size();
|
||||||
|
@ -712,7 +709,7 @@ int LiteSession::CompileGraph(Model *model) {
|
||||||
is_running_.store(false);
|
is_running_.store(false);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
#ifdef SERVER_INFERENCE
|
#ifdef SHARING_MODEL_WEIGHT
|
||||||
ret = IniPackWeightData(model);
|
ret = IniPackWeightData(model);
|
||||||
if (ret != RET_OK) {
|
if (ret != RET_OK) {
|
||||||
MS_LOG(ERROR) << "IniPackWeightData failed.";
|
MS_LOG(ERROR) << "IniPackWeightData failed.";
|
||||||
|
@ -1831,7 +1828,7 @@ const char *lite::LiteSession::LoadModelByPath(const std::string &file, mindspor
|
||||||
delete[] model_buf;
|
delete[] model_buf;
|
||||||
model_buf = nullptr;
|
model_buf = nullptr;
|
||||||
}
|
}
|
||||||
#ifdef SERVER_INFERENCE
|
#ifdef SHARING_MODEL_WEIGHT
|
||||||
lite::PackWeightManager::GetInstance()->InitWeightManagerByPath(file, model_buf);
|
lite::PackWeightManager::GetInstance()->InitWeightManagerByPath(file, model_buf);
|
||||||
#endif
|
#endif
|
||||||
return lite_buf;
|
return lite_buf;
|
||||||
|
@ -1855,7 +1852,7 @@ const char *lite::LiteSession::LoadModelByPath(const std::string &file, mindspor
|
||||||
delete[] model_buf;
|
delete[] model_buf;
|
||||||
model_buf = nullptr;
|
model_buf = nullptr;
|
||||||
}
|
}
|
||||||
#ifdef SERVER_INFERENCE
|
#ifdef SHARING_MODEL_WEIGHT
|
||||||
lite::PackWeightManager::GetInstance()->InitWeightManagerByPath(file, model_buf);
|
lite::PackWeightManager::GetInstance()->InitWeightManagerByPath(file, model_buf);
|
||||||
#endif
|
#endif
|
||||||
return lite_buf;
|
return lite_buf;
|
||||||
|
|
|
@ -119,7 +119,7 @@ class LiteSession : public session::LiteSession {
|
||||||
const std::vector<kernel::LiteKernel *> &kernels,
|
const std::vector<kernel::LiteKernel *> &kernels,
|
||||||
const std::unordered_map<Tensor *, Tensor *> &isolate_input_map = std::unordered_map<Tensor *, Tensor *>());
|
const std::unordered_map<Tensor *, Tensor *> &isolate_input_map = std::unordered_map<Tensor *, Tensor *>());
|
||||||
static void FreePackOpWeight(const std::vector<kernel::LiteKernel *> &kernels);
|
static void FreePackOpWeight(const std::vector<kernel::LiteKernel *> &kernels);
|
||||||
#ifdef SERVER_INFERENCE
|
#ifdef SHARING_MODEL_WEIGHT
|
||||||
int IniPackWeightData(Model *model);
|
int IniPackWeightData(Model *model);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -13,7 +13,7 @@
|
||||||
* See the License for the specific language governing permissions and
|
* See the License for the specific language governing permissions and
|
||||||
* limitations under the License.
|
* limitations under the License.
|
||||||
*/
|
*/
|
||||||
#ifdef SERVER_INFERENCE
|
#ifdef SHARING_MODEL_WEIGHT
|
||||||
#include "src/pack_weight_manager.h"
|
#include "src/pack_weight_manager.h"
|
||||||
namespace mindspore::lite {
|
namespace mindspore::lite {
|
||||||
namespace {
|
namespace {
|
||||||
|
|
|
@ -41,7 +41,7 @@
|
||||||
#include "include/mpi_sys.h"
|
#include "include/mpi_sys.h"
|
||||||
#include "include/mpi_vb.h"
|
#include "include/mpi_vb.h"
|
||||||
#endif
|
#endif
|
||||||
#ifdef SERVER_INFERENCE
|
#ifdef PARALLEL_INFERENCE
|
||||||
#include <thread>
|
#include <thread>
|
||||||
#endif
|
#endif
|
||||||
namespace mindspore {
|
namespace mindspore {
|
||||||
|
@ -51,7 +51,7 @@ constexpr int kFrequencyDefault = 3;
|
||||||
constexpr int kPercentageDivisor = 100;
|
constexpr int kPercentageDivisor = 100;
|
||||||
constexpr int kDumpInputsAndOutputs = 0;
|
constexpr int kDumpInputsAndOutputs = 0;
|
||||||
constexpr int kDumpOutputs = 2;
|
constexpr int kDumpOutputs = 2;
|
||||||
#ifdef SERVER_INFERENCE
|
#ifdef PARALLEL_INFERENCE
|
||||||
constexpr int kMaxRequestNum = 200;
|
constexpr int kMaxRequestNum = 200;
|
||||||
#endif
|
#endif
|
||||||
namespace lite {
|
namespace lite {
|
||||||
|
@ -221,7 +221,7 @@ int BenchmarkUnifiedApi::LoadInput() {
|
||||||
}
|
}
|
||||||
|
|
||||||
int BenchmarkUnifiedApi::GenerateInputData() {
|
int BenchmarkUnifiedApi::GenerateInputData() {
|
||||||
#ifdef SERVER_INFERENCE
|
#ifdef PARALLEL_INFERENCE
|
||||||
if (flags_->enable_parallel_predict_) {
|
if (flags_->enable_parallel_predict_) {
|
||||||
std::vector<MSTensor> inputs;
|
std::vector<MSTensor> inputs;
|
||||||
for (size_t i = 0; i < ms_inputs_for_api_.size(); i++) {
|
for (size_t i = 0; i < ms_inputs_for_api_.size(); i++) {
|
||||||
|
@ -298,7 +298,7 @@ void BenchmarkUnifiedApi::UpdateConfigInfo() {
|
||||||
}
|
}
|
||||||
|
|
||||||
int BenchmarkUnifiedApi::ReadInputFile() {
|
int BenchmarkUnifiedApi::ReadInputFile() {
|
||||||
#ifdef SERVER_INFERENCE
|
#ifdef PARALLEL_INFERENCE
|
||||||
if (flags_->enable_parallel_predict_) {
|
if (flags_->enable_parallel_predict_) {
|
||||||
std::vector<MSTensor> inputs;
|
std::vector<MSTensor> inputs;
|
||||||
for (size_t i = 0; i < ms_inputs_for_api_.size(); i++) {
|
for (size_t i = 0; i < ms_inputs_for_api_.size(); i++) {
|
||||||
|
@ -486,7 +486,7 @@ int BenchmarkUnifiedApi::InitMSContext(const std::shared_ptr<mindspore::Context>
|
||||||
|
|
||||||
return RET_OK;
|
return RET_OK;
|
||||||
}
|
}
|
||||||
#ifdef SERVER_INFERENCE
|
#ifdef PARALLEL_INFERENCE
|
||||||
int BenchmarkUnifiedApi::CompareOutputForModelPool(std::vector<mindspore::MSTensor> *outputs) {
|
int BenchmarkUnifiedApi::CompareOutputForModelPool(std::vector<mindspore::MSTensor> *outputs) {
|
||||||
if (outputs->empty()) {
|
if (outputs->empty()) {
|
||||||
MS_LOG(ERROR) << "outputs is empty.";
|
MS_LOG(ERROR) << "outputs is empty.";
|
||||||
|
@ -897,7 +897,7 @@ int BenchmarkUnifiedApi::MarkAccuracy() {
|
||||||
int BenchmarkUnifiedApi::PrintInputData() {
|
int BenchmarkUnifiedApi::PrintInputData() {
|
||||||
for (size_t i = 0; i < ms_inputs_for_api_.size(); i++) {
|
for (size_t i = 0; i < ms_inputs_for_api_.size(); i++) {
|
||||||
mindspore::MSTensor input;
|
mindspore::MSTensor input;
|
||||||
#ifdef SERVER_INFERENCE
|
#ifdef PARALLEL_INFERENCE
|
||||||
if (flags_->enable_parallel_predict_) {
|
if (flags_->enable_parallel_predict_) {
|
||||||
input = all_inputs_[0][i];
|
input = all_inputs_[0][i];
|
||||||
} else {
|
} else {
|
||||||
|
@ -947,7 +947,7 @@ int BenchmarkUnifiedApi::PrintInputData() {
|
||||||
}
|
}
|
||||||
return RET_OK;
|
return RET_OK;
|
||||||
}
|
}
|
||||||
#ifdef SERVER_INFERENCE
|
#ifdef PARALLEL_INFERENCE
|
||||||
int BenchmarkUnifiedApi::RunModelPool(std::shared_ptr<mindspore::Context> context) {
|
int BenchmarkUnifiedApi::RunModelPool(std::shared_ptr<mindspore::Context> context) {
|
||||||
if (flags_->warm_up_loop_count_ > kMaxRequestNum) {
|
if (flags_->warm_up_loop_count_ > kMaxRequestNum) {
|
||||||
MS_LOG(WARNING) << "in parallel predict warm up loop count should less than" << kMaxRequestNum;
|
MS_LOG(WARNING) << "in parallel predict warm up loop count should less than" << kMaxRequestNum;
|
||||||
|
@ -1113,7 +1113,7 @@ int BenchmarkUnifiedApi::RunBenchmark() {
|
||||||
}
|
}
|
||||||
|
|
||||||
UpdateConfigInfo();
|
UpdateConfigInfo();
|
||||||
#ifdef SERVER_INFERENCE
|
#ifdef PARALLEL_INFERENCE
|
||||||
if (flags_->enable_parallel_predict_) {
|
if (flags_->enable_parallel_predict_) {
|
||||||
status = RunModelPool(context);
|
status = RunModelPool(context);
|
||||||
if (status != RET_OK) {
|
if (status != RET_OK) {
|
||||||
|
|
|
@ -42,7 +42,7 @@
|
||||||
#ifdef ENABLE_OPENGL_TEXTURE
|
#ifdef ENABLE_OPENGL_TEXTURE
|
||||||
#include "tools/common/opengl_util.h"
|
#include "tools/common/opengl_util.h"
|
||||||
#endif
|
#endif
|
||||||
#ifdef SERVER_INFERENCE
|
#ifdef PARALLEL_INFERENCE
|
||||||
#include "include/api/model_parallel_runner.h"
|
#include "include/api/model_parallel_runner.h"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -86,7 +86,7 @@ class MS_API BenchmarkUnifiedApi : public BenchmarkBase {
|
||||||
int GetDataTypeByTensorName(const std::string &tensor_name) override;
|
int GetDataTypeByTensorName(const std::string &tensor_name) override;
|
||||||
|
|
||||||
int CompareOutput() override;
|
int CompareOutput() override;
|
||||||
#ifdef SERVER_INFERENCE
|
#ifdef PARALLEL_INFERENCE
|
||||||
int CompareOutputForModelPool(std::vector<mindspore::MSTensor> *outputs);
|
int CompareOutputForModelPool(std::vector<mindspore::MSTensor> *outputs);
|
||||||
#endif
|
#endif
|
||||||
int CompareOutputByCosineDistance(float cosine_distance_threshold);
|
int CompareOutputByCosineDistance(float cosine_distance_threshold);
|
||||||
|
@ -100,7 +100,7 @@ class MS_API BenchmarkUnifiedApi : public BenchmarkBase {
|
||||||
int InitPrintTensorDataCallbackParameter() override;
|
int InitPrintTensorDataCallbackParameter() override;
|
||||||
|
|
||||||
int PrintInputData();
|
int PrintInputData();
|
||||||
#ifdef SERVER_INFERENCE
|
#ifdef PARALLEL_INFERENCE
|
||||||
int RunModelPool(std::shared_ptr<mindspore::Context> context);
|
int RunModelPool(std::shared_ptr<mindspore::Context> context);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -125,13 +125,25 @@ set(LITE_SRC ${API_SRC}
|
||||||
${SRC_DIR}/huffman_decode.cc
|
${SRC_DIR}/huffman_decode.cc
|
||||||
${SRC_DIR}/delegate/tensorrt/distribution/distribution_base.cc
|
${SRC_DIR}/delegate/tensorrt/distribution/distribution_base.cc
|
||||||
)
|
)
|
||||||
if(MSLITE_ENABLE_SERVER_INFERENCE)
|
if(MSLITE_ENABLE_BFC_MEMORY)
|
||||||
set(LITE_SRC
|
set(LITE_SRC
|
||||||
${LITE_SRC}
|
${LITE_SRC}
|
||||||
${SRC_DIR}/pack_weight_manager.cc
|
|
||||||
${SRC_DIR}/runtime/dynamic_mem_allocator.cc
|
${SRC_DIR}/runtime/dynamic_mem_allocator.cc
|
||||||
${SRC_DIR}/runtime/dynamic_mem_manager.cc
|
${SRC_DIR}/runtime/dynamic_mem_manager.cc
|
||||||
${SRC_DIR}/runtime/numa_adapter.cc
|
${SRC_DIR}/runtime/numa_adapter.cc
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(MSLITE_ENABLE_SHARING_MODEL_WEIGHT)
|
||||||
|
set(LITE_SRC
|
||||||
|
${LITE_SRC}
|
||||||
|
${SRC_DIR}/pack_weight_manager.cc
|
||||||
|
)
|
||||||
|
endif()
|
||||||
|
|
||||||
|
if(MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE)
|
||||||
|
set(LITE_SRC
|
||||||
|
${LITE_SRC}
|
||||||
${SRC_DIR}/thread_cost_model.cc
|
${SRC_DIR}/thread_cost_model.cc
|
||||||
)
|
)
|
||||||
endif()
|
endif()
|
||||||
|
|
Loading…
Reference in New Issue