support macro for different features

This commit is contained in:
jpc_chenjianping 2022-03-18 11:15:02 +08:00
parent 81428386d6
commit ecc840fe06
17 changed files with 111 additions and 44 deletions

View File

@ -41,6 +41,10 @@ option(MSLITE_ENABLE_RUNTIME_GLOG "enable runtime glog" off)
option(MSLITE_ENABLE_COVERAGE "enable code coverage" off)
option(MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL "enable sharing memory with OpenGL" off)
option(MSLITE_ENABLE_SERVER_INFERENCE "enable inference on server" off)
option(MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE "enable distribute thread dynamically" off)
option(MSLITE_ENABLE_BFC_MEMORY "enable distribute BFC memory" off)
option(MSLITE_ENABLE_PARALLEL_INFERENCE "enable parallel inference interface" off)
option(MSLITE_ENABLE_SHARING_MODEL_WEIGHT "enable sharing model weight" off)
#Option that can be configured through manually
option(ENABLE_VERBOSE "" off)
@ -148,11 +152,46 @@ endif()
if(DEFINED ENV{MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL})
set(MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL $ENV{MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL})
endif()
if(DEFINED ENV{MSLITE_ENABLE_SERVING})
set(MSLITE_ENABLE_SERVING $ENV{MSLITE_ENABLE_SERVING})
endif()
option(MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE "enable distribute thread dynamically" off)
option(MSLITE_ENABLE_BFC_MEMORY "enable distribute BFC memory" off)
option(MSLITE_ENABLE_PARALLEL_INFERENCE "enable parallel inference interface" off)
option(MSLITE_ENABLE_SHARING_MODEL_WEIGHT "enable sharing model weight" off)
if(DEFINED ENV{MSLITE_ENABLE_SERVER_INFERENCE})
set(MSLITE_ENABLE_SERVER_INFERENCE $ENV{MSLITE_ENABLE_SERVER_INFERENCE})
set(MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE on)
set(MSLITE_ENABLE_BFC_MEMORY on)
set(MSLITE_ENABLE_PARALLEL_INFERENCE on)
set(MSLITE_ENABLE_SHARING_MODEL_WEIGHT on)
endif()
if(DEFINED ENV{MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE})
set(MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE $ENV{MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE})
endif()
if(MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE)
add_compile_definitions(DYNAMIC_THREAD_DISTRIBUTE)
endif()
if(DEFINED ENV{MSLITE_ENABLE_BFC_MEMORY})
set(MSLITE_ENABLE_BFC_MEMORY $ENV{MSLITE_ENABLE_BFC_MEMORY})
endif()
if(MSLITE_ENABLE_BFC_MEMORY)
add_compile_definitions(BFC_MEMORY)
endif()
if(DEFINED ENV{MSLITE_ENABLE_PARALLEL_INFERENCE})
set(MSLITE_ENABLE_PARALLEL_INFERENCE $ENV{MSLITE_ENABLE_PARALLEL_INFERENCE})
endif()
if(MSLITE_ENABLE_PARALLEL_INFERENCE)
add_compile_definitions(PARALLEL_INFERENCE)
endif()
if(DEFINED ENV{MSLITE_ENABLE_SHARING_MODEL_WEIGHT})
set(MSLITE_ENABLE_SHARING_MODEL_WEIGHT $ENV{MSLITE_ENABLE_SHARING_MODEL_WEIGHT})
endif()
if(MSLITE_ENABLE_SHARING_MODEL_WEIGHT)
add_compile_definitions(SHARING_MODEL_WEIGHT)
endif()
if(MACHINE_LINUX_ARM64)
@ -321,6 +360,10 @@ message(STATUS "\tMSLITE_ENABLE_RUNTIME_GLOG = \t${MSLITE_ENABLE_RUNTIME_
message(STATUS "\tMSLITE_ENABLE_COVERAGE = \t${MSLITE_ENABLE_COVERAGE}")
message(STATUS "\tMSLITE_ENABLE_SHARING_MEM_WITH_OPENGL = \t${MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL}")
message(STATUS "\tMSLITE_ENABLE_SERVER_INFERENCE = \t${MSLITE_ENABLE_SERVER_INFERENCE}")
message(STATUS "\tMSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE = \t${MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE}")
message(STATUS "\tMSLITE_ENABLE_BFC_MEMORY = \t${MSLITE_ENABLE_BFC_MEMORY}")
message(STATUS "\tMSLITE_ENABLE_PARALLEL_INFERENCE = \t${MSLITE_ENABLE_PARALLEL_INFERENCE}")
message(STATUS "\tMSLITE_ENABLE_SHARING_MODEL_WEIGHT = \t${MSLITE_ENABLE_SHARING_MODEL_WEIGHT}")
if((MSLITE_ENABLE_CONVERTER OR MSLITE_ENABLE_TESTCASES) AND (
NOT MSLITE_ENABLE_MINDRT

View File

@ -85,7 +85,7 @@ set(JNI_SRC
${NEW_NATIVE_DIR}/version.cpp
)
if(MSLITE_ENABLE_SERVER_INFERENCE)
if(MSLITE_ENABLE_PARALLEL_INFERENCE)
set(JNI_SRC
${JNI_SRC}
${NEW_NATIVE_DIR}/runner_config.cpp

View File

@ -65,7 +65,7 @@ file(GLOB CXX_API_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/cxx_api/graph/*.cc
${CMAKE_CURRENT_SOURCE_DIR}/cxx_api/tensor/*.cc
)
if(MSLITE_ENABLE_SERVER_INFERENCE)
if(MSLITE_ENABLE_PARALLEL_INFERENCE)
set(CXX_API_SRCS
${CXX_API_SRCS}
${CMAKE_CURRENT_SOURCE_DIR}/cxx_api/model_pool/predict_task_queue.cc
@ -140,17 +140,30 @@ if(MSLITE_ENABLE_MODEL_ENCRYPTION)
)
endif()
if(MSLITE_ENABLE_SERVER_INFERENCE)
set(LITE_SRC
if(MSLITE_ENABLE_BFC_MEMORY)
set(LITE_SRC
${LITE_SRC}
${CMAKE_CURRENT_SOURCE_DIR}/runtime/dynamic_mem_allocator.cc
${CMAKE_CURRENT_SOURCE_DIR}/runtime/dynamic_mem_manager.cc
${CMAKE_CURRENT_SOURCE_DIR}/runtime/numa_adapter.cc
)
endif()
if(MSLITE_ENABLE_SHARING_MODEL_WEIGHT)
set(LITE_SRC
${LITE_SRC}
${CMAKE_CURRENT_SOURCE_DIR}/pack_weight_manager.cc
${CMAKE_CURRENT_SOURCE_DIR}/thread_cost_model.cc
)
endif()
if(MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE)
set(LITE_SRC
${LITE_SRC}
${CMAKE_CURRENT_SOURCE_DIR}/thread_cost_model.cc
)
endif()
if(MSLITE_ENABLE_CONTROLFLOW)
file(GLOB CONTROL_FLOW_SRC
${CMAKE_CURRENT_SOURCE_DIR}/control_flow/*.cc

View File

@ -66,7 +66,7 @@ static const char *const kMSCacheVocabSize = "vocab_size";
static const char *const kMSCacheDeviceSize = "device_cache_size";
static const char *const kMSCacheSerializePath = "serialize_path";
// config
#ifdef SERVER_INFERENCE
#ifdef BFC_MEMORY
static const char *const kConfigServerInference = "server_inference";
static const char *const kConfigNUMANodeId = "numa_node_id";
#endif

View File

@ -27,7 +27,7 @@
#include <sys/types.h>
#include <sys/param.h>
#endif
#ifdef SERVER_INFERENCE
#ifdef BFC_MEMORY
#include <sys/sysinfo.h>
#endif
@ -182,7 +182,7 @@ size_t GetMaxMallocSize() {
return max_malloc_size;
}
#ifdef SERVER_INFERENCE
#ifdef BFC_MEMORY
int64_t GetFreeMemory() {
struct sysinfo info;
auto ret = sysinfo(&info);

View File

@ -43,7 +43,7 @@ uint64_t GetTimeUs();
bool IsSupportSDot();
size_t GetMaxMallocSize();
#ifdef SERVER_INFERENCE
#ifdef BFC_MEMORY
int64_t GetFreeMemory();
#endif

View File

@ -689,7 +689,7 @@ lite::LiteSession *ModelImpl::CreateLiteSession(lite::InnerContext *context) {
delete context;
return nullptr;
}
#ifdef SERVER_INFERENCE
#ifdef BFC_MEMORY
auto iter = config_info_.find(lite::kConfigServerInference);
if (iter != config_info_.end()) {
auto numa_iter = iter->second.find(lite::kConfigNUMANodeId);

View File

@ -63,9 +63,11 @@ Status ModelWorker::Init(const char *model_buf, size_t size, const std::shared_p
int node_id) {
model_ = std::make_shared<Model>();
mindspore::ModelType model_type = kMindIR_Lite;
#ifdef BFC_MEMORY
if (node_id != -1) {
model_->UpdateConfig(lite::kConfigServerInference, {lite::kConfigNUMANodeId, std::to_string(node_id)});
}
#endif
auto status = model_->Build(model_buf, size, model_type, model_context);
if (status != kSuccess) {
MS_LOG(ERROR) << "model build failed in ModelPool Init";

View File

@ -139,7 +139,7 @@ int InnerContext::Init() {
}
if (this->allocator == nullptr) {
#ifdef SERVER_INFERENCE
#ifdef BFC_MEMORY
this->allocator = std::make_shared<DynamicMemAllocator>(node_id_);
#else
this->allocator = mindspore::Allocator::Create();

View File

@ -20,7 +20,7 @@
#include <string>
#include <unordered_map>
#include "include/context.h"
#ifdef SERVER_INFERENCE
#ifdef BFC_MEMORY
#include "src/runtime/dynamic_mem_allocator.h"
#else
#include "src/runtime/inner_allocator.h"
@ -86,7 +86,7 @@ struct InnerContext : public Context {
void ReplaceLinkInfoSenderWithNewOne(void *new_sender, void *old_sender);
#ifdef SERVER_INFERENCE
#ifdef BFC_MEMORY
/// \brief Set NUMA node id.
///
/// \param[in] node Define the NUMA node id.
@ -110,7 +110,7 @@ struct InnerContext : public Context {
bool device_and_pkg_support_fp16_ = false;
#ifdef SERVER_INFERENCE
#ifdef BFC_MEMORY
int node_id_ = -1;
#endif

View File

@ -28,7 +28,7 @@
#include "src/common/graph_util.h"
#include "src/common/file_utils.h"
#include "src/tensor.h"
#ifdef SERVER_INFERENCE
#ifdef SHARING_MODEL_WEIGHT
#include "src/pack_weight_manager.h"
#endif
#ifdef ENABLE_V0
@ -108,7 +108,7 @@ int LiteModel::ConvertAttrToTensors() {
#endif
void LiteModel::Free() {
#ifdef SERVER_INFERENCE
#ifdef SHARING_MODEL_WEIGHT
lite::PackWeightManager::GetInstance()->DeleteSavedModelPtr(this);
#endif
if (this->buf != nullptr) {
@ -603,7 +603,7 @@ Model *ImportFromBuffer(const char *model_buf, size_t size, bool take_buf) {
MS_LOG(ERROR) << "new model fail!";
return nullptr;
}
#ifdef SERVER_INFERENCE
#ifdef SHARING_MODEL_WEIGHT
lite::PackWeightManager::GetInstance()->StoreLiteModel(model_buf, model);
#endif
auto status = model->ConstructModel(model_buf, size, take_buf);

View File

@ -16,7 +16,7 @@
#include "src/lite_session.h"
#include <set>
#ifdef SERVER_INFERENCE
#ifdef SHARING_MODEL_WEIGHT
#include "src/pack_weight_manager.h"
#endif
#ifndef RUNTIME_PASS_CLIP
@ -40,9 +40,6 @@
#include "src/lite_model.h"
#include "src/weight_decoder.h"
#include "src/runtime/runtime_allocator.h"
#ifdef SERVER_INFERENCE
#include "src/runtime/dynamic_mem_allocator.h"
#endif
#include "src/lite_kernel_util.h"
#ifndef CUSTOM_KERNEL_REGISTRY_CLIP
#include "src/registry/register_kernel_impl.h"
@ -666,7 +663,7 @@ void LiteSession::FreePackOpWeight(const std::vector<kernel::LiteKernel *> &kern
}
}
}
#ifdef SERVER_INFERENCE
#ifdef SHARING_MODEL_WEIGHT
int LiteSession::IniPackWeightData(Model *model) {
auto lite_model = reinterpret_cast<LiteModel *>(model);
auto kernel_num = model->all_nodes_.size();
@ -709,7 +706,7 @@ int LiteSession::CompileGraph(Model *model) {
is_running_.store(false);
return ret;
}
#ifdef SERVER_INFERENCE
#ifdef SHARING_MODEL_WEIGHT
ret = IniPackWeightData(model);
if (ret != RET_OK) {
MS_LOG(ERROR) << "IniPackWeightData failed.";
@ -1818,7 +1815,7 @@ const char *lite::LiteSession::LoadModelByPath(const std::string &file, mindspor
delete[] model_buf;
model_buf = nullptr;
}
#ifdef SERVER_INFERENCE
#ifdef SHARING_MODEL_WEIGHT
lite::PackWeightManager::GetInstance()->InitWeightManagerByPath(file, model_buf);
#endif
return lite_buf;
@ -1842,7 +1839,7 @@ const char *lite::LiteSession::LoadModelByPath(const std::string &file, mindspor
delete[] model_buf;
model_buf = nullptr;
}
#ifdef SERVER_INFERENCE
#ifdef SHARING_MODEL_WEIGHT
lite::PackWeightManager::GetInstance()->InitWeightManagerByPath(file, model_buf);
#endif
return lite_buf;

View File

@ -119,7 +119,7 @@ class LiteSession : public session::LiteSession {
const std::vector<kernel::LiteKernel *> &kernels,
const std::unordered_map<Tensor *, Tensor *> &isolate_input_map = std::unordered_map<Tensor *, Tensor *>());
static void FreePackOpWeight(const std::vector<kernel::LiteKernel *> &kernels);
#ifdef SERVER_INFERENCE
#ifdef SHARING_MODEL_WEIGHT
int IniPackWeightData(Model *model);
#endif

View File

@ -13,7 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifdef SERVER_INFERENCE
#ifdef SHARING_MODEL_WEIGHT
#include "src/pack_weight_manager.h"
namespace mindspore::lite {
namespace {

View File

@ -41,7 +41,7 @@
#include "include/mpi_sys.h"
#include "include/mpi_vb.h"
#endif
#ifdef SERVER_INFERENCE
#ifdef PARALLEL_INFERENCE
#include <thread>
#endif
namespace mindspore {
@ -51,7 +51,7 @@ constexpr int kFrequencyDefault = 3;
constexpr int kPercentageDivisor = 100;
constexpr int kDumpInputsAndOutputs = 0;
constexpr int kDumpOutputs = 2;
#ifdef SERVER_INFERENCE
#ifdef PARALLEL_INFERENCE
constexpr int kMaxRequestNum = 200;
#endif
namespace lite {
@ -221,7 +221,7 @@ int BenchmarkUnifiedApi::LoadInput() {
}
int BenchmarkUnifiedApi::GenerateInputData() {
#ifdef SERVER_INFERENCE
#ifdef PARALLEL_INFERENCE
if (flags_->enable_parallel_predict_) {
std::vector<MSTensor> inputs;
for (size_t i = 0; i < ms_inputs_for_api_.size(); i++) {
@ -298,7 +298,7 @@ void BenchmarkUnifiedApi::UpdateConfigInfo() {
}
int BenchmarkUnifiedApi::ReadInputFile() {
#ifdef SERVER_INFERENCE
#ifdef PARALLEL_INFERENCE
if (flags_->enable_parallel_predict_) {
std::vector<MSTensor> inputs;
for (size_t i = 0; i < ms_inputs_for_api_.size(); i++) {
@ -486,7 +486,7 @@ int BenchmarkUnifiedApi::InitMSContext(const std::shared_ptr<mindspore::Context>
return RET_OK;
}
#ifdef SERVER_INFERENCE
#ifdef PARALLEL_INFERENCE
int BenchmarkUnifiedApi::CompareOutputForModelPool(std::vector<mindspore::MSTensor> *outputs) {
if (outputs->empty()) {
MS_LOG(ERROR) << "outputs is empty.";
@ -897,7 +897,7 @@ int BenchmarkUnifiedApi::MarkAccuracy() {
int BenchmarkUnifiedApi::PrintInputData() {
for (size_t i = 0; i < ms_inputs_for_api_.size(); i++) {
mindspore::MSTensor input;
#ifdef SERVER_INFERENCE
#ifdef PARALLEL_INFERENCE
if (flags_->enable_parallel_predict_) {
input = all_inputs_[0][i];
} else {
@ -947,7 +947,7 @@ int BenchmarkUnifiedApi::PrintInputData() {
}
return RET_OK;
}
#ifdef SERVER_INFERENCE
#ifdef PARALLEL_INFERENCE
int BenchmarkUnifiedApi::RunModelPool(std::shared_ptr<mindspore::Context> context) {
if (flags_->warm_up_loop_count_ > kMaxRequestNum) {
MS_LOG(WARNING) << "in parallel predict warm up loop count should less than" << kMaxRequestNum;
@ -1113,7 +1113,7 @@ int BenchmarkUnifiedApi::RunBenchmark() {
}
UpdateConfigInfo();
#ifdef SERVER_INFERENCE
#ifdef PARALLEL_INFERENCE
if (flags_->enable_parallel_predict_) {
status = RunModelPool(context);
if (status != RET_OK) {

View File

@ -42,7 +42,7 @@
#ifdef ENABLE_OPENGL_TEXTURE
#include "tools/common/opengl_util.h"
#endif
#ifdef SERVER_INFERENCE
#ifdef PARALLEL_INFERENCE
#include "include/api/model_parallel_runner.h"
#endif
@ -86,7 +86,7 @@ class MS_API BenchmarkUnifiedApi : public BenchmarkBase {
int GetDataTypeByTensorName(const std::string &tensor_name) override;
int CompareOutput() override;
#ifdef SERVER_INFERENCE
#ifdef PARALLEL_INFERENCE
int CompareOutputForModelPool(std::vector<mindspore::MSTensor> *outputs);
#endif
int CompareOutputByCosineDistance(float cosine_distance_threshold);
@ -100,7 +100,7 @@ class MS_API BenchmarkUnifiedApi : public BenchmarkBase {
int InitPrintTensorDataCallbackParameter() override;
int PrintInputData();
#ifdef SERVER_INFERENCE
#ifdef PARALLEL_INFERENCE
int RunModelPool(std::shared_ptr<mindspore::Context> context);
#endif

View File

@ -125,13 +125,25 @@ set(LITE_SRC ${API_SRC}
${SRC_DIR}/huffman_decode.cc
${SRC_DIR}/delegate/tensorrt/distribution/distribution_base.cc
)
if(MSLITE_ENABLE_SERVER_INFERENCE)
set(LITE_SRC
if(MSLITE_ENABLE_BFC_MEMORY)
set(LITE_SRC
${LITE_SRC}
${SRC_DIR}/pack_weight_manager.cc
${SRC_DIR}/runtime/dynamic_mem_allocator.cc
${SRC_DIR}/runtime/dynamic_mem_manager.cc
${SRC_DIR}/runtime/numa_adapter.cc
)
endif()
if(MSLITE_ENABLE_SHARING_MODEL_WEIGHT)
set(LITE_SRC
${LITE_SRC}
${SRC_DIR}/pack_weight_manager.cc
)
endif()
if(MSLITE_ENABLE_DYNAMIC_THREAD_DISTRIBUTE)
set(LITE_SRC
${LITE_SRC}
${SRC_DIR}/thread_cost_model.cc
)
endif()