!28834 [MS][LITE] pack weight
Merge pull request !28834 from yefeng/195-4-copy_3
This commit is contained in:
commit
fa31dccdea
|
@ -39,6 +39,7 @@ option(MSLITE_ENABLE_RUNTIME_CONVERT "enable runtime convert" off)
|
|||
option(MSLITE_ENABLE_RUNTIME_GLOG "enable runtime glog" off)
|
||||
option(MSLITE_ENABLE_COVERAGE "enable code coverage" off)
|
||||
option(MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL "enable sharing memory with OpenGL" off)
|
||||
option(MSLITE_ENABLE_SERVING "enable serving" off)
|
||||
|
||||
#Option that can be configured through manually
|
||||
option(ENABLE_VERBOSE "" off)
|
||||
|
@ -140,6 +141,9 @@ endif()
|
|||
if(DEFINED ENV{MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL})
|
||||
set(MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL $ENV{MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL})
|
||||
endif()
|
||||
if(DEFINED ENV{MSLITE_ENABLE_SERVING})
|
||||
set(MSLITE_ENABLE_SERVING $ENV{MSLITE_ENABLE_SERVING})
|
||||
endif()
|
||||
|
||||
if(MACHINE_LINUX_ARM64)
|
||||
add_compile_definitions(MACHINE_LINUX_ARM64)
|
||||
|
@ -292,6 +296,7 @@ message(STATUS "\tMSLITE_ENABLE_RUNTIME_CONVERT = \t${MSLITE_ENABLE_RUNTIME_
|
|||
message(STATUS "\tMSLITE_ENABLE_RUNTIME_GLOG = \t${MSLITE_ENABLE_RUNTIME_GLOG}")
|
||||
message(STATUS "\tMSLITE_ENABLE_COVERAGE = \t${MSLITE_ENABLE_COVERAGE}")
|
||||
message(STATUS "\tMSLITE_ENABLE_SHARING_MEM_WITH_OPENGL = \t${MSLITE_ENABLE_SHARING_MEM_WITH_OPENGL}")
|
||||
message(STATUS "\tMSLITE_ENABLE_SERVING = \t${MSLITE_ENABLE_SERVING}")
|
||||
|
||||
if((MSLITE_ENABLE_CONVERTER OR MSLITE_ENABLE_TESTCASES) AND (
|
||||
NOT MSLITE_ENABLE_MINDRT
|
||||
|
|
|
@ -152,6 +152,7 @@ set(LITE_SRC
|
|||
${LITE_DIR}/src/huffman_decode.cc
|
||||
${LITE_DIR}/src/common/log.cc
|
||||
${LITE_DIR}/src/common/utils.cc
|
||||
${LITE_DIR}/src/pack_weight_manager.cc
|
||||
${LITE_DIR}/../core/utils/status.cc
|
||||
### tools
|
||||
${LITE_DIR}/tools/common/flag_parser.cc
|
||||
|
|
|
@ -30,6 +30,9 @@ endif()
|
|||
if(NOT MSLITE_ENABLE_INT8)
|
||||
add_compile_definitions(OP_INT8_CLIP)
|
||||
endif()
|
||||
if(MSLITE_ENABLE_SERVING)
|
||||
add_compile_definitions(USING_SERVING)
|
||||
endif()
|
||||
|
||||
if(APPLE OR PLATFORM_ARM32 OR PLATFORM_ARM64)
|
||||
#for performance
|
||||
|
@ -118,6 +121,7 @@ set(LITE_SRC
|
|||
${CMAKE_CURRENT_SOURCE_DIR}/lite_session.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/errorcode.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cpu_info.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/pack_weight_manager.cc
|
||||
)
|
||||
|
||||
if(MSLITE_ENABLE_CONTROLFLOW)
|
||||
|
|
|
@ -90,7 +90,7 @@ std::vector<size_t> GetLinkedPostNodeIdx(const lite::Model *model, const size_t
|
|||
bool IsPackedOp(int op_type) {
|
||||
static const std::vector<int> packed_ops = {schema::PrimitiveType_Conv2DFusion,
|
||||
schema::PrimitiveType_Conv2dTransposeFusion,
|
||||
schema::PrimitiveType_MatMulFusion};
|
||||
schema::PrimitiveType_FullConnection, schema::PrimitiveType_MatMulFusion};
|
||||
return IsContain(packed_ops, op_type);
|
||||
}
|
||||
} // namespace lite
|
||||
|
|
|
@ -15,6 +15,10 @@
|
|||
*/
|
||||
|
||||
#include "src/lite_session.h"
|
||||
#include <set>
|
||||
#ifdef USING_SERVING
|
||||
#include "src/pack_weight_manager.h"
|
||||
#endif
|
||||
#ifndef RUNTIME_PASS_CLIP
|
||||
#include "src/runtime/runtime_pass.h"
|
||||
#endif
|
||||
|
@ -548,6 +552,35 @@ void LiteSession::FreePackOpWeight(const std::vector<kernel::LiteKernel *> &kern
|
|||
}
|
||||
}
|
||||
}
|
||||
#ifdef USING_SERVING
|
||||
// Registers the const (weight) tensors of every packable op in `model` with the
// PackWeightManager and re-points this session's tensors at any weight data the
// manager already holds, so multiple sessions created from the same model share
// one copy of packed weights. Always returns RET_OK.
int LiteSession::IniPackWeightData(Model *model) {
  auto lite_model = reinterpret_cast<LiteModel *>(model);
  auto kernel_num = model->all_nodes_.size();
  for (size_t i = 0; i < kernel_num; i++) {
    auto node = model->all_nodes_[i];
    auto node_type = node->node_type_;
    // Only ops whose weights get packed (conv/matmul family) are registered.
    if (IsPackedOp(node_type)) {
      for (size_t j = 0; j < node->input_indices_.size(); j++) {
        auto tensor_index = node->input_indices_[j];
        auto src_tensor = lite_model->GetSchemaTensor(tensor_index);
        // Skip non-const inputs (activations) and empty tensors: only inputs
        // carrying schema data are weights worth sharing.
        if (src_tensor == nullptr || src_tensor->handler() == nullptr || src_tensor->data() == nullptr ||
            src_tensor->length() == 0) {
          continue;
        }
        lite::PackWeightManager::GetInstance()->StoreOriginTensor(lite_model, src_tensor, tensor_index);
        auto data = lite::PackWeightManager::GetInstance()->GetTensorData(lite_model, tensor_index);
        if (data == nullptr) {
          // Nothing packed yet for this tensor; the kernel will pack it later.
          MS_LOG(DEBUG) << "data not packed.";
          continue;
        }
        // The shared buffer is owned by the PackWeightManager, not this
        // session's tensor, so disable its ownership to avoid a double free.
        this->tensors_[tensor_index]->set_data(data);
        this->tensors_[tensor_index]->set_own_data(false);
      }
    }
  }
  return RET_OK;
}
|
||||
#endif
|
||||
|
||||
int LiteSession::CompileGraph(Model *model) {
|
||||
auto ret = PreCheck(model);
|
||||
|
@ -563,6 +596,13 @@ int LiteSession::CompileGraph(Model *model) {
|
|||
is_running_.store(false);
|
||||
return ret;
|
||||
}
|
||||
#ifdef USING_SERVING
|
||||
ret = IniPackWeightData(model);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "IniPackWeightData failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
#endif
|
||||
InitGraphInputTensors(model);
|
||||
InitGraphOutputTensors(model);
|
||||
|
||||
|
@ -942,6 +982,9 @@ LiteSession::~LiteSession() {
|
|||
MS_LOG(ERROR) << "Not support multi-threading";
|
||||
return;
|
||||
}
|
||||
#ifdef USING_SERVING
|
||||
lite::PackWeightManager::GetInstance()->DeleteSavedSessionPtr(this);
|
||||
#endif
|
||||
for (auto *kernel : kernels_) {
|
||||
delete kernel;
|
||||
kernel = nullptr;
|
||||
|
@ -1640,6 +1683,9 @@ const char *lite::LiteSession::LoadModelByPath(const std::string &file, mindspor
|
|||
delete[] model_buf;
|
||||
model_buf = nullptr;
|
||||
}
|
||||
#ifdef USING_SERVING
|
||||
lite::PackWeightManager::GetInstance()->InitWeightManagerByPath(file, model_buf, nullptr);
|
||||
#endif
|
||||
return lite_buf;
|
||||
}
|
||||
|
||||
|
@ -1661,6 +1707,9 @@ const char *lite::LiteSession::LoadModelByPath(const std::string &file, mindspor
|
|||
delete[] model_buf;
|
||||
model_buf = nullptr;
|
||||
}
|
||||
#ifdef USING_SERVING
|
||||
lite::PackWeightManager::GetInstance()->InitWeightManagerByPath(file, model_buf, nullptr);
|
||||
#endif
|
||||
return lite_buf;
|
||||
}
|
||||
|
||||
|
@ -1673,7 +1722,9 @@ int lite::LiteSession::LoadModelAndCompileByBuf(const char *model_buf, mindspore
|
|||
MS_LOG(ERROR) << "Invalid model_buf";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
#ifdef USING_SERVING
|
||||
lite::PackWeightManager::GetInstance()->InitWeightManagerByBuf(model_buf, this);
|
||||
#endif
|
||||
auto *model = lite::ImportFromBuffer(lite_buf, lite_buf_size, true);
|
||||
if (model == nullptr) {
|
||||
MS_LOG(ERROR) << "Import model failed";
|
||||
|
@ -1704,7 +1755,9 @@ int lite::LiteSession::LoadModelAndCompileByBuf(const char *model_buf, mindspore
|
|||
MS_LOG(ERROR) << "Invalid model_buf";
|
||||
return RET_ERROR;
|
||||
}
|
||||
|
||||
#ifdef USING_SERVING
|
||||
lite::PackWeightManager::GetInstance()->InitWeightManagerByBuf(model_buf, this);
|
||||
#endif
|
||||
auto *model = lite::ImportFromBuffer(lite_buf, lite_buf_size, true);
|
||||
if (model == nullptr) {
|
||||
MS_LOG(ERROR) << "Import model failed";
|
||||
|
|
|
@ -114,6 +114,9 @@ class LiteSession : public session::LiteSession {
|
|||
const std::vector<kernel::LiteKernel *> &kernels,
|
||||
const std::unordered_map<Tensor *, Tensor *> isolate_input_map = std::unordered_map<Tensor *, Tensor *>());
|
||||
static void FreePackOpWeight(const std::vector<kernel::LiteKernel *> &kernels);
|
||||
#ifdef USING_SERVING
|
||||
int IniPackWeightData(Model *model);
|
||||
#endif
|
||||
|
||||
private:
|
||||
int PreCheck(Model *model);
|
||||
|
|
|
@ -0,0 +1,245 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifdef USING_SERVING
|
||||
#include "src/pack_weight_manager.h"
|
||||
namespace mindspore::lite {
|
||||
// Returns the process-wide PackWeightManager singleton. The instance is lazily
// constructed on first use (thread-safe since C++11) and destroyed at exit.
PackWeightManager *PackWeightManager::GetInstance() {
  static PackWeightManager manager;
  return &manager;
}
|
||||
|
||||
// Registers `lite_session` as a user of the weights belonging to the model
// buffer `model_buf`, creating the ModelConstWeight bookkeeping record on
// first sight of the buffer. Returns silently on null arguments.
void PackWeightManager::InitWeightManagerByBuf(const char *model_buf, const LiteSession *lite_session) {
  MS_CHECK_TRUE_RET_VOID(model_buf != nullptr);
  MS_CHECK_TRUE_RET_VOID(lite_session != nullptr);
  // First time this buffer is seen: allocate its weight record.
  if (buf_model_weight_.find(model_buf) == buf_model_weight_.end()) {
    auto *model_const_weight = new (std::nothrow) ModelConstWeight();
    if (model_const_weight == nullptr) {
      // NOTE(review): allocation failure is swallowed; callers cannot detect it.
      return;
    }
    buf_model_weight_[model_buf] = model_const_weight;
  }
  buf_model_weight_[model_buf]->lite_sessions.push_back(lite_session);
}
|
||||
|
||||
void PackWeightManager::InitWeightManagerByPath(const std::string &model_path, const char *model_buf,
|
||||
const LiteSession *session) {
|
||||
MS_CHECK_TRUE_RET_VOID(model_buf != nullptr);
|
||||
if (path_model_buf_.find(model_path) == path_model_buf_.end()) {
|
||||
auto *model_const_weight = new (std::nothrow) ModelConstWeight();
|
||||
if (model_const_weight == nullptr) {
|
||||
return;
|
||||
}
|
||||
path_model_weight_[model_path] = model_const_weight;
|
||||
}
|
||||
path_model_weight_[model_path]->lite_sessions.push_back(session);
|
||||
path_model_buf_[model_path].push_back(model_buf);
|
||||
}
|
||||
|
||||
// Associates `model` with the weight record that owns `model_buf`. The
// path-keyed registry is searched first (the buffer may have been registered
// under a file path); otherwise the buffer-keyed registry must already contain
// the buffer. Returns RET_ERROR on null arguments or an unknown buffer.
STATUS PackWeightManager::StoreLiteModel(const char *model_buf, const Model *model) {
  MS_CHECK_TRUE_RET(model_buf != nullptr, RET_ERROR);
  MS_CHECK_TRUE_RET(model != nullptr, RET_ERROR);
  // A buffer registered via InitWeightManagerByPath lives in path_model_buf_;
  // attach the model to the matching path's weight record.
  for (auto &item : path_model_buf_) {
    auto &model_bufs = item.second;
    auto path = item.first;
    if (find(model_bufs.begin(), model_bufs.end(), model_buf) != model_bufs.end()) {
      path_model_weight_[path]->lite_models.push_back(model);
      return RET_OK;
    }
  }
  // Otherwise the buffer must have been registered via InitWeightManagerByBuf.
  if (buf_model_weight_.find(model_buf) == buf_model_weight_.end()) {
    MS_LOG(ERROR) << "Set model failed.";
    return RET_ERROR;
  }
  buf_model_weight_[model_buf]->lite_models.push_back(model);
  return RET_OK;
}
|
||||
|
||||
// Caches the original (unpacked) weight data pointer of `origin_tensor` under
// `tensor_index` in every weight record that tracks `model`. A pointer is
// stored only once per index; repeated calls for the same index are no-ops.
void PackWeightManager::StoreOriginTensor(const LiteModel *model, const SchemaTensorWrapper *origin_tensor,
                                          size_t tensor_index) {
  MS_CHECK_TRUE_RET_VOID(model != nullptr);
  MS_CHECK_TRUE_RET_VOID(origin_tensor != nullptr);
  // Buffer-keyed registry: record the origin data for models registered by buffer.
  for (auto &item : buf_model_weight_) {
    auto &model_buf = item.first;
    auto &model_weight = item.second;
    for (auto &lite_model : model_weight->lite_models) {
      if (model == lite_model) {
        if (model_weight->origin_weight.find(tensor_index) == model_weight->origin_weight.end()) {
          buf_model_weight_[model_buf]->origin_weight[tensor_index] = origin_tensor->data();
        }
      }
    }
  }
  // Path-keyed registry: same bookkeeping for models registered by file path.
  for (auto &item : path_model_weight_) {
    auto &path = item.first;
    auto &model_weight = item.second;
    for (auto &lite_model : model_weight->lite_models) {
      if (model == lite_model) {
        if (model_weight->origin_weight.find(tensor_index) == model_weight->origin_weight.end()) {
          path_model_weight_[path]->origin_weight[tensor_index] = origin_tensor->data();
        }
      }
    }
  }
}
|
||||
|
||||
void *PackWeightManager::GetTensorData(const LiteModel *model, size_t tensor_index) {
|
||||
MS_CHECK_TRUE_RET(model != nullptr, nullptr);
|
||||
for (auto &item : buf_model_weight_) {
|
||||
auto &model_weight = item.second;
|
||||
auto &models = model_weight->lite_models;
|
||||
if (find(models.begin(), models.end(), model) != models.end()) {
|
||||
if (model_weight->packed_weight.find(tensor_index) != model_weight->packed_weight.end()) {
|
||||
return model_weight->packed_weight[tensor_index];
|
||||
}
|
||||
}
|
||||
}
|
||||
for (auto &item : path_model_weight_) {
|
||||
auto &model_weight = item.second;
|
||||
auto &models = model_weight->lite_models;
|
||||
if (find(models.begin(), models.end(), model) != models.end()) {
|
||||
if (model_weight->packed_weight.find(tensor_index) != model_weight->packed_weight.end()) {
|
||||
return model_weight->packed_weight[tensor_index];
|
||||
}
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
// Resolves the caching state of `tensor`'s weight data within one weight record.
// Returns:
//   {PACKED,  ptr}  - tensor->data() is already a packed buffer we own.
//   {NOTPACK, ptr}  - tensor->data() matches an origin weight; a zeroed buffer
//                     of `size` bytes was allocated and cached for packing.
//   {MALLOC,  null} - unknown tensor, oversize request, or allocation failure;
//                     the caller must malloc its own buffer.
std::pair<PackStatus, void *> PackWeightManager::FindPackedTensor(PackedWeight *packed_weights,
                                                                  const OriginWeight &origin_weights,
                                                                  const Tensor *tensor, const size_t size) {
  MS_CHECK_TRUE_RET(packed_weights != nullptr, std::make_pair(MALLOC, nullptr));
  MS_CHECK_TRUE_RET(tensor != nullptr, std::make_pair(MALLOC, nullptr));
  if (size > MAX_MALLOC_SIZE) {
    MS_LOG(ERROR) << "malloc size more than MAX_MALLOC_SIZE";
    return std::make_pair(MALLOC, nullptr);
  }
  // Case 1: the tensor's data pointer is one of our packed buffers already.
  for (const auto &entry : *packed_weights) {
    if (entry.second == tensor->data()) {
      return std::make_pair(PACKED, entry.second);
    }
  }
  // Case 2: the tensor still points at its origin weight; hand out a fresh
  // zeroed buffer, cached under the same tensor index, for the caller to pack.
  for (const auto &entry : origin_weights) {
    const size_t index = entry.first;
    if (entry.second != tensor->data()) {
      continue;
    }
    void *buffer = malloc(size);
    if (buffer == nullptr) {
      MS_LOG(ERROR) << "malloc failed.";
      return std::make_pair(MALLOC, nullptr);
    }
    memset(buffer, 0, size);
    packed_weights->insert(std::make_pair(index, buffer));
    return std::make_pair(NOTPACK, packed_weights->at(index));
  }
  return std::make_pair(MALLOC, nullptr);
}
|
||||
|
||||
std::pair<PackStatus, void *> PackWeightManager::GetPackedTensor(const Tensor *tensor, const size_t size) {
|
||||
MS_CHECK_TRUE_RET(tensor != nullptr, std::make_pair(MALLOC, nullptr));
|
||||
std::pair<PackStatus, void *> packed_tensor_pair;
|
||||
for (auto &item : buf_model_weight_) {
|
||||
auto &model_weight = item.second;
|
||||
auto &origin_weithts = model_weight->origin_weight;
|
||||
auto &packed_weights = model_weight->packed_weight;
|
||||
packed_tensor_pair = FindPackedTensor(&packed_weights, origin_weithts, tensor, size);
|
||||
if (packed_tensor_pair.second != nullptr) {
|
||||
return packed_tensor_pair;
|
||||
}
|
||||
}
|
||||
|
||||
for (auto &item : path_model_weight_) {
|
||||
auto &model_weight = item.second;
|
||||
auto &origin_weithts = model_weight->origin_weight;
|
||||
auto &packed_weights = model_weight->packed_weight;
|
||||
packed_tensor_pair = FindPackedTensor(&packed_weights, origin_weithts, tensor, size);
|
||||
if (packed_tensor_pair.second != nullptr) {
|
||||
return packed_tensor_pair;
|
||||
}
|
||||
}
|
||||
return std::make_pair(MALLOC, nullptr);
|
||||
}
|
||||
|
||||
// Drops the reference to `delete_model` from both weight registries so a freed
// model cannot be matched by later lookups. Does not free any weight buffers.
void PackWeightManager::DeleteSavedModelPtr(LiteModel *delete_model) {
  MS_CHECK_TRUE_RET_VOID(delete_model != nullptr);
  // Remove the first matching entry from one record's model list.
  auto erase_model = [delete_model](ModelConstWeight *weight) {
    auto &models = weight->lite_models;
    auto pos = std::find(models.begin(), models.end(), delete_model);
    if (pos != models.end()) {
      models.erase(pos);
    }
  };
  for (const auto &entry : path_model_weight_) {
    erase_model(entry.second);
  }
  for (const auto &entry : buf_model_weight_) {
    erase_model(entry.second);
  }
}
|
||||
|
||||
// Drops the reference to `delete_session` from both weight registries so a
// destroyed session is no longer tracked. Does not free any weight buffers.
void PackWeightManager::DeleteSavedSessionPtr(LiteSession *delete_session) {
  MS_CHECK_TRUE_RET_VOID(delete_session != nullptr);
  // Remove the first matching entry from one record's session list.
  auto erase_session = [delete_session](ModelConstWeight *weight) {
    auto &sessions = weight->lite_sessions;
    auto pos = std::find(sessions.begin(), sessions.end(), delete_session);
    if (pos != sessions.end()) {
      sessions.erase(pos);
    }
  };
  for (const auto &entry : path_model_weight_) {
    erase_session(entry.second);
  }
  for (const auto &entry : buf_model_weight_) {
    erase_session(entry.second);
  }
}
|
||||
|
||||
// Frees every packed weight buffer held by `weight`, then deletes the record
// itself. Safe to call with nullptr.
void PackWeightManager::FreePackedWeight(ModelConstWeight *weight) {
  // Bug fix: the original dereferenced `weight->packed_weight` BEFORE its
  // `weight != nullptr` check, so a null argument crashed before the guard ran.
  if (weight == nullptr) {
    return;
  }
  for (auto &packed_tensor : weight->packed_weight) {
    if (packed_tensor.second != nullptr) {
      free(packed_tensor.second);
      packed_tensor.second = nullptr;
    }
  }
  // The original also assigned nullptr to the by-value parameter after delete,
  // which had no effect on the caller; dropped.
  delete weight;
}
|
||||
|
||||
void PackWeightManager::FreeBufModelWeight() {
|
||||
for (auto &item : buf_model_weight_) {
|
||||
FreePackedWeight(item.second);
|
||||
}
|
||||
}
|
||||
|
||||
void PackWeightManager::FreePathModelWeight() {
|
||||
for (auto &item : path_model_weight_) {
|
||||
FreePackedWeight(item.second);
|
||||
}
|
||||
}
|
||||
|
||||
// Releases every cached packed-weight buffer and bookkeeping record from both
// the path-keyed and the buffer-keyed registries.
PackWeightManager::~PackWeightManager() {
  FreePathModelWeight();
  FreeBufModelWeight();
}
|
||||
} // namespace mindspore::lite
|
||||
#endif
|
|
@ -0,0 +1,70 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_PACK_WEIGHT_MANAGER_H_
#define MINDSPORE_LITE_SRC_PACK_WEIGHT_MANAGER_H_
#ifdef USING_SERVING
#include <map>
#include <string>
#include <algorithm>
#include <utility>
#include <vector>
#include "src/tensor.h"
#include "src/lite_session.h"
namespace mindspore::lite {
// tensor index <-> tensor data
using OriginWeight = std::map<size_t, const void *>;
using PackedWeight = std::map<size_t, void *>;
// Per-model-source bookkeeping: which models/sessions use this source, the
// original (schema) weight data, and the packed buffers derived from it.
// NOTE(review): pointers in origin_weight are non-owning views into the model
// buffer; pointers in packed_weight are malloc'd and owned by the manager.
struct ModelConstWeight {
  PackedWeight packed_weight;
  OriginWeight origin_weight;
  std::vector<const Model *> lite_models;
  std::vector<const LiteSession *> lite_sessions;
};
// Packing state of a weight tensor as reported by GetPackedTensor:
// NOTPACK - buffer allocated but not yet filled; PACKED - already packed and
// shared; MALLOC - unknown to the manager, caller must allocate itself.
enum PackStatus { NOTPACK = 1, PACKED = 2, MALLOC = 3 };

// Process-wide singleton that lets multiple LiteSessions built from the same
// model file/buffer share one copy of packed (pre-transformed) weights.
class PackWeightManager {
 public:
  static PackWeightManager *GetInstance();
  virtual ~PackWeightManager();

  // Bookkeeping removal when a model/session is destroyed (no buffers freed).
  void DeleteSavedModelPtr(LiteModel *delete_model);
  void DeleteSavedSessionPtr(LiteSession *delete_session);
  // Free all packed buffers and records for the respective registry.
  void FreePathModelWeight();
  void FreeBufModelWeight();

  // Registration entry points: by raw model buffer or by model file path.
  void InitWeightManagerByBuf(const char *model_buf, const LiteSession *lite_session);
  void InitWeightManagerByPath(const std::string &model_path, const char *model_buf,
                               const LiteSession *session = nullptr);
  // Attach a parsed model to the registry entry owning its buffer.
  STATUS StoreLiteModel(const char *model_buf, const Model *model);

  // Record an original weight pointer / fetch a previously packed buffer.
  void StoreOriginTensor(const LiteModel *model, const SchemaTensorWrapper *origin_tensor, size_t tensor_index);
  void *GetTensorData(const LiteModel *model, size_t tensor_index);
  // Returns the packing state and (possibly freshly allocated) buffer for a tensor.
  std::pair<PackStatus, void *> GetPackedTensor(const Tensor *tensor, const size_t size);
  void FreePackedWeight(ModelConstWeight *weight);

 private:
  PackWeightManager() = default;
  std::pair<PackStatus, void *> FindPackedTensor(PackedWeight *packed_weights, const OriginWeight &origin_weithts,
                                                 const Tensor *tensor, const size_t size);
  // model buffer -> weight record (models loaded directly from memory)
  std::map<const char *, ModelConstWeight *> buf_model_weight_;
  std::map<const std::string, std::vector<const void *>> path_model_buf_;
  // path: model_buf
  std::map<const std::string, ModelConstWeight *> path_model_weight_;
};
}  // namespace mindspore::lite
#endif
#endif  // MINDSPORE_LITE_SRC_PACK_WEIGHT_MANAGER_H_
|
|
@ -50,10 +50,14 @@ ConvolutionBaseCPUKernel::~ConvolutionBaseCPUKernel() {
|
|||
if (addr_map.find(reinterpret_cast<uintptr_t>(packed_weight_)) != addr_map.end()) {
|
||||
FreeAlignedData(reinterpret_cast<void **>(&packed_weight_));
|
||||
} else if (!op_parameter_->is_train_session_) {
|
||||
if (packed_weight_ != nullptr) {
|
||||
#ifdef USING_SERVING
|
||||
if (packed_weight_ != nullptr && weight_is_packed_ == lite::MALLOC) {
|
||||
#endif
|
||||
free(packed_weight_);
|
||||
packed_weight_ = nullptr;
|
||||
#ifdef USING_SERVING
|
||||
}
|
||||
#endif
|
||||
}
|
||||
if (addr_map.find(reinterpret_cast<uintptr_t>(bias_data_)) != addr_map.end()) {
|
||||
FreeAlignedData(reinterpret_cast<void **>(&bias_data_));
|
||||
|
@ -154,6 +158,12 @@ int ConvolutionBaseCPUKernel::InitConvWeightBias() {
|
|||
MS_ASSERT(in_tensors_.size() == kInputSize1);
|
||||
}
|
||||
if (!op_parameter_->is_train_session_) {
|
||||
#ifdef USING_SERVING
|
||||
if (weight_is_packed_ == lite::PACKED) {
|
||||
MS_LOG(DEBUG) << "not do weight pack.";
|
||||
return RET_OK;
|
||||
}
|
||||
#endif
|
||||
if (origin_weight_ != nullptr) {
|
||||
PackWeight();
|
||||
} else {
|
||||
|
|
|
@ -28,6 +28,9 @@
|
|||
#endif
|
||||
#endif
|
||||
#include "src/inner_kernel.h"
|
||||
#ifdef USING_SERVING
|
||||
#include "src/pack_weight_manager.h"
|
||||
#endif
|
||||
#include "include/context.h"
|
||||
#include "src/runtime/kernel/arm/base/layout_transform.h"
|
||||
#include "src/weight_decoder.h"
|
||||
|
@ -77,6 +80,9 @@ class ConvolutionBaseCPUKernel : public InnerKernel {
|
|||
bool IsRepack() { return is_repack_; }
|
||||
std::unordered_map<uintptr_t, void *> addr_map;
|
||||
void *packed_weight_ = nullptr;
|
||||
#ifdef USING_SERVING
|
||||
lite::PackStatus weight_is_packed_ = lite::MALLOC;
|
||||
#endif
|
||||
void *bias_data_ = nullptr;
|
||||
const InnerContext *ctx_ = nullptr;
|
||||
ConvParameter *conv_param_ = nullptr;
|
||||
|
|
|
@ -15,7 +15,9 @@
|
|||
*/
|
||||
|
||||
#include "src/runtime/kernel/arm/fp32/convolution_1x1_fp32.h"
|
||||
|
||||
#ifdef USING_SERVING
|
||||
#include "src/pack_weight_manager.h"
|
||||
#endif
|
||||
using mindspore::lite::RET_ERROR;
|
||||
using mindspore::lite::RET_MEMORY_FAILED;
|
||||
using mindspore::lite::RET_OK;
|
||||
|
@ -305,7 +307,16 @@ int Convolution1x1CPUKernel::MallocWeightBiasData() {
|
|||
int size = input_channel * UP_ROUND(output_channel, col_tile_) * sizeof(float);
|
||||
if (!op_parameter_->is_train_session_) {
|
||||
CHECK_LESS_RETURN(MAX_MALLOC_SIZE, size);
|
||||
#ifdef USING_SERVING
|
||||
auto packed = lite::PackWeightManager::GetInstance()->GetPackedTensor(in_tensors_[1], size);
|
||||
packed_weight_ = packed.second;
|
||||
weight_is_packed_ = packed.first;
|
||||
if (weight_is_packed_ == lite::MALLOC && packed_weight_ == nullptr) {
|
||||
packed_weight_ = malloc(size);
|
||||
}
|
||||
#else
|
||||
packed_weight_ = malloc(size);
|
||||
#endif
|
||||
if (packed_weight_ == nullptr) {
|
||||
MS_LOG(ERROR) << "Conv1x1 Malloc packed_weight_ error!";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -16,7 +16,9 @@
|
|||
|
||||
#include "src/runtime/kernel/arm/fp32/convolution_depthwise_fp32.h"
|
||||
#include "include/errorcode.h"
|
||||
|
||||
#ifdef USING_SERVING
|
||||
#include "src/pack_weight_manager.h"
|
||||
#endif
|
||||
using mindspore::lite::RET_ERROR;
|
||||
using mindspore::lite::RET_INFER_INVALID;
|
||||
using mindspore::lite::RET_OK;
|
||||
|
@ -116,7 +118,17 @@ int ConvolutionDepthwiseCPUKernel::MallocWeightBiasData() {
|
|||
}
|
||||
if (!op_parameter_->is_train_session_) {
|
||||
CHECK_LESS_RETURN(MAX_MALLOC_SIZE, pack_weight_size * sizeof(float));
|
||||
#ifdef USING_SERVING
|
||||
auto packed = lite::PackWeightManager::GetInstance()->GetPackedTensor(
|
||||
in_tensors_[1], static_cast<size_t>(pack_weight_size) * sizeof(float));
|
||||
packed_weight_ = packed.second;
|
||||
weight_is_packed_ = packed.first;
|
||||
if (weight_is_packed_ == lite::MALLOC && packed_weight_ == nullptr) {
|
||||
packed_weight_ = malloc(pack_weight_size * sizeof(float));
|
||||
}
|
||||
#else
|
||||
packed_weight_ = malloc(pack_weight_size * sizeof(float));
|
||||
#endif
|
||||
if (packed_weight_ == nullptr) {
|
||||
MS_LOG(ERROR) << "Malloc buffer failed.";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -16,6 +16,9 @@
|
|||
|
||||
#include "src/runtime/kernel/arm/fp32/convolution_depthwise_indirect_fp32.h"
|
||||
#include "include/errorcode.h"
|
||||
#ifdef USING_SERVING
|
||||
#include "src/pack_weight_manager.h"
|
||||
#endif
|
||||
|
||||
using mindspore::lite::RET_ERROR;
|
||||
using mindspore::lite::RET_INFER_INVALID;
|
||||
|
@ -199,7 +202,17 @@ int ConvolutionDepthwiseIndirectCPUKernel::MallocWeightBiasData() {
|
|||
int pack_weight_size = div_flag * batch_flag * weight_tensor->Height() * weight_tensor->Width();
|
||||
if (!op_parameter_->is_train_session_) {
|
||||
CHECK_LESS_RETURN(MAX_MALLOC_SIZE, pack_weight_size * sizeof(float));
|
||||
#ifdef USING_SERVING
|
||||
auto packed = lite::PackWeightManager::GetInstance()->GetPackedTensor(
|
||||
in_tensors_[1], static_cast<size_t>(pack_weight_size * sizeof(float)));
|
||||
packed_weight_ = packed.second;
|
||||
weight_is_packed_ = packed.first;
|
||||
if (weight_is_packed_ == lite::MALLOC && packed_weight_ == nullptr) {
|
||||
packed_weight_ = malloc(pack_weight_size * sizeof(float));
|
||||
}
|
||||
#else
|
||||
packed_weight_ = malloc(pack_weight_size * sizeof(float));
|
||||
#endif
|
||||
if (packed_weight_ == nullptr) {
|
||||
MS_LOG(ERROR) << "Malloc buffer failed.";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -15,6 +15,7 @@
|
|||
*/
|
||||
|
||||
#include "src/runtime/kernel/arm/fp32/convolution_depthwise_slidewindow_fp32.h"
|
||||
#include "src/pack_weight_manager.h"
|
||||
#include "include/errorcode.h"
|
||||
|
||||
using mindspore::lite::RET_ERROR;
|
||||
|
@ -174,7 +175,17 @@ int ConvolutionDepthwiseSWCPUKernel::MallocWeightBiasData() {
|
|||
int pack_weight_size = C4NUM * OC4 * weight_tensor->Height() * weight_tensor->Width();
|
||||
if (!op_parameter_->is_train_session_) {
|
||||
CHECK_LESS_RETURN(MAX_MALLOC_SIZE, pack_weight_size * sizeof(float));
|
||||
#ifdef USING_SERVING
|
||||
auto packed = lite::PackWeightManager::GetInstance()->GetPackedTensor(
|
||||
in_tensors_[1], static_cast<size_t>(pack_weight_size) * sizeof(float));
|
||||
packed_weight_ = packed.second;
|
||||
weight_is_packed_ = packed.first;
|
||||
if (packed_weight_ == nullptr && weight_is_packed_ == lite::MALLOC) {
|
||||
packed_weight_ = malloc(pack_weight_size * sizeof(float));
|
||||
}
|
||||
#else
|
||||
packed_weight_ = malloc(pack_weight_size * sizeof(float));
|
||||
#endif
|
||||
if (packed_weight_ == nullptr) {
|
||||
MS_LOG(ERROR) << "Malloc buffer failed.";
|
||||
return RET_ERROR;
|
||||
|
|
|
@ -15,6 +15,9 @@
|
|||
*/
|
||||
|
||||
#include "src/runtime/kernel/arm/fp32/convolution_fp32.h"
|
||||
#ifdef USING_SERVING
|
||||
#include "src/pack_weight_manager.h"
|
||||
#endif
|
||||
#include "include/errorcode.h"
|
||||
#include "nnacl/common_func.h"
|
||||
#include "schema/model_generated.h"
|
||||
|
@ -210,12 +213,25 @@ int ConvolutionCPUKernel::MallocWeightBiasData() {
|
|||
size_t pack_weight_size = oc_block_num * in_channel * kernel_plane;
|
||||
if (!op_parameter_->is_train_session_) {
|
||||
CHECK_LESS_RETURN(MAX_MALLOC_SIZE, pack_weight_size * sizeof(float));
|
||||
#ifdef USING_SERVING
|
||||
auto packed = lite::PackWeightManager::GetInstance()->GetPackedTensor(
|
||||
in_tensors_[1], static_cast<size_t>(pack_weight_size) * sizeof(float));
|
||||
packed_weight_ = packed.second;
|
||||
weight_is_packed_ = packed.first;
|
||||
if (weight_is_packed_ == lite::MALLOC && packed_weight_ == nullptr) {
|
||||
packed_weight_ = malloc(pack_weight_size * sizeof(float));
|
||||
memset(packed_weight_, 0, pack_weight_size * sizeof(float));
|
||||
}
|
||||
#else
|
||||
packed_weight_ = malloc(pack_weight_size * sizeof(float));
|
||||
#endif
|
||||
if (packed_weight_ == nullptr) {
|
||||
MS_LOG(ERROR) << "malloc packed weight failed.";
|
||||
return RET_ERROR;
|
||||
}
|
||||
#ifndef USING_SERVING
|
||||
memset(packed_weight_, 0, pack_weight_size * sizeof(float));
|
||||
#endif
|
||||
}
|
||||
|
||||
if (bias_data_ == nullptr) {
|
||||
|
|
|
@ -208,13 +208,25 @@ int ConvolutionWinogradCPUKernel::MallocWeightBiasData() {
|
|||
if (!op_parameter_->is_train_session_) {
|
||||
if (packed_weight_ == nullptr) {
|
||||
CHECK_LESS_RETURN(MAX_MALLOC_SIZE, trans_matrix_data_size);
|
||||
#ifdef USING_SERVING
|
||||
auto packed = lite::PackWeightManager::GetInstance()->GetPackedTensor(in_tensors_[1], trans_matrix_data_size);
|
||||
packed_weight_ = packed.second;
|
||||
weight_is_packed_ = packed.first;
|
||||
if (weight_is_packed_ == lite::MALLOC && packed_weight_ == nullptr) {
|
||||
packed_weight_ = malloc(trans_matrix_data_size);
|
||||
memset(packed_weight_, 0, trans_matrix_data_size);
|
||||
}
|
||||
#else
|
||||
packed_weight_ = malloc(trans_matrix_data_size);
|
||||
#endif
|
||||
if (packed_weight_ == nullptr) {
|
||||
MS_LOG(ERROR) << "malloc matrix_buffer failed.";
|
||||
return RET_MEMORY_FAILED;
|
||||
}
|
||||
}
|
||||
#ifndef USING_SERVING
|
||||
memset(packed_weight_, 0, trans_matrix_data_size);
|
||||
#endif
|
||||
}
|
||||
|
||||
float matrix_a[64];
|
||||
|
|
|
@ -68,7 +68,18 @@ int MatmulFp32BaseCPUKernel::InitBufferA() {
|
|||
if (op_parameter_->is_train_session_) {
|
||||
a_pack_ptr_ = reinterpret_cast<float *>(workspace());
|
||||
} else {
|
||||
#ifdef USING_SERVING
|
||||
auto a_packed = lite::PackWeightManager::GetInstance()->GetPackedTensor(
|
||||
in_tensors()[0], static_cast<size_t>(matrix_a_pack_size_) * sizeof(float));
|
||||
a_pack_ptr_ = reinterpret_cast<float *>(a_packed.second);
|
||||
a_is_packed_ = a_packed.first;
|
||||
if (a_pack_ptr_ == nullptr && a_is_packed_ == lite::MALLOC) {
|
||||
a_pack_ptr_ = reinterpret_cast<float *>(
|
||||
ms_context_->allocator->Malloc(static_cast<size_t>(matrix_a_pack_size_) * sizeof(float)));
|
||||
}
|
||||
#else
|
||||
a_pack_ptr_ = reinterpret_cast<float *>(ms_context_->allocator->Malloc(matrix_a_pack_size_ * sizeof(float)));
|
||||
#endif
|
||||
}
|
||||
}
|
||||
if (a_pack_ptr_ == nullptr) {
|
||||
|
@ -85,8 +96,19 @@ int MatmulFp32BaseCPUKernel::InitBufferB() {
|
|||
if (op_parameter_->is_train_session_) {
|
||||
b_pack_ptr_ = reinterpret_cast<float *>(workspace()) + matrix_a_pack_size_;
|
||||
} else {
|
||||
#ifdef USING_SERVING
|
||||
auto b_packed = lite::PackWeightManager::GetInstance()->GetPackedTensor(
|
||||
in_tensors()[1], static_cast<size_t>(matrix_b_pack_size_) * sizeof(float));
|
||||
b_pack_ptr_ = reinterpret_cast<float *>(b_packed.second);
|
||||
b_is_packed_ = b_packed.first;
|
||||
if (b_pack_ptr_ == nullptr && b_is_packed_ == lite::MALLOC) {
|
||||
b_pack_ptr_ = reinterpret_cast<float *>(
|
||||
ms_context_->allocator->Malloc(static_cast<size_t>(matrix_b_pack_size_) * sizeof(float)));
|
||||
}
|
||||
#else
|
||||
b_pack_ptr_ = reinterpret_cast<float *>(
|
||||
ms_context_->allocator->Malloc(static_cast<size_t>(matrix_b_pack_size_) * sizeof(float)));
|
||||
#endif
|
||||
}
|
||||
if (b_pack_ptr_ == nullptr) {
|
||||
MS_LOG(ERROR) << "malloc b_pack_ptr_ failed";
|
||||
|
@ -193,14 +215,26 @@ void MatmulFp32BaseCPUKernel::FreeBiasBuf() {
|
|||
|
||||
void MatmulFp32BaseCPUKernel::FreeResizeBufA() {
|
||||
if (!vec_matmul_ && !op_parameter_->is_train_session_ && a_pack_ptr_ != nullptr && is_pack_) {
|
||||
ms_context_->allocator->Free(a_pack_ptr_);
|
||||
#ifdef USING_SERVING
|
||||
if (a_is_packed_ == lite::MALLOC) {
|
||||
#endif
|
||||
ms_context_->allocator->Free(a_pack_ptr_);
|
||||
#ifdef USING_SERVING
|
||||
}
|
||||
#endif
|
||||
}
|
||||
a_pack_ptr_ = nullptr;
|
||||
}
|
||||
|
||||
void MatmulFp32BaseCPUKernel::FreeResizeBufB() {
|
||||
if (!op_parameter_->is_train_session_ && b_pack_ptr_ != nullptr && is_pack_) {
|
||||
ms_context_->allocator->Free(b_pack_ptr_);
|
||||
#ifdef USING_SERVING
|
||||
if (b_is_packed_ == lite::MALLOC) {
|
||||
#endif
|
||||
ms_context_->allocator->Free(b_pack_ptr_);
|
||||
#ifdef USING_SERVING
|
||||
}
|
||||
#endif
|
||||
}
|
||||
b_pack_ptr_ = nullptr;
|
||||
}
|
||||
|
@ -370,11 +404,17 @@ int MatmulFp32BaseCPUKernel::Prepare() {
|
|||
if (InitBufferA() != RET_OK) {
|
||||
return RET_ERROR;
|
||||
}
|
||||
ret = InitMatrixA(reinterpret_cast<float *>(in_tensors_[0]->data()));
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "InitMatrixA failed!";
|
||||
return ret;
|
||||
#ifdef USING_SERVING
|
||||
if (a_is_packed_ != lite::PACKED) {
|
||||
#endif
|
||||
ret = InitMatrixA(reinterpret_cast<float *>(in_tensors_[0]->data()));
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "InitMatrixA failed!";
|
||||
return ret;
|
||||
}
|
||||
#ifdef USING_SERVING
|
||||
}
|
||||
#endif
|
||||
}
|
||||
if (params_->b_const_) {
|
||||
auto b_tensor = in_tensors_[1];
|
||||
|
@ -382,10 +422,16 @@ int MatmulFp32BaseCPUKernel::Prepare() {
|
|||
if (InitBufferB() != RET_OK) {
|
||||
return RET_ERROR;
|
||||
}
|
||||
if (InitMatrixB(static_cast<float *>(b_tensor->data())) != RET_OK) {
|
||||
MS_LOG(ERROR) << "InitMatrixB failed!";
|
||||
return RET_ERROR;
|
||||
#ifdef USING_SERVING
|
||||
if (b_is_packed_ != lite::PACKED) {
|
||||
#endif
|
||||
if (InitMatrixB(static_cast<float *>(b_tensor->data())) != RET_OK) {
|
||||
MS_LOG(ERROR) << "InitMatrixB failed!";
|
||||
return RET_ERROR;
|
||||
}
|
||||
#ifdef USING_SERVING
|
||||
}
|
||||
#endif
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
|
|
@ -19,6 +19,9 @@
|
|||
|
||||
#include <vector>
|
||||
#include "src/inner_kernel.h"
|
||||
#ifdef USING_SERVING
|
||||
#include "src/pack_weight_manager.h"
|
||||
#endif
|
||||
#include "nnacl/matmul_parameter.h"
|
||||
#include "include/errorcode.h"
|
||||
#include "src/common/common.h"
|
||||
|
@ -78,6 +81,10 @@ class MatmulFp32BaseCPUKernel : public InnerKernel {
|
|||
MatMulParameter *params_ = nullptr;
|
||||
float *a_pack_ptr_ = nullptr;
|
||||
float *b_pack_ptr_ = nullptr;
|
||||
#ifdef USING_SERVING
|
||||
lite::PackStatus a_is_packed_ = lite::MALLOC;
|
||||
lite::PackStatus b_is_packed_ = lite::MALLOC;
|
||||
#endif
|
||||
int a_batch_ = 1;
|
||||
int b_batch_ = 1;
|
||||
std::vector<int> a_offset_;
|
||||
|
|
|
@ -119,6 +119,7 @@ set(LITE_SRC ${API_SRC}
|
|||
${SRC_DIR}/errorcode.cc
|
||||
${SRC_DIR}/weight_decoder.cc
|
||||
${SRC_DIR}/huffman_decode.cc
|
||||
${SRC_DIR}/pack_weight_manager.cc
|
||||
${SRC_DIR}/delegate/tensorrt/distribution/distribution_base.cc
|
||||
)
|
||||
|
||||
|
|
Loading…
Reference in New Issue