forked from mindspore-Ecosystem/mindspore
memory optimization
This commit is contained in:
parent
b917ceca36
commit
208c620cea
|
@ -133,6 +133,7 @@ set(TRAIN_SRC
|
|||
${CMAKE_CURRENT_SOURCE_DIR}/train/accuracy_monitor.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/train/classification_train_accuracy_monitor.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/train/train_export.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/train/opt_allocator.cc
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/../tools/common/storage.cc
|
||||
)
|
||||
if(ENABLE_V0)
|
||||
|
|
|
@ -316,7 +316,9 @@ void Tensor::FreeData() {
|
|||
this->data_ = nullptr;
|
||||
} else {
|
||||
allocator_->Free(this->data_);
|
||||
this->data_ = nullptr;
|
||||
if (!IS_STATIC_ALLOCATOR(allocator_) || (allocator_->RefCount(this->data_) != 0)) {
|
||||
this->data_ = nullptr;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -34,12 +34,15 @@
|
|||
|
||||
namespace mindspore {
|
||||
namespace lite {
|
||||
|
||||
// Sentinel value that a static allocator returns from RefCount(nullptr).
// Parenthesized so the negative literal cannot bind to a neighbouring operator.
#define STATIC_ALLOCATION (-271964)
// True when `allocator` is non-null and answers the RefCount(nullptr) probe
// with STATIC_ALLOCATION. The argument is parenthesized so that compound
// expressions (e.g. `cond ? a : b`) expand correctly; note that it is
// evaluated twice, so it must not have side effects.
#define IS_STATIC_ALLOCATOR(allocator) \
  (((allocator) != nullptr) && ((allocator)->RefCount(nullptr) == STATIC_ALLOCATION))
|
||||
struct LiteQuantParam {
|
||||
double scale;
|
||||
int32_t zeroPoint;
|
||||
float var_corr{1};
|
||||
float mean_corr{0};
|
||||
bool inited;
|
||||
bool inited{false};
|
||||
std::vector<float> clusters{};
|
||||
int bitNum;
|
||||
int roundType;
|
||||
|
@ -133,7 +136,6 @@ class Tensor : public mindspore::tensor::MSTensor {
|
|||
void set_format(mindspore::Format format) override { this->format_ = format; }
|
||||
|
||||
mindspore::Format format() const override { return this->format_; }
|
||||
|
||||
virtual int ref_count() const { return ref_count_; }
|
||||
|
||||
virtual int init_ref_count() const { return this->init_ref_count_; }
|
||||
|
|
|
@ -0,0 +1,90 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "src/train/opt_allocator.h"
|
||||
#include <limits>
|
||||
#include "nnacl/op_base.h"
|
||||
|
||||
namespace mindspore {
|
||||
|
||||
size_t OptAllocator::FindFree(size_t size) {
|
||||
size_t min_size = std::numeric_limits<size_t>::max();
|
||||
size_t min_addr = std::numeric_limits<size_t>::max();
|
||||
for (auto const &itr : arena_) {
|
||||
// best fit
|
||||
if (itr.second >= size) {
|
||||
if (min_size > itr.second) {
|
||||
min_size = itr.second;
|
||||
min_addr = itr.first;
|
||||
}
|
||||
}
|
||||
}
|
||||
return min_addr;
|
||||
}
|
||||
|
||||
void OptAllocator::Reorder(size_t addr) {
|
||||
size_t length = arena_[addr];
|
||||
size_t post = addr + length;
|
||||
// connect to upper block
|
||||
auto it = arena_.find(post);
|
||||
if (it != arena_.end()) {
|
||||
size_t post_size = it->second;
|
||||
arena_[addr] = length + post_size;
|
||||
arena_.erase(post);
|
||||
}
|
||||
// connect to lower block
|
||||
auto itr = arena_.lower_bound(addr);
|
||||
if (itr != arena_.begin()) {
|
||||
itr--;
|
||||
size_t last = itr->first;
|
||||
if ((last + arena_[last]) == addr) {
|
||||
arena_[last] = arena_[last] + arena_[addr];
|
||||
arena_.erase(addr);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
size_t OptAllocator::Malloc(size_t size) {
|
||||
size = UP_DIV(size, align_size_) * align_size_;
|
||||
size_t addr = FindFree(size);
|
||||
// free block not found
|
||||
if (addr == std::numeric_limits<size_t>::max()) {
|
||||
if (!arena_.empty()) {
|
||||
addr = arena_.rbegin()->first;
|
||||
if (addr + arena_[addr] < heap_) {
|
||||
addr = heap_;
|
||||
} else {
|
||||
arena_.erase(addr);
|
||||
}
|
||||
} else {
|
||||
addr = heap_;
|
||||
}
|
||||
heap_ = addr + size;
|
||||
} else {
|
||||
if (arena_[addr] > size) {
|
||||
arena_[addr + size] = arena_[addr] - size;
|
||||
}
|
||||
arena_.erase(addr);
|
||||
}
|
||||
alloc_[addr] = size;
|
||||
return addr;
|
||||
}
|
||||
|
||||
// Returns the block that starts at `addr` to the free list and merges it with
// adjacent free blocks. Offsets that are not currently allocated (never
// returned by Malloc, or already freed) are ignored, so that a stray call
// cannot insert a zero-length entry or overwrite an existing free block.
void OptAllocator::Free(size_t addr) {
  auto block = alloc_.find(addr);
  if (block == alloc_.end()) {
    return;
  }
  arena_[addr] = block->second;
  alloc_.erase(block);
  Reorder(addr);
}
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,41 @@
|
|||
/**
|
||||
* Copyright 2020 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef MINDSPORE_LITE_SRC_TRAIN_OPT_ALLOCATOR_H_
|
||||
#define MINDSPORE_LITE_SRC_TRAIN_OPT_ALLOCATOR_H_
|
||||
|
||||
#include <map>
|
||||
#include "include/api/allocator.h"
|
||||
|
||||
namespace mindspore {
|
||||
// Offset planner: hands out non-overlapping [offset, offset + size) ranges of
// a virtual heap and tracks the heap size needed to hold all of them. It does
// not own or touch any real memory; callers map offsets onto their own buffer.
class OptAllocator {
 public:
  // `aligned_size` is the granularity every request is rounded up to.
  // A value of 0 is treated as 1 so that the rounding never divides by zero.
  explicit OptAllocator(size_t aligned_size = 32) : align_size_(aligned_size == 0 ? 1 : aligned_size) {}
  ~OptAllocator() = default;
  // Reserves `size` bytes and returns the offset of the reserved range.
  size_t Malloc(size_t size);
  // Releases the range that starts at `offset`.
  void Free(size_t offset);
  // Current extent of the virtual heap, in bytes.
  size_t total_size() const { return heap_; }

 private:
  // Picks a free block able to hold `size` bytes.
  size_t FindFree(size_t size);
  // Merges the free block at `addr` with adjacent free blocks.
  void Reorder(size_t addr);
  std::map<size_t, size_t> arena_;  // free blocks: start offset -> length
  std::map<size_t, size_t> alloc_;  // live blocks: start offset -> length
  size_t heap_ = 0;                 // end offset of the virtual heap
  size_t align_size_;               // rounding granularity for requests
};
|
||||
}; // namespace mindspore
|
||||
#endif // MINDSPORE_LITE_SRC_TRAIN_OPT_ALLOCATOR_H_
|
|
@ -0,0 +1,52 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_LITE_SRC_TRAIN_STATIC_ALLOCATOR_H_
|
||||
#define MINDSPORE_LITE_SRC_TRAIN_STATIC_ALLOCATOR_H_
|
||||
|
||||
namespace mindspore {
|
||||
class StaticAllocator : public Allocator {
|
||||
public:
|
||||
void SetContex(void *buf, size_t size) {
|
||||
start_buf_ = buf;
|
||||
size_ = size;
|
||||
}
|
||||
int SetRefCount(void *ptr, int ref_count) override { return 0; }
|
||||
int DecRefCount(void *ptr, int ref_count) override { return 0; }
|
||||
int IncRefCount(void *ptr, int ref_count) override { return 0; }
|
||||
size_t total_size() { return total_size_; }
|
||||
void Clear() {}
|
||||
void *Malloc(size_t size) override {
|
||||
total_size_ += size;
|
||||
return malloc(size);
|
||||
}
|
||||
void Free(void *ptr) override {
|
||||
if (RefCount(ptr) != 0) free(ptr);
|
||||
}
|
||||
|
||||
int RefCount(void *ptr) override {
|
||||
if (ptr == nullptr) return STATIC_ALLOCATION;
|
||||
char *ptrc = reinterpret_cast<char *>(ptr);
|
||||
char *bufc = reinterpret_cast<char *>(start_buf_);
|
||||
return ((ptrc < bufc) || (ptrc - bufc >= static_cast<ptrdiff_t>(size_)) ? 1 : 0);
|
||||
}
|
||||
|
||||
private:
|
||||
void *start_buf_;
|
||||
size_t size_;
|
||||
size_t total_size_ = 0;
|
||||
};
|
||||
}; // namespace mindspore
|
||||
#endif // MINDSPORE_LITE_SRC_TRAIN_STATIC_ALLOCATOR_H_
|
|
@ -39,6 +39,8 @@
|
|||
#include "src/train/optimizer_kernel.h"
|
||||
#include "src/train/train_utils.h"
|
||||
#include "src/train/train_export.h"
|
||||
#include "src/train/opt_allocator.h"
|
||||
#include "src/train/static_allocator.h"
|
||||
#include "src/train/train_populate_parameter.h"
|
||||
#include "src/train/train_populate_parameter_v0.h"
|
||||
|
||||
|
@ -68,6 +70,7 @@ int TrainSession::Init(const Context *context, const TrainCfg *train_cfg) {
|
|||
}
|
||||
cfg_ = *train_cfg;
|
||||
}
|
||||
allocator_ = context->allocator;
|
||||
return lite::LiteSession::Init(context);
|
||||
}
|
||||
|
||||
|
@ -159,6 +162,51 @@ int TrainSession::InitCallBack() {
|
|||
return RET_OK;
|
||||
}
|
||||
|
||||
// Plans the placement of the output tensors of `kernels` inside one shared
// buffer and points the tensors at their slots. Does nothing (RET_OK) unless
// the session allocator identifies itself as a static allocator.
// Returns RET_ERROR when the shared buffer cannot be allocated.
int TrainSession::AllocTensors(const std::vector<kernel::LiteKernel *> &kernels) {
  if (!IS_STATIC_ALLOCATOR(allocator_)) return RET_OK;
  OptAllocator allocator;
  std::unordered_map<lite::Tensor *, int> ref_count;
  std::unordered_map<lite::Tensor *, size_t> offset_map;
  for (auto kernel : kernels) {
    // Outputs are reserved before the inputs are released, so a kernel's
    // output never shares a slot with one of its own inputs.
    for (auto tensor : kernel->out_tensors()) {
      size_t size = tensor->Size();
      offset_map[tensor] = allocator.Malloc(size);
      ref_count[tensor] = tensor->init_ref_count();
    }
    for (auto tensor : kernel->in_tensors()) {
      if (tensor->category() != lite::Tensor::VAR) continue;
      // Only tensors planned above have a slot that can be released.
      auto counter = ref_count.find(tensor);
      if (counter == ref_count.end()) continue;
      counter->second -= 1;
      if (counter->second == 0) {
        allocator.Free(offset_map[tensor]);
      }
    }
  }
  // Set Tensor data
  if (tensors_data_ == nullptr) {
    auto size = allocator.total_size();
    auto buf = malloc(size);
    if (buf == nullptr) {
      MS_LOG(ERROR) << "cannot allocate buffer size" << size;
      return RET_ERROR;
    }
    // The IS_STATIC_ALLOCATOR check above identifies the allocator as a
    // StaticAllocator, so a static downcast is the appropriate conversion.
    auto *alloc = static_cast<StaticAllocator *>(allocator_.get());
    alloc->SetContex(buf, size);
    tensors_data_ = buf;
  }
  // NOTE(review): the buffer is sized by the first call only. A later call
  // whose plan needs more than that size is not detected here - confirm the
  // first plan is always the largest, or track the allocated size.
  // NOTE(review): this loop walks train_kernels_, not `kernels`; tensors are
  // still filtered through offset_map - confirm this is intended.
  char *base = static_cast<char *>(tensors_data_);
  for (auto kernel : train_kernels_) {
    for (auto tensor : kernel->out_tensors()) {
      auto it = offset_map.find(tensor);
      if (it != offset_map.end()) {
        tensor->set_data(static_cast<void *>(base + it->second));
      }
    }
  }
  return RET_OK;
}
|
||||
|
||||
int TrainSession::CompileGraph(lite::Model *model) { return lite::RET_ERROR; }
|
||||
|
||||
int TrainSession::CompileTrainGraph(std::shared_ptr<Model> model) {
|
||||
|
@ -194,10 +242,21 @@ int TrainSession::CompileTrainGraph(std::shared_ptr<Model> model) {
|
|||
MS_LOG(ERROR) << "failed to allocate space";
|
||||
return RET_ERROR;
|
||||
}
|
||||
ret = AllocTensors(train_kernels_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "failed to allocate space";
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
TrainSession::~TrainSession() { FreeWorkSpace(); }
|
||||
// Releases the workspace and the shared tensor buffer held in tensors_data_.
TrainSession::~TrainSession() {
  FreeWorkSpace();
  // free() accepts a null pointer, so no explicit null check is needed.
  free(tensors_data_);
  tensors_data_ = nullptr;
}
|
||||
|
||||
int TrainSession::ExecKernels(const KernelCallBack &before, const KernelCallBack &after,
|
||||
const std::vector<kernel::LiteKernel *> &run_kernels) {
|
||||
|
@ -420,6 +479,12 @@ int TrainSession::Train() {
|
|||
lite_tensor->set_init_ref_count(lite_tensor->init_ref_count() + 1);
|
||||
}
|
||||
}
|
||||
// allocate tensors
|
||||
auto ret = AllocTensors(train_kernels_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "failed to allocate tensor space";
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
@ -446,6 +511,11 @@ int TrainSession::Eval() {
|
|||
lite_tensor->set_init_ref_count(lite_tensor->init_ref_count() + 1);
|
||||
}
|
||||
}
|
||||
auto ret = AllocTensors(inference_kernels_);
|
||||
if (ret != RET_OK) {
|
||||
MS_LOG(ERROR) << "failed to allocate space";
|
||||
return RET_ERROR;
|
||||
}
|
||||
return RET_OK;
|
||||
}
|
||||
|
||||
|
@ -781,7 +851,12 @@ session::LiteSession *session::TrainSession::CreateTrainSession(const std::strin
|
|||
MS_LOG(ERROR) << "create session failed";
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
if (context->allocator == nullptr) {
|
||||
const_cast<lite::Context *>(context)->allocator = std::shared_ptr<Allocator>(new (std::nothrow) StaticAllocator());
|
||||
if (context->allocator == nullptr) {
|
||||
MS_LOG(ERROR) << " cannot convert to static allocation";
|
||||
}
|
||||
}
|
||||
auto ret = session->Init(context, cfg);
|
||||
if (ret != mindspore::lite::RET_OK) {
|
||||
MS_LOG(ERROR) << "init session failed";
|
||||
|
|
|
@ -147,6 +147,7 @@ class TrainSession : virtual public lite::LiteSession {
|
|||
void FreeRestoreTensors();
|
||||
bool AllInputsNeedScale(kernel::LiteKernel *kernel);
|
||||
void FreeWorkSpace();
|
||||
int AllocTensors(const std::vector<kernel::LiteKernel *> &kernels);
|
||||
|
||||
std::map<Tensor *, Tensor *> restored_origin_tensors_;
|
||||
int virtual_batch_idx_ = 0;
|
||||
|
@ -155,6 +156,8 @@ class TrainSession : virtual public lite::LiteSession {
|
|||
void *workspace_ = nullptr;
|
||||
SchedCallBack sched_mix_precision_callback_;
|
||||
bool train_mode_ = false;
|
||||
void *tensors_data_ = nullptr;
|
||||
std::shared_ptr<Allocator> allocator_;
|
||||
};
|
||||
|
||||
} // namespace lite
|
||||
|
|
|
@ -603,7 +603,7 @@ int NetTrain::InitCallbackParameter() {
|
|||
}
|
||||
op_call_times_total_++;
|
||||
op_begin_ = GetTimeUs();
|
||||
if ((callParam.node_type == "Adam") || (callParam.node_type == "Assign")) {
|
||||
if ((callParam.node_type == "Adam") || (callParam.node_type == "Assign") || callParam.node_type == "SGD") {
|
||||
for (auto tensor : before_outputs) {
|
||||
std::fill(reinterpret_cast<int8_t *>(tensor->MutableData()),
|
||||
reinterpret_cast<int8_t *>(tensor->MutableData()) + tensor->Size(), 0);
|
||||
|
|
Loading…
Reference in New Issue