!40477 optimize process of free dynamic input memory

Merge pull request !40477 from zhengyuanhua/br3
This commit is contained in:
i-robot 2022-08-17 01:47:14 +00:00 committed by Gitee
commit 79bdbbaca9
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
6 changed files with 59 additions and 23 deletions

View File

@ -32,10 +32,6 @@ constexpr auto kImageSizeHwNum = 2;
int DynShapeProcess::ProcDynamicInput(std::vector<KernelTensorPtr> *const inputs) {
MS_CHECK_TRUE_MSG(acl_options_ != nullptr, lite::RET_ERROR, "Acl options ptr is nullptr.");
if (acl_options_->batch_size.empty() && acl_options_->image_size.empty()) {
MS_LOG(INFO) << "Inputs are not dynamic mode.";
return lite::RET_OK;
}
if (!acl_options_->batch_size.empty() && !acl_options_->image_size.empty()) {
MS_LOG(ERROR) << "Batch size and image size can't be set at the same time.";
return lite::RET_ERROR;
@ -67,8 +63,8 @@ int DynShapeProcess::AddBatchSizeInput(std::vector<KernelTensorPtr> *const input
free(batch_size_addr);
return lite::RET_ERROR;
}
auto batch_size_ptr = std::make_shared<Address>(batch_size_addr, sizeof(int32_t));
if (batch_size_ptr == nullptr) {
batch_size_ptr_ = std::make_shared<Address>(batch_size_addr, sizeof(int32_t));
if (batch_size_ptr_ == nullptr) {
MS_LOG(ERROR) << "Create Address failed.";
free(batch_size_addr);
return lite::RET_ERROR;
@ -80,7 +76,7 @@ int DynShapeProcess::AddBatchSizeInput(std::vector<KernelTensorPtr> *const input
return lite::RET_ERROR;
}
tensor_ptr->SetData(batch_size_ptr);
tensor_ptr->SetData(batch_size_ptr_);
auto abstract = std::make_shared<abstract::AbstractTensor>(kInt32, std::vector<int64_t>());
tensor_ptr->SetAbstract(abstract);
inputs->emplace_back(tensor_ptr);
@ -98,8 +94,8 @@ int DynShapeProcess::AddImageSizeInput(std::vector<KernelTensorPtr> *const input
free(image_size_addr);
return lite::RET_ERROR;
}
auto image_size_ptr = std::make_shared<Address>(image_size_addr, kImageSizeHwNum * sizeof(int32_t));
if (image_size_ptr == nullptr) {
image_size_ptr_ = std::make_shared<Address>(image_size_addr, kImageSizeHwNum * sizeof(int32_t));
if (image_size_ptr_ == nullptr) {
MS_LOG(ERROR) << "Create Address failed.";
free(image_size_addr);
return lite::RET_ERROR;
@ -111,7 +107,7 @@ int DynShapeProcess::AddImageSizeInput(std::vector<KernelTensorPtr> *const input
return lite::RET_ERROR;
}
tensor_ptr->SetData(image_size_ptr);
tensor_ptr->SetData(image_size_ptr_);
auto abstract = std::make_shared<abstract::AbstractTensor>(kInt32, std::vector<int64_t>());
tensor_ptr->SetAbstract(abstract);
inputs->emplace_back(tensor_ptr);
@ -180,5 +176,18 @@ int DynShapeProcess::GetRealImageSize(std::vector<KernelTensorPtr> *const inputs
MS_LOG(DEBUG) << "Current height " << height << " width " << width;
return lite::RET_OK;
}
// Frees the heap buffers behind the cached batch-size and image-size addresses.
// Each buffer is released with free(); the owning Address then has addr cleared
// to nullptr and size set to 0, so a repeated call finds nothing left to free.
// An address that is unset, or whose addr is already nullptr, is left untouched.
void DynShapeProcess::DestroyDynamicInput() {
  // Shared release step applied to both cached addresses.
  const auto release_buffer = [](const auto &holder) {
    if (holder == nullptr || holder->addr == nullptr) {
      return;
    }
    free(holder->addr);
    holder->addr = nullptr;
    holder->size = 0;
  };
  release_buffer(batch_size_ptr_);
  release_buffer(image_size_ptr_);
}
} // namespace acl
} // namespace mindspore::kernel

View File

@ -28,9 +28,10 @@ namespace acl {
class DynShapeProcess {
public:
explicit DynShapeProcess(const AclModelOptionsPtr &options, size_t input_data_idx)
: acl_options_(options), input_data_idx_(input_data_idx) {}
: acl_options_(options), input_data_idx_(input_data_idx), batch_size_ptr_(nullptr), image_size_ptr_(nullptr) {}
int ProcDynamicInput(std::vector<KernelTensorPtr> *const inputs);
void DestroyDynamicInput();
private:
int AddBatchSizeInput(std::vector<KernelTensorPtr> *const inputs);
@ -40,6 +41,8 @@ class DynShapeProcess {
AclModelOptionsPtr acl_options_;
size_t input_data_idx_;
AddressPtr batch_size_ptr_;
AddressPtr image_size_ptr_;
};
using DynShapeProcPtr = std::shared_ptr<DynShapeProcess>;

View File

@ -379,9 +379,6 @@ STATUS ModelProcess::SetBatchSize(const std::vector<KernelTensorPtr> &inputs) {
MS_LOG(ERROR) << "Set dynamic batch size failed, model_id is " << model_id_;
return lite::RET_ERROR;
}
free(batch_size_tensor->GetData()->addr);
batch_size_tensor->GetData()->addr = nullptr;
batch_size_tensor->GetData()->size = 0;
return lite::RET_OK;
}
@ -416,9 +413,6 @@ STATUS ModelProcess::SetImageSize(const std::vector<KernelTensorPtr> &inputs) {
MS_LOG(ERROR) << "Set dynamic batch size failed, model_id is " << model_id_;
return lite::RET_ERROR;
}
free(image_size_tensor->GetData()->addr);
image_size_tensor->GetData()->addr = nullptr;
image_size_tensor->GetData()->size = 0;
return lite::RET_OK;
}

View File

@ -199,6 +199,23 @@ int CustomAscendKernelMod::SetInputAndOutputAddr(const std::vector<AddressPtr> &
return lite::RET_OK;
}
// Reports whether dynamic inputs are configured: returns true when at least one
// of acl_options_->batch_size or acl_options_->image_size is non-empty.
// Logs at INFO level when both are empty.
// NOTE(review): acl_options_ is dereferenced here without a null check.
bool CustomAscendKernelMod::IsDynamicInput() {
  const bool has_dynamic_option = !acl_options_->batch_size.empty() || !acl_options_->image_size.empty();
  if (!has_dynamic_option) {
    MS_LOG(INFO) << "Inputs are not dynamic mode.";
  }
  return has_dynamic_option;
}
// Copies the address and size held by each internal output tensor (outputs_[i])
// into the caller-provided outputs[i], writing only when either field differs.
// NOTE(review): outputs_ is indexed by positions of `outputs` with no bounds or
// null checks in this function — presumably validated by the caller; confirm.
void CustomAscendKernelMod::UpdateOutputAddr(const std::vector<AddressPtr> &outputs) {
  const size_t total = outputs.size();
  for (size_t idx = 0; idx < total; ++idx) {
    const auto &target = outputs[idx];
    const auto produced = outputs_[idx]->GetData();
    const bool unchanged = (target->addr == produced->addr) && (target->size == produced->size);
    if (unchanged) {
      continue;
    }
    target->addr = produced->addr;
    target->size = produced->size;
  }
}
bool CustomAscendKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
if (!load_model_) {
@ -209,18 +226,20 @@ bool CustomAscendKernelMod::Launch(const std::vector<AddressPtr> &inputs, const
MS_LOG(ERROR) << "Check input and output param failed.";
return false;
}
if (dyn_shape_proc_->ProcDynamicInput(&inputs_) != lite::RET_OK) {
MS_LOG(ERROR) << "Proc dynamic batch size input failed.";
return false;
if (IsDynamicInput()) {
if (dyn_shape_proc_->ProcDynamicInput(&inputs_) != lite::RET_OK) {
MS_LOG(ERROR) << "Proc dynamic batch size input failed.";
return false;
}
}
if (model_infer_->Inference(inputs_, outputs_) != lite::RET_OK) {
MS_LOG(ERROR) << "Custom kernel execute failed.";
return false;
}
for (size_t i = 0; i < outputs.size(); ++i) {
outputs[i]->addr = outputs_[i]->GetData()->addr;
outputs[i]->size = outputs_[i]->GetData()->size;
if (IsDynamicInput()) {
dyn_shape_proc_->DestroyDynamicInput();
}
UpdateOutputAddr(outputs);
return true;
}

View File

@ -56,6 +56,8 @@ class CustomAscendKernelMod : public kernel::KernelMod {
bool InitParam(const std::vector<KernelTensorPtr> &inputs, const std::vector<KernelTensorPtr> &outputs);
int SetInputAndOutputAddr(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
int LoadModel();
bool IsDynamicInput();
void UpdateOutputAddr(const std::vector<AddressPtr> &outputs);
bool load_model_;
std::vector<KernelTensorPtr> inputs_;

View File

@ -59,7 +59,13 @@ bool TransposeKernelMod::Init(const BaseOperatorPtr &base_operator, const std::v
input_shape_ = inputs[kIndex0]->GetShapeVector();
output_shape_ = outputs[kIndex0]->GetShapeVector();
auto address_ptr = inputs[kIndex1]->GetData();
if (address_ptr == nullptr) {
MS_LOG(EXCEPTION) << "Address ptr is nullptr.";
}
int *addr = static_cast<int *>(address_ptr->addr);
if (addr == nullptr) {
MS_LOG(EXCEPTION) << "Cast addr failed.";
}
std::vector<int64_t> perm;
for (size_t i = 0; i < (address_ptr->size) / sizeof(int); ++i) {
perm.emplace_back(static_cast<int64_t>(addr[i]));
@ -107,6 +113,9 @@ bool TransposeKernelMod::Init(const BaseOperatorPtr &base_operator, const std::v
} else {
MS_LOG(EXCEPTION) << "Unsupported input data type: " << dtype_;
}
free(address_ptr->addr);
inputs[kIndex1]->GetData()->addr = nullptr;
inputs[kIndex1]->GetData()->size = 0;
return true;
}