forked from mindspore-Ecosystem/mindspore
!40477 optimize process of free dynamic input memory
Merge pull request !40477 from zhengyuanhua/br3
commit 79bdbbaca9
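In short, this change moves ownership of the heap buffers created for the dynamic batch-size and image-size inputs into DynShapeProcess itself: the new batch_size_ptr_ and image_size_ptr_ members keep the Address objects, and the new DestroyDynamicInput() frees them after inference. ModelProcess::SetBatchSize and ModelProcess::SetImageSize no longer free the tensor data, and CustomAscendKernelMod::Launch only runs the dynamic-input path when IsDynamicInput() reports that a dynamic batch size or image size is configured. The sketch below illustrates the allocate-into-member / free-in-DestroyDynamicInput pattern; Address here is a simplified stand-in and DynShapeOwnerSketch is a hypothetical name, so this is only a rough model of the pattern, not the MindSpore implementation.

// Minimal sketch of the ownership pattern introduced by this change,
// using a simplified stand-in Address type (hypothetical, not the kernel header type).
#include <cstdint>
#include <cstdlib>
#include <memory>

struct Address {                       // stand-in for the kernel Address struct
  void *addr = nullptr;
  std::size_t size = 0;
  Address(void *a, std::size_t s) : addr(a), size(s) {}
};
using AddressPtr = std::shared_ptr<Address>;

class DynShapeOwnerSketch {
 public:
  // Allocate the dynamic batch-size buffer and keep the Address as a member,
  // mirroring AddBatchSizeInput() storing batch_size_ptr_ in the diff.
  bool AddBatchSizeInput(int32_t batch) {
    auto *buf = static_cast<int32_t *>(std::malloc(sizeof(int32_t)));
    if (buf == nullptr) {
      return false;
    }
    *buf = batch;
    batch_size_ptr_ = std::make_shared<Address>(buf, sizeof(int32_t));
    return true;
  }

  // Free the buffer exactly once after inference, mirroring DestroyDynamicInput().
  void DestroyDynamicInput() {
    if (batch_size_ptr_ != nullptr && batch_size_ptr_->addr != nullptr) {
      std::free(batch_size_ptr_->addr);
      batch_size_ptr_->addr = nullptr;
      batch_size_ptr_->size = 0;
    }
  }

 private:
  AddressPtr batch_size_ptr_;
};

int main() {
  DynShapeOwnerSketch proc;
  if (proc.AddBatchSizeInput(4)) {   // allocate before inference
    proc.DestroyDynamicInput();      // release after inference
  }
  return 0;
}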
@@ -32,10 +32,6 @@ constexpr auto kImageSizeHwNum = 2;
 
 int DynShapeProcess::ProcDynamicInput(std::vector<KernelTensorPtr> *const inputs) {
   MS_CHECK_TRUE_MSG(acl_options_ != nullptr, lite::RET_ERROR, "Acl options ptr is nullptr.");
-  if (acl_options_->batch_size.empty() && acl_options_->image_size.empty()) {
-    MS_LOG(INFO) << "Inputs are not dynamic mode.";
-    return lite::RET_OK;
-  }
   if (!acl_options_->batch_size.empty() && !acl_options_->image_size.empty()) {
     MS_LOG(ERROR) << "Batch size and image size can't be set at the same time.";
     return lite::RET_ERROR;
@@ -67,8 +63,8 @@ int DynShapeProcess::AddBatchSizeInput(std::vector<KernelTensorPtr> *const input
     free(batch_size_addr);
     return lite::RET_ERROR;
   }
-  auto batch_size_ptr = std::make_shared<Address>(batch_size_addr, sizeof(int32_t));
-  if (batch_size_ptr == nullptr) {
+  batch_size_ptr_ = std::make_shared<Address>(batch_size_addr, sizeof(int32_t));
+  if (batch_size_ptr_ == nullptr) {
     MS_LOG(ERROR) << "Create Address failed.";
     free(batch_size_addr);
     return lite::RET_ERROR;
@@ -80,7 +76,7 @@ int DynShapeProcess::AddBatchSizeInput(std::vector<KernelTensorPtr> *const input
     return lite::RET_ERROR;
   }
 
-  tensor_ptr->SetData(batch_size_ptr);
+  tensor_ptr->SetData(batch_size_ptr_);
   auto abstract = std::make_shared<abstract::AbstractTensor>(kInt32, std::vector<int64_t>());
   tensor_ptr->SetAbstract(abstract);
   inputs->emplace_back(tensor_ptr);
@@ -98,8 +94,8 @@ int DynShapeProcess::AddImageSizeInput(std::vector<KernelTensorPtr> *const input
     free(image_size_addr);
     return lite::RET_ERROR;
   }
-  auto image_size_ptr = std::make_shared<Address>(image_size_addr, kImageSizeHwNum * sizeof(int32_t));
-  if (image_size_ptr == nullptr) {
+  image_size_ptr_ = std::make_shared<Address>(image_size_addr, kImageSizeHwNum * sizeof(int32_t));
+  if (image_size_ptr_ == nullptr) {
     MS_LOG(ERROR) << "Create Address failed.";
     free(image_size_addr);
     return lite::RET_ERROR;
@@ -111,7 +107,7 @@ int DynShapeProcess::AddImageSizeInput(std::vector<KernelTensorPtr> *const input
     return lite::RET_ERROR;
   }
 
-  tensor_ptr->SetData(image_size_ptr);
+  tensor_ptr->SetData(image_size_ptr_);
   auto abstract = std::make_shared<abstract::AbstractTensor>(kInt32, std::vector<int64_t>());
   tensor_ptr->SetAbstract(abstract);
   inputs->emplace_back(tensor_ptr);
@@ -180,5 +176,18 @@ int DynShapeProcess::GetRealImageSize(std::vector<KernelTensorPtr> *const inputs
   MS_LOG(DEBUG) << "Current height " << height << " width " << width;
   return lite::RET_OK;
 }
+
+void DynShapeProcess::DestroyDynamicInput() {
+  if (batch_size_ptr_ != nullptr && batch_size_ptr_->addr != nullptr) {
+    free(batch_size_ptr_->addr);
+    batch_size_ptr_->addr = nullptr;
+    batch_size_ptr_->size = 0;
+  }
+  if (image_size_ptr_ != nullptr && image_size_ptr_->addr != nullptr) {
+    free(image_size_ptr_->addr);
+    image_size_ptr_->addr = nullptr;
+    image_size_ptr_->size = 0;
+  }
+}
 }  // namespace acl
 }  // namespace mindspore::kernel

@@ -28,9 +28,10 @@ namespace acl {
 class DynShapeProcess {
  public:
   explicit DynShapeProcess(const AclModelOptionsPtr &options, size_t input_data_idx)
-      : acl_options_(options), input_data_idx_(input_data_idx) {}
+      : acl_options_(options), input_data_idx_(input_data_idx), batch_size_ptr_(nullptr), image_size_ptr_(nullptr) {}
 
   int ProcDynamicInput(std::vector<KernelTensorPtr> *const inputs);
+  void DestroyDynamicInput();
 
  private:
   int AddBatchSizeInput(std::vector<KernelTensorPtr> *const inputs);
@@ -40,6 +41,8 @@ class DynShapeProcess {
 
   AclModelOptionsPtr acl_options_;
   size_t input_data_idx_;
+  AddressPtr batch_size_ptr_;
+  AddressPtr image_size_ptr_;
 };
 
 using DynShapeProcPtr = std::shared_ptr<DynShapeProcess>;

@@ -379,9 +379,6 @@ STATUS ModelProcess::SetBatchSize(const std::vector<KernelTensorPtr> &inputs) {
     MS_LOG(ERROR) << "Set dynamic batch size failed, model_id is " << model_id_;
     return lite::RET_ERROR;
   }
-  free(batch_size_tensor->GetData()->addr);
-  batch_size_tensor->GetData()->addr = nullptr;
-  batch_size_tensor->GetData()->size = 0;
   return lite::RET_OK;
 }
 
@@ -416,9 +413,6 @@ STATUS ModelProcess::SetImageSize(const std::vector<KernelTensorPtr> &inputs) {
     MS_LOG(ERROR) << "Set dynamic batch size failed, model_id is " << model_id_;
     return lite::RET_ERROR;
   }
-  free(image_size_tensor->GetData()->addr);
-  image_size_tensor->GetData()->addr = nullptr;
-  image_size_tensor->GetData()->size = 0;
   return lite::RET_OK;
 }
 

@@ -199,6 +199,23 @@ int CustomAscendKernelMod::SetInputAndOutputAddr(const std::vector<AddressPtr> &
   return lite::RET_OK;
 }
 
+bool CustomAscendKernelMod::IsDynamicInput() {
+  if (acl_options_->batch_size.empty() && acl_options_->image_size.empty()) {
+    MS_LOG(INFO) << "Inputs are not dynamic mode.";
+    return false;
+  }
+  return true;
+}
+
+void CustomAscendKernelMod::UpdateOutputAddr(const std::vector<AddressPtr> &outputs) {
+  for (size_t i = 0; i < outputs.size(); ++i) {
+    if ((outputs[i]->addr != outputs_[i]->GetData()->addr) || (outputs[i]->size != outputs_[i]->GetData()->size)) {
+      outputs[i]->addr = outputs_[i]->GetData()->addr;
+      outputs[i]->size = outputs_[i]->GetData()->size;
+    }
+  }
+}
+
 bool CustomAscendKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
                                    const std::vector<AddressPtr> &outputs, void *stream_ptr) {
   if (!load_model_) {
@@ -209,18 +226,20 @@ bool CustomAscendKernelMod::Launch(const std::vector<AddressPtr> &inputs, const
     MS_LOG(ERROR) << "Check input and output param failed.";
     return false;
   }
-  if (dyn_shape_proc_->ProcDynamicInput(&inputs_) != lite::RET_OK) {
-    MS_LOG(ERROR) << "Proc dynamic batch size input failed.";
-    return false;
+  if (IsDynamicInput()) {
+    if (dyn_shape_proc_->ProcDynamicInput(&inputs_) != lite::RET_OK) {
+      MS_LOG(ERROR) << "Proc dynamic batch size input failed.";
+      return false;
+    }
   }
   if (model_infer_->Inference(inputs_, outputs_) != lite::RET_OK) {
     MS_LOG(ERROR) << "Custom kernel execute failed.";
     return false;
   }
-  for (size_t i = 0; i < outputs.size(); ++i) {
-    outputs[i]->addr = outputs_[i]->GetData()->addr;
-    outputs[i]->size = outputs_[i]->GetData()->size;
+  if (IsDynamicInput()) {
+    dyn_shape_proc_->DestroyDynamicInput();
   }
+  UpdateOutputAddr(outputs);
   return true;
 }
 

@@ -56,6 +56,8 @@ class CustomAscendKernelMod : public kernel::KernelMod {
   bool InitParam(const std::vector<KernelTensorPtr> &inputs, const std::vector<KernelTensorPtr> &outputs);
   int SetInputAndOutputAddr(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
   int LoadModel();
+  bool IsDynamicInput();
+  void UpdateOutputAddr(const std::vector<AddressPtr> &outputs);
 
   bool load_model_;
   std::vector<KernelTensorPtr> inputs_;

@@ -59,7 +59,13 @@ bool TransposeKernelMod::Init(const BaseOperatorPtr &base_operator, const std::v
   input_shape_ = inputs[kIndex0]->GetShapeVector();
   output_shape_ = outputs[kIndex0]->GetShapeVector();
   auto address_ptr = inputs[kIndex1]->GetData();
+  if (address_ptr == nullptr) {
+    MS_LOG(EXCEPTION) << "Address ptr is nullptr.";
+  }
   int *addr = static_cast<int *>(address_ptr->addr);
+  if (addr == nullptr) {
+    MS_LOG(EXCEPTION) << "Cast addr failed.";
+  }
   std::vector<int64_t> perm;
   for (size_t i = 0; i < (address_ptr->size) / sizeof(int); ++i) {
     perm.emplace_back(static_cast<int64_t>(addr[i]));
@@ -107,6 +113,9 @@ bool TransposeKernelMod::Init(const BaseOperatorPtr &base_operator, const std::v
   } else {
     MS_LOG(EXCEPTION) << "Unsupported input data type: " << dtype_;
   }
+  free(address_ptr->addr);
+  inputs[kIndex1]->GetData()->addr = nullptr;
+  inputs[kIndex1]->GetData()->size = 0;
   return true;
 }
 
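Keeping allocation (AddBatchSizeInput / AddImageSizeInput) and release (DestroyDynamicInput) inside DynShapeProcess means the buffers are freed by the code that created them, so ModelProcess no longer has to free memory it does not own, and each Launch() call allocates and releases the dynamic-input buffers once per invocation. The same idea appears in TransposeKernelMod::Init, which now frees the permutation buffer once the perm vector has been read from it.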