forked from mindspore-Ecosystem/mindspore
!40477 optimize process of free dynamic input memory
Merge pull request !40477 from zhengyuanhua/br3
This commit is contained in:
commit
79bdbbaca9
|
@ -32,10 +32,6 @@ constexpr auto kImageSizeHwNum = 2;
|
||||||
|
|
||||||
int DynShapeProcess::ProcDynamicInput(std::vector<KernelTensorPtr> *const inputs) {
|
int DynShapeProcess::ProcDynamicInput(std::vector<KernelTensorPtr> *const inputs) {
|
||||||
MS_CHECK_TRUE_MSG(acl_options_ != nullptr, lite::RET_ERROR, "Acl options ptr is nullptr.");
|
MS_CHECK_TRUE_MSG(acl_options_ != nullptr, lite::RET_ERROR, "Acl options ptr is nullptr.");
|
||||||
if (acl_options_->batch_size.empty() && acl_options_->image_size.empty()) {
|
|
||||||
MS_LOG(INFO) << "Inputs are not dynamic mode.";
|
|
||||||
return lite::RET_OK;
|
|
||||||
}
|
|
||||||
if (!acl_options_->batch_size.empty() && !acl_options_->image_size.empty()) {
|
if (!acl_options_->batch_size.empty() && !acl_options_->image_size.empty()) {
|
||||||
MS_LOG(ERROR) << "Batch size and image size can't be set at the same time.";
|
MS_LOG(ERROR) << "Batch size and image size can't be set at the same time.";
|
||||||
return lite::RET_ERROR;
|
return lite::RET_ERROR;
|
||||||
|
@ -67,8 +63,8 @@ int DynShapeProcess::AddBatchSizeInput(std::vector<KernelTensorPtr> *const input
|
||||||
free(batch_size_addr);
|
free(batch_size_addr);
|
||||||
return lite::RET_ERROR;
|
return lite::RET_ERROR;
|
||||||
}
|
}
|
||||||
auto batch_size_ptr = std::make_shared<Address>(batch_size_addr, sizeof(int32_t));
|
batch_size_ptr_ = std::make_shared<Address>(batch_size_addr, sizeof(int32_t));
|
||||||
if (batch_size_ptr == nullptr) {
|
if (batch_size_ptr_ == nullptr) {
|
||||||
MS_LOG(ERROR) << "Create Address failed.";
|
MS_LOG(ERROR) << "Create Address failed.";
|
||||||
free(batch_size_addr);
|
free(batch_size_addr);
|
||||||
return lite::RET_ERROR;
|
return lite::RET_ERROR;
|
||||||
|
@ -80,7 +76,7 @@ int DynShapeProcess::AddBatchSizeInput(std::vector<KernelTensorPtr> *const input
|
||||||
return lite::RET_ERROR;
|
return lite::RET_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
tensor_ptr->SetData(batch_size_ptr);
|
tensor_ptr->SetData(batch_size_ptr_);
|
||||||
auto abstract = std::make_shared<abstract::AbstractTensor>(kInt32, std::vector<int64_t>());
|
auto abstract = std::make_shared<abstract::AbstractTensor>(kInt32, std::vector<int64_t>());
|
||||||
tensor_ptr->SetAbstract(abstract);
|
tensor_ptr->SetAbstract(abstract);
|
||||||
inputs->emplace_back(tensor_ptr);
|
inputs->emplace_back(tensor_ptr);
|
||||||
|
@ -98,8 +94,8 @@ int DynShapeProcess::AddImageSizeInput(std::vector<KernelTensorPtr> *const input
|
||||||
free(image_size_addr);
|
free(image_size_addr);
|
||||||
return lite::RET_ERROR;
|
return lite::RET_ERROR;
|
||||||
}
|
}
|
||||||
auto image_size_ptr = std::make_shared<Address>(image_size_addr, kImageSizeHwNum * sizeof(int32_t));
|
image_size_ptr_ = std::make_shared<Address>(image_size_addr, kImageSizeHwNum * sizeof(int32_t));
|
||||||
if (image_size_ptr == nullptr) {
|
if (image_size_ptr_ == nullptr) {
|
||||||
MS_LOG(ERROR) << "Create Address failed.";
|
MS_LOG(ERROR) << "Create Address failed.";
|
||||||
free(image_size_addr);
|
free(image_size_addr);
|
||||||
return lite::RET_ERROR;
|
return lite::RET_ERROR;
|
||||||
|
@ -111,7 +107,7 @@ int DynShapeProcess::AddImageSizeInput(std::vector<KernelTensorPtr> *const input
|
||||||
return lite::RET_ERROR;
|
return lite::RET_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
tensor_ptr->SetData(image_size_ptr);
|
tensor_ptr->SetData(image_size_ptr_);
|
||||||
auto abstract = std::make_shared<abstract::AbstractTensor>(kInt32, std::vector<int64_t>());
|
auto abstract = std::make_shared<abstract::AbstractTensor>(kInt32, std::vector<int64_t>());
|
||||||
tensor_ptr->SetAbstract(abstract);
|
tensor_ptr->SetAbstract(abstract);
|
||||||
inputs->emplace_back(tensor_ptr);
|
inputs->emplace_back(tensor_ptr);
|
||||||
|
@ -180,5 +176,18 @@ int DynShapeProcess::GetRealImageSize(std::vector<KernelTensorPtr> *const inputs
|
||||||
MS_LOG(DEBUG) << "Current height " << height << " width " << width;
|
MS_LOG(DEBUG) << "Current height " << height << " width " << width;
|
||||||
return lite::RET_OK;
|
return lite::RET_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void DynShapeProcess::DestroyDynamicInput() {
|
||||||
|
if (batch_size_ptr_ != nullptr && batch_size_ptr_->addr != nullptr) {
|
||||||
|
free(batch_size_ptr_->addr);
|
||||||
|
batch_size_ptr_->addr = nullptr;
|
||||||
|
batch_size_ptr_->size = 0;
|
||||||
|
}
|
||||||
|
if (image_size_ptr_ != nullptr && image_size_ptr_->addr != nullptr) {
|
||||||
|
free(image_size_ptr_->addr);
|
||||||
|
image_size_ptr_->addr = nullptr;
|
||||||
|
image_size_ptr_->size = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
} // namespace acl
|
} // namespace acl
|
||||||
} // namespace mindspore::kernel
|
} // namespace mindspore::kernel
|
||||||
|
|
|
@ -28,9 +28,10 @@ namespace acl {
|
||||||
class DynShapeProcess {
|
class DynShapeProcess {
|
||||||
public:
|
public:
|
||||||
explicit DynShapeProcess(const AclModelOptionsPtr &options, size_t input_data_idx)
|
explicit DynShapeProcess(const AclModelOptionsPtr &options, size_t input_data_idx)
|
||||||
: acl_options_(options), input_data_idx_(input_data_idx) {}
|
: acl_options_(options), input_data_idx_(input_data_idx), batch_size_ptr_(nullptr), image_size_ptr_(nullptr) {}
|
||||||
|
|
||||||
int ProcDynamicInput(std::vector<KernelTensorPtr> *const inputs);
|
int ProcDynamicInput(std::vector<KernelTensorPtr> *const inputs);
|
||||||
|
void DestroyDynamicInput();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
int AddBatchSizeInput(std::vector<KernelTensorPtr> *const inputs);
|
int AddBatchSizeInput(std::vector<KernelTensorPtr> *const inputs);
|
||||||
|
@ -40,6 +41,8 @@ class DynShapeProcess {
|
||||||
|
|
||||||
AclModelOptionsPtr acl_options_;
|
AclModelOptionsPtr acl_options_;
|
||||||
size_t input_data_idx_;
|
size_t input_data_idx_;
|
||||||
|
AddressPtr batch_size_ptr_;
|
||||||
|
AddressPtr image_size_ptr_;
|
||||||
};
|
};
|
||||||
|
|
||||||
using DynShapeProcPtr = std::shared_ptr<DynShapeProcess>;
|
using DynShapeProcPtr = std::shared_ptr<DynShapeProcess>;
|
||||||
|
|
|
@ -379,9 +379,6 @@ STATUS ModelProcess::SetBatchSize(const std::vector<KernelTensorPtr> &inputs) {
|
||||||
MS_LOG(ERROR) << "Set dynamic batch size failed, model_id is " << model_id_;
|
MS_LOG(ERROR) << "Set dynamic batch size failed, model_id is " << model_id_;
|
||||||
return lite::RET_ERROR;
|
return lite::RET_ERROR;
|
||||||
}
|
}
|
||||||
free(batch_size_tensor->GetData()->addr);
|
|
||||||
batch_size_tensor->GetData()->addr = nullptr;
|
|
||||||
batch_size_tensor->GetData()->size = 0;
|
|
||||||
return lite::RET_OK;
|
return lite::RET_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -416,9 +413,6 @@ STATUS ModelProcess::SetImageSize(const std::vector<KernelTensorPtr> &inputs) {
|
||||||
MS_LOG(ERROR) << "Set dynamic batch size failed, model_id is " << model_id_;
|
MS_LOG(ERROR) << "Set dynamic batch size failed, model_id is " << model_id_;
|
||||||
return lite::RET_ERROR;
|
return lite::RET_ERROR;
|
||||||
}
|
}
|
||||||
free(image_size_tensor->GetData()->addr);
|
|
||||||
image_size_tensor->GetData()->addr = nullptr;
|
|
||||||
image_size_tensor->GetData()->size = 0;
|
|
||||||
return lite::RET_OK;
|
return lite::RET_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -199,6 +199,23 @@ int CustomAscendKernelMod::SetInputAndOutputAddr(const std::vector<AddressPtr> &
|
||||||
return lite::RET_OK;
|
return lite::RET_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool CustomAscendKernelMod::IsDynamicInput() {
|
||||||
|
if (acl_options_->batch_size.empty() && acl_options_->image_size.empty()) {
|
||||||
|
MS_LOG(INFO) << "Inputs are not dynamic mode.";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
void CustomAscendKernelMod::UpdateOutputAddr(const std::vector<AddressPtr> &outputs) {
|
||||||
|
for (size_t i = 0; i < outputs.size(); ++i) {
|
||||||
|
if ((outputs[i]->addr != outputs_[i]->GetData()->addr) || (outputs[i]->size != outputs_[i]->GetData()->size)) {
|
||||||
|
outputs[i]->addr = outputs_[i]->GetData()->addr;
|
||||||
|
outputs[i]->size = outputs_[i]->GetData()->size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bool CustomAscendKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
bool CustomAscendKernelMod::Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||||
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
|
const std::vector<AddressPtr> &outputs, void *stream_ptr) {
|
||||||
if (!load_model_) {
|
if (!load_model_) {
|
||||||
|
@ -209,18 +226,20 @@ bool CustomAscendKernelMod::Launch(const std::vector<AddressPtr> &inputs, const
|
||||||
MS_LOG(ERROR) << "Check input and output param failed.";
|
MS_LOG(ERROR) << "Check input and output param failed.";
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (dyn_shape_proc_->ProcDynamicInput(&inputs_) != lite::RET_OK) {
|
if (IsDynamicInput()) {
|
||||||
MS_LOG(ERROR) << "Proc dynamic batch size input failed.";
|
if (dyn_shape_proc_->ProcDynamicInput(&inputs_) != lite::RET_OK) {
|
||||||
return false;
|
MS_LOG(ERROR) << "Proc dynamic batch size input failed.";
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (model_infer_->Inference(inputs_, outputs_) != lite::RET_OK) {
|
if (model_infer_->Inference(inputs_, outputs_) != lite::RET_OK) {
|
||||||
MS_LOG(ERROR) << "Custom kernel execute failed.";
|
MS_LOG(ERROR) << "Custom kernel execute failed.";
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
for (size_t i = 0; i < outputs.size(); ++i) {
|
if (IsDynamicInput()) {
|
||||||
outputs[i]->addr = outputs_[i]->GetData()->addr;
|
dyn_shape_proc_->DestroyDynamicInput();
|
||||||
outputs[i]->size = outputs_[i]->GetData()->size;
|
|
||||||
}
|
}
|
||||||
|
UpdateOutputAddr(outputs);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -56,6 +56,8 @@ class CustomAscendKernelMod : public kernel::KernelMod {
|
||||||
bool InitParam(const std::vector<KernelTensorPtr> &inputs, const std::vector<KernelTensorPtr> &outputs);
|
bool InitParam(const std::vector<KernelTensorPtr> &inputs, const std::vector<KernelTensorPtr> &outputs);
|
||||||
int SetInputAndOutputAddr(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
|
int SetInputAndOutputAddr(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &outputs);
|
||||||
int LoadModel();
|
int LoadModel();
|
||||||
|
bool IsDynamicInput();
|
||||||
|
void UpdateOutputAddr(const std::vector<AddressPtr> &outputs);
|
||||||
|
|
||||||
bool load_model_;
|
bool load_model_;
|
||||||
std::vector<KernelTensorPtr> inputs_;
|
std::vector<KernelTensorPtr> inputs_;
|
||||||
|
|
|
@ -59,7 +59,13 @@ bool TransposeKernelMod::Init(const BaseOperatorPtr &base_operator, const std::v
|
||||||
input_shape_ = inputs[kIndex0]->GetShapeVector();
|
input_shape_ = inputs[kIndex0]->GetShapeVector();
|
||||||
output_shape_ = outputs[kIndex0]->GetShapeVector();
|
output_shape_ = outputs[kIndex0]->GetShapeVector();
|
||||||
auto address_ptr = inputs[kIndex1]->GetData();
|
auto address_ptr = inputs[kIndex1]->GetData();
|
||||||
|
if (address_ptr == nullptr) {
|
||||||
|
MS_LOG(EXCEPTION) << "Address ptr is nullptr.";
|
||||||
|
}
|
||||||
int *addr = static_cast<int *>(address_ptr->addr);
|
int *addr = static_cast<int *>(address_ptr->addr);
|
||||||
|
if (addr == nullptr) {
|
||||||
|
MS_LOG(EXCEPTION) << "Cast addr failed.";
|
||||||
|
}
|
||||||
std::vector<int64_t> perm;
|
std::vector<int64_t> perm;
|
||||||
for (size_t i = 0; i < (address_ptr->size) / sizeof(int); ++i) {
|
for (size_t i = 0; i < (address_ptr->size) / sizeof(int); ++i) {
|
||||||
perm.emplace_back(static_cast<int64_t>(addr[i]));
|
perm.emplace_back(static_cast<int64_t>(addr[i]));
|
||||||
|
@ -107,6 +113,9 @@ bool TransposeKernelMod::Init(const BaseOperatorPtr &base_operator, const std::v
|
||||||
} else {
|
} else {
|
||||||
MS_LOG(EXCEPTION) << "Unsupported input data type: " << dtype_;
|
MS_LOG(EXCEPTION) << "Unsupported input data type: " << dtype_;
|
||||||
}
|
}
|
||||||
|
free(address_ptr->addr);
|
||||||
|
inputs[kIndex1]->GetData()->addr = nullptr;
|
||||||
|
inputs[kIndex1]->GetData()->size = 0;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue