support dynamic batch size

Author: zhengyuanhua
Date: 2021-10-20 19:36:18 +08:00
parent 826cff9499
commit 200a18103e
12 changed files with 105 additions and 55 deletions
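For orientation, this is how an application opts in after this change; a hedged sketch, assuming the usual MindSpore Lite Context setup (only the dynamic-batch setter itself is exercised by this commit):

#include <memory>
#include "include/api/context.h"

// Hedged sketch (not part of the diff): request a dynamic batch on Ascend 310.
auto context = std::make_shared<mindspore::Context>();
auto ascend310 = std::make_shared<mindspore::Ascend310DeviceInfo>();
ascend310->SetDeviceID(0);
// A single candidate batch, matching GetBatchSize() below, which parses one value.
ascend310->SetDynamicBatchSize({2});
context->MutableDeviceInfo().push_back(ascend310);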


@ -42,6 +42,7 @@ typedef struct NpuDeviceInfo {
/// \brief Ascend310DeviceInfo defined for Ascend's configuration information.
typedef struct AscendDeviceInfo {
uint32_t device_id_;
std::string batch_size_;
} AscendDeviceInfo;
/// \brief DeviceInfo defined for backend's configuration information.
struct DeviceInfo {


@ -70,6 +70,17 @@ std::shared_ptr<mindspore::KirinNPUDeviceInfo> NPUDeviceInfoFromNPUDeviceContext
return npu_info;
}
std::vector<size_t> GetBatchSize(const std::string &batch_size) {
char *ptr = nullptr;
size_t val = strtol(batch_size.c_str(), &ptr, 0);
bool ret = (ptr == (batch_size.c_str() + batch_size.size()));
if (!ret) {
return {};
}
MS_LOG(INFO) << "Batch size: " << val;
return {val};
}
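The end-pointer comparison is what rejects trailing junk; a minimal standalone sketch of the idiom (ParseWholeString is a hypothetical name, not in the commit):

#include <cstdlib>
#include <string>

// Succeed only if strtol consumed the whole string. Base 0 accepts
// "8", "0x8", and "010" alike.
static bool ParseWholeString(const std::string &s, size_t *out) {
  if (s.empty()) {
    return false;
  }
  char *end = nullptr;
  long v = strtol(s.c_str(), &end, 0);
  if (end != s.c_str() + s.size() || v < 0) {
    return false;  // trailing junk (e.g. "8x") or a negative value
  }
  *out = static_cast<size_t>(v);
  return true;
}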
std::shared_ptr<mindspore::Ascend310DeviceInfo> Ascend310DeviceInfoFromAscend310DeviceContext(
const lite::DeviceContext &ascend310_context) {
if (ascend310_context.device_type_ != DT_ASCEND310) {
@ -79,6 +90,11 @@ std::shared_ptr<mindspore::Ascend310DeviceInfo> Ascend310DeviceInfoFromAscend310
auto ascend310_info = std::make_shared<mindspore::Ascend310DeviceInfo>();
MS_CHECK_TRUE_RET(ascend310_info != nullptr, nullptr);
ascend310_info->SetDeviceID(ascend310_context.device_info_.ascend310_device_info_.device_id_);
std::string batch_size = ascend310_context.device_info_.ascend310_device_info_.batch_size_;
if (!batch_size.empty()) {
auto val = GetBatchSize(batch_size);
ascend310_info->SetDynamicBatchSize(val);
}
return ascend310_info;
}
} // namespace


@ -74,7 +74,7 @@ Status AddNpuDevice(Context *a_context, lite::InnerContext *l_context, DeviceInf
Status AddAscend310Device(Context *a_context, lite::InnerContext *l_context, DeviceInfoContext *device) {
lite::DeviceInfo device_info = {0};
auto ascend310_context = device->Cast<Ascend310DeviceInfo>();
device_info.ascend310_device_info_ = {ascend310_context->GetDeviceID()};
device_info.ascend310_device_info_ = {ascend310_context->GetDeviceID(), ascend310_context->GetDynamicBatchSize()};
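// GetDynamicBatchSize() hands the batch size over in textual form (batch_size_ is a std::string); ParseBatchSize() re-parses it downstream.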
l_context->device_list_.push_back({lite::DT_ASCEND310, device_info});
return kSuccess;
}


@ -21,11 +21,15 @@
namespace mindspore::kernel {
namespace acl {
const uint64_t kBatchSizeInvalid = 0;
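// Zero doubles as the "not configured" sentinel; IsDynamicBatchSize() compares against it.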
typedef struct AclModelOptions {
int32_t device_id;
std::string dump_cfg_path;
uint64_t batch_size;
AclModelOptions() : device_id(0), dump_cfg_path(""), batch_size(kBatchSizeInvalid) {}
} AclModelOptions;
} // namespace acl
} // namespace mindspore::kernel
#endif // MINDSPORE_LITE_SRC_RUNTIME_AGENT_ACL_MODEL_OPTIONS_H_


@ -27,7 +27,7 @@ namespace acl {
CustomAscend310Kernel::CustomAscend310Kernel(const std::vector<mindspore::MSTensor> &inputs,
const std::vector<mindspore::MSTensor> &outputs,
const schema::Primitive *primitive, const mindspore::Context *ctx)
: Kernel(inputs, outputs, primitive, ctx), load_model_(false), model_infer_(nullptr) {}
: Kernel(inputs, outputs, primitive, ctx), load_model_(false), acl_options_({}), model_infer_(nullptr) {}
CustomAscend310Kernel::~CustomAscend310Kernel() {
if (load_model_) {
@ -38,7 +38,23 @@ CustomAscend310Kernel::~CustomAscend310Kernel() {
}
}
STATUS CustomAscend310Kernel::ParseBatchSize(const std::string &batch_size, AclModelOptions *options) {
CHECK_NULL_RETURN(options);
if (!batch_size.empty()) {
char *ptr = nullptr;
options->batch_size = strtol(batch_size.c_str(), &ptr, 0);
bool ret = (ptr == (batch_size.c_str() + batch_size.size()));
if (!ret) {
options->batch_size = kBatchSizeInvalid;
MS_LOG(ERROR) << "Convert batch size failed, val: " << batch_size;
return lite::RET_ERROR;
}
MS_LOG(INFO) << "Batch size of context is " << options->batch_size;
}
return lite::RET_OK;
}
AclModelOptions CustomAscend310Kernel::GetAclModelOptions(const mindspore::Context *ctx) const {
AclModelOptions CustomAscend310Kernel::GetAclModelOptions(const mindspore::Context *ctx) {
AclModelOptions options;
options.device_id = 0;
if (ctx == nullptr) {
@ -62,6 +78,11 @@ AclModelOptions CustomAscend310Kernel::GetAclModelOptions(const mindspore::Conte
}
options.device_id = static_cast<int32_t>(ascend310_info->GetDeviceID());
auto batch_size = ascend310_info->GetDynamicBatchSize();
if (ParseBatchSize(batch_size, &options) != lite::RET_OK) {
MS_LOG(WARNING) << "Parse batch size failed.";
return options;
}
return options;
}
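Note the failure mode: a malformed batch string only triggers a warning, batch_size stays at kBatchSizeInvalid, and the kernel falls back to static-batch inference (ProcDynamicBatchSizeInput below then appends nothing).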
@ -74,8 +95,8 @@ STATUS CustomAscend310Kernel::PrepareModelInfer() {
int idx = inputs_.size() - 1;
Buffer om_data(inputs_[idx].Data().get(), inputs_[idx].DataSize());
if (model_infer_ == nullptr) {
auto options = GetAclModelOptions(context_);
model_infer_ = std::make_shared<ModelInfer>(om_data, options);
acl_options_ = GetAclModelOptions(context_);
model_infer_ = std::make_shared<ModelInfer>(om_data, acl_options_);
CHECK_NULL_RETURN(model_infer_);
}
int ret = model_infer_->Init();
@ -116,12 +137,26 @@ STATUS CustomAscend310Kernel::ReSize() {
return Prepare();
}
STATUS CustomAscend310Kernel::ProcDynamicBatchSizeInput(std::vector<mindspore::MSTensor> *input) {
CHECK_NULL_RETURN(input);
if (acl_options_.batch_size != kBatchSizeInvalid) {
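// batch_size is a uint64_t but the tensor is tagged int32: the consumer in SetBatchSize() reads the low four bytes (little-endian host assumed).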
mindspore::MSTensor dynamic_input("batch", DataType::kNumberTypeInt32, {1}, &acl_options_.batch_size,
sizeof(int32_t));
input->push_back(dynamic_input);
}
return lite::RET_OK;
}
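The appended tensor feeds the hidden dynamic-batch input that ACL adds to models converted with a dynamic batch profile; SetBatchSize() below looks that input up by ACL_DYNAMIC_TENSOR_NAME and forwards the value to the runtime.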
STATUS CustomAscend310Kernel::Execute() {
if (!load_model_) {
MS_LOG(WARNING) << "Custom kernel has not been prepared.";
return lite::RET_OK;
}
std::vector<mindspore::MSTensor> inputs(inputs_.begin(), inputs_.end() - 1);
if (ProcDynamicBatchSizeInput(&inputs) != lite::RET_OK) {
MS_LOG(ERROR) << "Proc dynamic batch size input failed.";
return lite::RET_ERROR;
}
if (model_infer_->Inference(inputs, &outputs_) != lite::RET_OK) {
MS_LOG(ERROR) << "Custom kernel execute failed.";
return lite::RET_ERROR;


@ -18,6 +18,7 @@
#define MINDSPORE_LITE_SRC_RUNTIME_KERNEL_ASCEND310_KERNEL_CUSTOM_H_
#include <vector>
#include <string>
#include <memory>
#include "src/runtime/kernel/ascend310/src/acl_model_options.h"
#include "src/runtime/kernel/ascend310/src/model_infer.h"
@ -42,9 +43,12 @@ class CustomAscend310Kernel : public kernel::Kernel {
private:
STATUS PrepareModelInfer();
AclModelOptions GetAclModelOptions(const mindspore::Context *ctx) const;
AclModelOptions GetAclModelOptions(const mindspore::Context *ctx);
STATUS ProcDynamicBatchSizeInput(std::vector<mindspore::MSTensor> *input);
STATUS ParseBatchSize(const std::string &batch_size, AclModelOptions *options);
bool load_model_;
AclModelOptions acl_options_;
std::shared_ptr<ModelInfer> model_infer_;
};
} // namespace acl


@ -27,7 +27,7 @@ ModelInfer::ModelInfer(const Buffer &om_data, const AclModelOptions &options)
context_(nullptr),
om_data_(om_data),
options_(options),
model_process_(),
model_process_(options),
acl_env_(nullptr) {}
STATUS ModelInfer::Init() {


@ -201,7 +201,7 @@ STATUS ModelProcess::InitOutputsBuffer() {
aclError ret;
outputs_ = aclmdlCreateDataset();
if (outputs_ == nullptr) {
MS_LOG(ERROR) << "Create input dataset failed";
MS_LOG(ERROR) << "Create output dataset failed";
return lite::RET_ERROR;
}
size_t output_size = aclmdlGetNumOutputs(model_desc_);
@ -217,7 +217,7 @@ STATUS ModelProcess::InitOutputsBuffer() {
aclmdlIODims dims;
ret = aclmdlGetOutputDims(model_desc_, i, &dims);
if (ret != ACL_ERROR_NONE) {
MS_LOG(ERROR) << "Get input shape failed";
MS_LOG(ERROR) << "Get output shape failed";
if (!is_run_on_device_) {
aclrtFree(data_mem_buffer);
} else {
@ -235,7 +235,7 @@ STATUS ModelProcess::InitOutputsBuffer() {
if (output_name.empty()) {
MS_LOG(WARNING) << "Get name of output " << i << " failed.";
}
MS_LOG(INFO) << "Name of input " << i << " is " << output_name;
MS_LOG(INFO) << "Name of om output " << i << " is " << output_name << "Buffer size " << buffer_size;
output_infos_.emplace_back(
AclTensorInfo{data_mem_buffer, data_mem_buffer, buffer_size, data_type, shape, output_name});
}
@ -310,35 +310,25 @@ STATUS ModelProcess::UnLoad() {
return lite::RET_OK;
}
size_t ModelProcess::GetDynamicDims(const std::vector<AclTensorInfo> &inputs) {
size_t max_num = 0;
for (auto input : inputs) {
size_t cur_num = std::count(input.dims.begin(), input.dims.end(), -1);
if (cur_num > max_num) {
max_num = cur_num;
}
}
return max_num;
}
STATUS ModelProcess::SetBatchSize(const std::vector<mindspore::MSTensor> &inputs) {
size_t index;
aclError ret;
for (size_t i = 0; i < inputs.size(); i++) {
input_infos_[i].buffer_size = inputs[i].DataSize();
}
auto *p = reinterpret_cast<const float *>(inputs[inputs.size() - 1].Data().get());
auto *p = reinterpret_cast<const int32_t *>(inputs[inputs.size() - 1].Data().get());
if (p == nullptr) {
MS_LOG(ERROR) << "Pointer is nullptr.";
return lite::RET_ERROR;
}
auto dynamicBatchSize = p[0];
auto batch_size = p[0];
ret = aclmdlGetInputIndexByName(model_desc_, ACL_DYNAMIC_TENSOR_NAME, &index);
if (ret != ACL_ERROR_NONE) {
MS_LOG(ERROR) << "Get index failed";
return lite::RET_ERROR;
}
ret = aclmdlSetDynamicBatchSize(model_id_, inputs_, index, dynamicBatchSize);
MS_LOG(INFO) << "Set Batch size(" << batch_size << ") of input " << index << ".";
ret = aclmdlSetDynamicBatchSize(model_id_, inputs_, index, batch_size);
if (ret != ACL_ERROR_NONE) {
MS_LOG(ERROR) << "Set dynamic batch size failed, model_id is " << model_id_;
return lite::RET_ERROR;
@ -347,8 +337,8 @@ STATUS ModelProcess::SetBatchSize(const std::vector<mindspore::MSTensor> &inputs
}
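Stripped of logging and error paths, the ACL handshake inside SetBatchSize reduces to two calls. A hedged sketch, where model_desc, model_id, and inputs stand in for the class members, and batch 2 is an arbitrary pick that must be one of the profiles the model was converted with:

#include "acl/acl_mdl.h"

size_t idx = 0;
if (aclmdlGetInputIndexByName(model_desc, ACL_DYNAMIC_TENSOR_NAME, &idx) == ACL_ERROR_NONE) {
  // Select the effective batch for this run via the hidden dynamic-batch input.
  aclmdlSetDynamicBatchSize(model_id, inputs, idx, 2);
}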
STATUS ModelProcess::CheckTensorByTensorInfo(const std::vector<mindspore::MSTensor> &tensor,
const std::vector<AclTensorInfo> &tensor_info, size_t dynamic_nums) {
if (dynamic_nums == 0) {
const std::vector<AclTensorInfo> &tensor_info) {
if (!IsDynamicShape()) {
for (size_t i = 0; i < tensor_info.size(); ++i) {
if (tensor[i].Shape() != tensor_info[i].dims) {
MS_LOG(ERROR) << "Note: input " << i << " shape not match, required " << ShapeToString(tensor_info[i].dims)
@ -371,8 +361,8 @@ STATUS ModelProcess::CheckTensorByTensorInfo(const std::vector<mindspore::MSTens
return lite::RET_OK;
}
STATUS ModelProcess::ProcDynamicShape(const std::vector<mindspore::MSTensor> &inputs, size_t dynamic_nums) {
if (dynamic_nums == kDynamicBatchSize) {
STATUS ModelProcess::ProcDynamicShape(const std::vector<mindspore::MSTensor> &inputs) {
if (IsDynamicBatchSize()) {
if (SetBatchSize(inputs) != lite::RET_OK) {
MS_LOG(ERROR) << "Failed to convert dynamic batch size";
return lite::RET_ERROR;
@ -381,19 +371,19 @@ STATUS ModelProcess::ProcDynamicShape(const std::vector<mindspore::MSTensor> &in
MS_LOG(ERROR) << "Reset output size failed";
return lite::RET_ERROR;
}
} else if (dynamic_nums == kDynamicImageSize) {
MS_LOG(ERROR) << "Only dynamic batch size is supported";
return lite::RET_ERROR;
}
return lite::RET_OK;
}
bool ModelProcess::IsDynamicShape() { return IsDynamicBatchSize(); }
bool ModelProcess::IsDynamicBatchSize() { return options_.batch_size != kBatchSizeInvalid; }
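Dynamic image size is dropped outright here (the old kDynamicImageSize branch goes away together with GetDynamicDims above), so IsDynamicShape() is, for now, just an alias for IsDynamicBatchSize().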
STATUS ModelProcess::CheckAndInitInput(const std::vector<mindspore::MSTensor> &inputs) {
aclError ret;
inputs_ = aclmdlCreateDataset();
size_t dynamic_nums = GetDynamicDims(input_infos_);
// check inputs
if (CheckTensorByTensorInfo(inputs, input_infos_, dynamic_nums) != lite::RET_OK) {
if (CheckTensorByTensorInfo(inputs, input_infos_) != lite::RET_OK) {
MS_LOG(ERROR) << "Check input tensor failed.";
return lite::RET_ERROR;
}
@ -407,7 +397,8 @@ STATUS ModelProcess::CheckAndInitInput(const std::vector<mindspore::MSTensor> &i
info.cur_device_data = info.device_data;
ret = aclrtMemcpy(info.cur_device_data, info.buffer_size, data, input.DataSize(), ACL_MEMCPY_HOST_TO_DEVICE);
if (ret != ACL_ERROR_NONE) {
MS_LOG(ERROR) << "Acl memcpy input " << i << " data to device failed, buffer size " << input.DataSize();
MS_LOG(ERROR) << "Acl memcpy input " << i << " data to device failed, src input size: " << input.DataSize()
<< "dst device buffer size: " << info.buffer_size;
return lite::RET_ERROR;
}
input_buffer = info.cur_device_data;
@ -426,7 +417,7 @@ STATUS ModelProcess::CheckAndInitInput(const std::vector<mindspore::MSTensor> &i
return lite::RET_ERROR;
}
}
if (ProcDynamicShape(inputs, dynamic_nums) != lite::RET_OK) {
if (ProcDynamicShape(inputs) != lite::RET_OK) {
MS_LOG(ERROR) << "Proc input dynamic shape failed.";
return lite::RET_ERROR;
}
@ -445,10 +436,12 @@ STATUS ModelProcess::ResetOutputSize() {
MS_LOG(ERROR) << "get output dim error.";
return lite::RET_ERROR;
}
std::vector<int64_t> shape(output_dims.dims, output_dims.dims + output_dims.dimCount);
for (size_t i = 0; i < output_dims.dimCount; i++) {
dims *= output_dims.dims[i];
}
output_type = aclmdlGetOutputDataType(model_desc_, index);
output_infos_[index].dims = shape;
output_infos_[index].buffer_size = dims * aclDataTypeSize(output_type);
}
return lite::RET_OK;
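ResetOutputSize re-derives both the shape and the byte size of every output once the batch changes. A condensed sketch of the per-output arithmetic, assuming aclmdlGetCurOutputDims is the dims source used just above (that call sits outside this hunk):

#include "acl/acl_mdl.h"

aclmdlIODims output_dims;
if (aclmdlGetCurOutputDims(model_desc, index, &output_dims) == ACL_ERROR_NONE) {
  size_t elem_count = 1;
  for (size_t i = 0; i < output_dims.dimCount; ++i) {
    elem_count *= static_cast<size_t>(output_dims.dims[i]);
  }
  // Would be written back to output_infos_[index].buffer_size.
  size_t bytes = elem_count * aclDataTypeSize(aclmdlGetOutputDataType(model_desc, index));
}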
@ -470,8 +463,11 @@ STATUS ModelProcess::SortTensorInfoByName(const std::vector<mindspore::MSTensor>
std::string name = tensor[i].Name();
size_t j;
for (j = 0; j < size; j++) {
if (name.find((*tensor_info)[j].name) != std::string::npos) {
if (name.find((*tensor_info)[j].name) != std::string::npos ||
(*tensor_info)[j].name.find(name) != std::string::npos) {
if (i != j) {
std::swap((*tensor_info)[i], (*tensor_info)[j]);
}
break;
}
}
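The match is now symmetric because either side may be a decorated form of the other (for example, a tensor named "conv1" against an om record "conv1:0", or the reverse). The new condition is equivalent to this hypothetical helper, shown only for clarity:

// Hypothetical helper, equivalent to the widened condition above.
static bool NamesMatch(const std::string &a, const std::string &b) {
  return a.find(b) != std::string::npos || b.find(a) != std::string::npos;
}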


@ -25,6 +25,7 @@
#include "acl/acl_rt.h"
#include "include/api/types.h"
#include "include/errorcode.h"
#include "src/runtime/kernel/ascend310/src/acl_model_options.h"
namespace mindspore::kernel {
namespace acl {
@ -40,8 +41,9 @@ struct AclTensorInfo {
class ModelProcess {
public:
ModelProcess()
: model_id_(0xffffffff),
explicit ModelProcess(const AclModelOptions &options)
: options_(options),
model_id_(0xffffffff),
is_run_on_device_(false),
model_desc_(nullptr),
inputs_(nullptr),
@ -65,22 +67,23 @@ class ModelProcess {
STATUS CheckAndInitInput(const std::vector<mindspore::MSTensor> &inputs);
STATUS SortTensorInfoByName(const std::vector<mindspore::MSTensor> &tensor, std::vector<AclTensorInfo> *tensor_info);
STATUS CheckTensorByTensorInfo(const std::vector<mindspore::MSTensor> &tensor,
const std::vector<AclTensorInfo> &tensor_info, size_t dynamic_nums);
const std::vector<AclTensorInfo> &tensor_info);
STATUS GetOutputs(std::vector<mindspore::MSTensor> *outputs);
STATUS ConstructTensor(std::vector<mindspore::MSTensor> *outputs);
STATUS SetBatchSize(const std::vector<mindspore::MSTensor> &inputs);
STATUS InitInputsBuffer();
STATUS InitOutputsBuffer();
STATUS ResetOutputSize();
size_t GetDynamicDims(const std::vector<AclTensorInfo> &);
STATUS ProcDynamicShape(const std::vector<mindspore::MSTensor> &inputs, size_t dynamic_nums);
STATUS ProcDynamicShape(const std::vector<mindspore::MSTensor> &inputs);
std::string VectorToString(const std::vector<int64_t> &);
bool IsDynamicShape();
bool IsDynamicBatchSize();
void DestroyInputsDataset();
void DestroyInputsDataMem();
void DestroyInputsBuffer();
void DestroyOutputsBuffer();
AclModelOptions options_;
uint32_t model_id_;
// if run one device(AICPU), there is no need to alloc device memory and copy inputs to(/outputs from) device
bool is_run_on_device_;


@ -352,9 +352,7 @@ STATUS AclPassImpl::TraceOutput(const AnfNodePtr &node) {
static size_t iter = 0;
CHECK_NULL_RETURN(node);
AnfNodePtr cur_node = node;
AnfNodePtr pre_node = nullptr;
while (cur_node->isa<CNode>() && IsPrimitiveCNode(cur_node, prim::kPrimTupleGetItem)) {
pre_node = cur_node;
auto tmp = cur_node->cast<CNodePtr>();
CHECK_NULL_RETURN(tmp);
cur_node = tmp->input(kTupleGetItemFirstInputIdx);
@ -384,9 +382,6 @@ STATUS AclPassImpl::TraceOutput(const AnfNodePtr &node) {
} else {
MS_LOG(INFO) << "Graph out name: " << cnode->fullname_with_scope();
graph_output_names_.emplace_back(cnode->fullname_with_scope());
if (pre_node != nullptr && IsPrimitiveCNode(pre_node, prim::kPrimTupleGetItem)) {
cnode = pre_node->cast<CNodePtr>();
}
std::vector<int64_t> dims;
if (lite::acl::GetShapeVectorFromCNode(cnode, &dims) != lite::RET_OK) {
MS_LOG(ERROR) << "Get node shape failed.";


@ -61,11 +61,7 @@ STATUS Conv2dTransposeMapper::Mapper(const CNodePtr &cnode) {
}
STATUS Conv2dTransposeMapper::AdjustGeAttr(const CNodePtr &cnode, const PrimitivePtr &dst_prim) {
std::vector<int64_t> shape;
if (acl::GetShapeVectorFromCNode(cnode, &shape) != lite::RET_OK) {
MS_LOG(ERROR) << "Get shape failed from conv2d transpose.";
return lite::RET_ERROR;
}
std::vector<int64_t> shape = {0, 0, 0, 0};
dst_prim->AddAttr("input_size", MakeValue(shape));
dst_prim->AddAttr("format", MakeValue("NCHW"));


@ -75,7 +75,7 @@ CNodePtr CreateTupleGetItemNode(const FuncGraphPtr &func_graph, const CNodePtr &
static STATUS AdapteNodeWithMultiOutputs(const FuncGraphPtr &func_graph, const CNodePtr &cnode,
const FuncGraphManagerPtr &manager) {
std::string cnode_func_name = GetCNodeFuncName(cnode);
if (cnode_func_name == prim::kTupleGetItem || cnode_func_name == kNameReturn) {
if (cnode_func_name == prim::kTupleGetItem) {
return lite::RET_OK;
}
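Return nodes are no longer exempt, so a Return whose operand has multiple outputs now receives the same TupleGetItem adaptation as any interior consumer.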