forked from mindspore-Ecosystem/mindspore
update graph output addr and tensor shape
This commit is contained in:
parent
4294c11a89
commit
ca26d3db66
|
@ -81,6 +81,8 @@ int DynShapeProcess::AddBatchSizeInput(std::vector<KernelTensorPtr> *const input
|
|||
}
|
||||
|
||||
tensor_ptr->SetData(batch_size_ptr);
|
||||
auto abstract = std::make_shared<abstract::AbstractTensor>(kInt32, std::vector<int64_t>());
|
||||
tensor_ptr->SetAbstract(abstract);
|
||||
inputs->emplace_back(tensor_ptr);
|
||||
return lite::RET_OK;
|
||||
}
|
||||
|
@ -110,6 +112,8 @@ int DynShapeProcess::AddImageSizeInput(std::vector<KernelTensorPtr> *const input
|
|||
}
|
||||
|
||||
tensor_ptr->SetData(image_size_ptr);
|
||||
auto abstract = std::make_shared<abstract::AbstractTensor>(kInt32, std::vector<int64_t>());
|
||||
tensor_ptr->SetAbstract(abstract);
|
||||
inputs->emplace_back(tensor_ptr);
|
||||
return lite::RET_OK;
|
||||
}
|
||||
|
@ -139,8 +143,9 @@ int DynShapeProcess::GetRealBatchSize(std::vector<KernelTensorPtr> *const inputs
|
|||
|
||||
int DynShapeProcess::GetRealImageSize(std::vector<KernelTensorPtr> *const inputs, int32_t *image_size, int32_t num) {
|
||||
MS_CHECK_TRUE_MSG(image_size != nullptr, lite::RET_ERROR, "Image size ptr is nullptr.");
|
||||
if (input_data_idx_ >= inputs->size()) {
|
||||
MS_LOG(ERROR) << "Input data index " << input_data_idx_ << " is larger than input size " << inputs->size();
|
||||
if (input_data_idx_ >= inputs->size() || input_data_idx_ >= acl_options_->input_format.size()) {
|
||||
MS_LOG(ERROR) << "Input data index " << input_data_idx_ << " is invalid, inputs size " << inputs->size()
|
||||
<< " input formats size " << acl_options_->input_format.size();
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
auto tensor = (*inputs)[input_data_idx_];
|
||||
|
@ -149,7 +154,7 @@ int DynShapeProcess::GetRealImageSize(std::vector<KernelTensorPtr> *const inputs
|
|||
MS_LOG(ERROR) << "Shape size " << shape.size() << " is invalid, input index = " << input_data_idx_;
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
auto format = tensor->GetFormat();
|
||||
auto format = acl_options_->input_format[input_data_idx_];
|
||||
uint64_t height;
|
||||
uint64_t width;
|
||||
if (format == mindspore::Format::NHWC) {
|
||||
|
|
|
@ -166,5 +166,6 @@ std::set<uint64_t> ModelInfer::GetDynamicBatch() { return model_process_.GetDyna
|
|||
|
||||
// Must be called after the model has been loaded.
|
||||
std::set<std::pair<uint64_t, uint64_t>> ModelInfer::GetDynamicImage() { return model_process_.GetDynamicImage(); }
|
||||
// Forwards to model_process_.GetInputFormat() and returns its result unchanged.
std::vector<Format> ModelInfer::GetInputFormat() { return model_process_.GetInputFormat(); }
|
||||
} // namespace acl
|
||||
} // namespace mindspore::kernel
|
||||
|
|
|
@ -45,6 +45,7 @@ class ModelInfer {
|
|||
std::set<uint64_t> GetDynamicBatch();
|
||||
// need to be called after model load
|
||||
std::set<std::pair<uint64_t, uint64_t>> GetDynamicImage();
|
||||
std::vector<Format> GetInputFormat();
|
||||
|
||||
private:
|
||||
STATUS LoadAclModel(const Buffer &om_data);
|
||||
|
|
|
@ -60,32 +60,6 @@ inline static void PushbackIfNotNull(U *vec, T &&item) {
|
|||
}
|
||||
}
|
||||
|
||||
// Splits the per-tensor ACL metadata in `acl_tensor_list` into parallel vectors:
// names, shapes (dims), data types (converted with TransToDataType) and buffer sizes.
//
// Every output pointer is filled through the *IfNotNull helpers, so callers are
// presumably allowed to pass nullptr for vectors they do not need (confirm against
// the helper definitions). The final consistency check therefore only inspects the
// vectors that were actually supplied; dereferencing a null pointer there would crash.
//
// Returns lite::RET_OK on success, lite::RET_ERROR if any supplied vector does not end
// up with exactly one entry per tensor.
static STATUS ConstructTensorDesc(const std::vector<AclTensorInfo> &acl_tensor_list, std::vector<std::string> *names,
                                  std::vector<std::vector<int64_t>> *shapes, std::vector<enum TypeId> *data_types,
                                  std::vector<size_t> *mem_sizes) {
  ClearIfNotNull(names);
  ClearIfNotNull(shapes);
  ClearIfNotNull(data_types);
  ClearIfNotNull(mem_sizes);
  for (const auto &info : acl_tensor_list) {
    PushbackIfNotNull(names, info.name);
    PushbackIfNotNull(shapes, info.dims);
    PushbackIfNotNull(data_types, TransToDataType(info.data_type));
    PushbackIfNotNull(mem_sizes, info.buffer_size);
  }

  const size_t expected_size = acl_tensor_list.size();
  // A vector that was not requested (nullptr) cannot be inconsistent.
  const auto size_matches = [expected_size](const auto *vec) { return vec == nullptr || vec->size() == expected_size; };
  // Used for logging only: report 0 for vectors that were not requested.
  const auto size_or_zero = [](const auto *vec) -> size_t { return vec == nullptr ? 0 : vec->size(); };

  if (!size_matches(names) || !size_matches(shapes) || !size_matches(data_types) || !size_matches(mem_sizes)) {
    MS_LOG(ERROR) << "Inner error, size do not match: names size " << size_or_zero(names) << " shapes size "
                  << size_or_zero(shapes) << " data types size " << size_or_zero(data_types) << " mem sizes size "
                  << size_or_zero(mem_sizes) << " acl_tensor_list size " << expected_size;
    return lite::RET_ERROR;
  }

  return lite::RET_OK;
}
|
||||
|
||||
static std::string ShapeToString(const std::vector<int64_t> &shape) {
|
||||
std::string result = "[";
|
||||
for (size_t i = 0; i < shape.size(); ++i) {
|
||||
|
@ -140,6 +114,26 @@ std::set<uint64_t> ModelProcess::GetDynamicBatch() {
|
|||
return batch;
|
||||
}
|
||||
|
||||
std::vector<Format> ModelProcess::GetInputFormat() {
|
||||
if (model_desc_ == nullptr) {
|
||||
MS_LOG(ERROR) << " Model desc is nullptr.";
|
||||
return std::vector<Format>();
|
||||
}
|
||||
std::vector<Format> input_formats;
|
||||
static const std::map<aclFormat, enum Format> acl_format_map = {{ACL_FORMAT_NCHW, NCHW}, {ACL_FORMAT_NHWC, NHWC}};
|
||||
size_t input_size = aclmdlGetNumInputs(model_desc_);
|
||||
for (size_t i = 0; i < input_size; ++i) {
|
||||
aclFormat format = aclmdlGetInputFormat(model_desc_, i);
|
||||
auto iter = acl_format_map.find(format);
|
||||
if (iter != acl_format_map.end()) {
|
||||
input_formats.emplace_back(iter->second);
|
||||
} else {
|
||||
MS_LOG(WARNING) << "Find input " << i << " format failed, cur format: " << static_cast<int32_t>(format);
|
||||
}
|
||||
}
|
||||
return input_formats;
|
||||
}
|
||||
|
||||
std::set<std::pair<uint64_t, uint64_t>> ModelProcess::GetDynamicImage() {
|
||||
if (model_desc_ == nullptr) {
|
||||
MS_LOG(ERROR) << " Model desc is nullptr.";
|
||||
|
@ -366,7 +360,7 @@ STATUS ModelProcess::SetBatchSize(const std::vector<KernelTensorPtr> &inputs) {
|
|||
num = batch_size_tensor->GetData()->size / data_type_size;
|
||||
}
|
||||
if (num != kBatchSizeNum) {
|
||||
MS_LOG(ERROR) << "Batch size num should be " << kBatchSizeNum;
|
||||
MS_LOG(ERROR) << "Batch size num should be " << kBatchSizeNum << ",real num " << num;
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
auto *ptr = reinterpret_cast<const int32_t *>(batch_size_tensor->GetData()->addr);
|
||||
|
@ -385,6 +379,9 @@ STATUS ModelProcess::SetBatchSize(const std::vector<KernelTensorPtr> &inputs) {
|
|||
MS_LOG(ERROR) << "Set dynamic batch size failed, model_id is " << model_id_;
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
free(batch_size_tensor->GetData()->addr);
|
||||
batch_size_tensor->GetData()->addr = nullptr;
|
||||
batch_size_tensor->GetData()->size = 0;
|
||||
return lite::RET_OK;
|
||||
}
|
||||
|
||||
|
@ -399,7 +396,7 @@ STATUS ModelProcess::SetImageSize(const std::vector<KernelTensorPtr> &inputs) {
|
|||
num = image_size_tensor->GetData()->size / data_type_size;
|
||||
}
|
||||
if (num != kImageSizeHwNum) {
|
||||
MS_LOG(ERROR) << "Image size hw num should be " << kImageSizeHwNum;
|
||||
MS_LOG(ERROR) << "Image size hw num should be " << kImageSizeHwNum << ", real num " << num;
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
auto *hw = reinterpret_cast<const int32_t *>(image_size_tensor->GetData()->addr);
|
||||
|
@ -419,6 +416,9 @@ STATUS ModelProcess::SetImageSize(const std::vector<KernelTensorPtr> &inputs) {
|
|||
MS_LOG(ERROR) << "Set dynamic batch size failed, model_id is " << model_id_;
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
free(image_size_tensor->GetData()->addr);
|
||||
image_size_tensor->GetData()->addr = nullptr;
|
||||
image_size_tensor->GetData()->size = 0;
|
||||
return lite::RET_OK;
|
||||
}
|
||||
|
||||
|
@ -587,12 +587,48 @@ STATUS ModelProcess::PredictFromHost(const std::vector<KernelTensorPtr> &inputs,
|
|||
return lite::RET_OK;
|
||||
}
|
||||
|
||||
void ModelProcess::UpdateOutputInfo(const std::vector<KernelTensorPtr> &outputs) {
|
||||
if (model_desc_ == nullptr) {
|
||||
MS_LOG(ERROR) << " Model desc is nullptr.";
|
||||
return;
|
||||
}
|
||||
if (outputs.size() != output_infos_.size()) {
|
||||
MS_LOG(ERROR) << "Actual tensor count not match, required count " << output_infos_.size() << ", given count "
|
||||
<< outputs.size();
|
||||
return;
|
||||
}
|
||||
for (size_t i = 0; i < output_infos_.size(); ++i) {
|
||||
struct aclmdlIODims output_dims;
|
||||
auto ret = aclmdlGetCurOutputDims(model_desc_, i, &output_dims);
|
||||
if (ret != ACL_ERROR_NONE) {
|
||||
MS_LOG(ERROR) << "get output " << i << " dim error.";
|
||||
return;
|
||||
}
|
||||
std::vector<int64_t> shape(output_dims.dims, output_dims.dims + output_dims.dimCount);
|
||||
bool is_dynamic =
|
||||
std::any_of(output_infos_[i].dims.begin(), output_infos_[i].dims.end(), [](int64_t dim) { return dim < 0; });
|
||||
if (is_dynamic) {
|
||||
size_t dims = 1;
|
||||
for (size_t j = 0; j < output_dims.dimCount; ++j) {
|
||||
dims *= output_dims.dims[j];
|
||||
}
|
||||
aclDataType output_type = aclmdlGetOutputDataType(model_desc_, i);
|
||||
output_infos_[i].dims = shape;
|
||||
output_infos_[i].buffer_size = dims * aclDataTypeSize(output_type);
|
||||
}
|
||||
outputs[i]->SetShapeVector(shape);
|
||||
}
|
||||
MS_LOG(DEBUG) << "Update output shape success.";
|
||||
}
|
||||
|
||||
STATUS ModelProcess::GetOutputs(const std::vector<KernelTensorPtr> &outputs) {
|
||||
if (outputs.empty()) {
|
||||
MS_LOG(ERROR) << "Ms tensor outputs is empty.";
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
|
||||
UpdateOutputInfo(outputs);
|
||||
|
||||
if (ConstructTensor(outputs) != lite::RET_OK) {
|
||||
MS_LOG(ERROR) << "Construct ms tensor failed.";
|
||||
return lite::RET_ERROR;
|
||||
|
@ -601,40 +637,31 @@ STATUS ModelProcess::GetOutputs(const std::vector<KernelTensorPtr> &outputs) {
|
|||
}
|
||||
|
||||
// Copies every model output from its ACL buffer (output_infos_[i].cur_device_data)
// into the corresponding host-side kernel tensor in `outputs`.
//
// When the tensor's current buffer size differs from the size the model produced
// (output_infos_[i].buffer_size), a new host buffer of the right size is allocated
// with malloc and installed in the tensor; otherwise the existing buffer is reused.
// A size mismatch is therefore not an error here: it is the trigger for reallocation.
//
// NOTE(review): the buffer that gets replaced is not freed in this function; it is
// presumably released by whoever observes the address change (confirm against the
// session code that owns the output addresses).
//
// Returns lite::RET_OK on success, lite::RET_ERROR on count mismatch, null tensors,
// allocation failure or memcpy failure.
STATUS ModelProcess::ConstructTensor(const std::vector<KernelTensorPtr> &outputs) {
  // Guards the index-based access below; outputs[i] is paired with output_infos_[i].
  if (outputs.size() != output_infos_.size()) {
    MS_LOG(ERROR) << "Actual tensor count not match, required count " << output_infos_.size() << ", given count "
                  << outputs.size();
    return lite::RET_ERROR;
  }
  aclrtMemcpyKind kind = is_run_on_device_ ? ACL_MEMCPY_HOST_TO_HOST : ACL_MEMCPY_DEVICE_TO_HOST;
  for (size_t i = 0; i < output_infos_.size(); ++i) {
    const auto &info = output_infos_[i];
    if (info.cur_device_data == nullptr) {
      // when run on device, cur_device_data is nullptr before first execute
      MS_LOG(WARNING) << "Output device add is nullptr.";
      continue;
    }
    if (outputs[i] == nullptr || outputs[i]->GetData() == nullptr) {
      MS_LOG(ERROR) << "Output " << i << " tensor or its data is nullptr.";
      return lite::RET_ERROR;
    }
    auto data = outputs[i]->GetData();
    const bool need_new_buffer = data->size != info.buffer_size;
    void *output_addr = nullptr;
    if (need_new_buffer) {
      output_addr = malloc(info.buffer_size);
      if (output_addr == nullptr) {
        MS_LOG(ERROR) << "Failed to malloc output " << i << " memory size " << info.buffer_size;
        return lite::RET_ERROR;
      }
    } else {
      output_addr = data->addr;
    }
    auto ret = aclrtMemcpy(output_addr, info.buffer_size, info.cur_device_data, info.buffer_size, kind);
    if (ret != ACL_ERROR_NONE) {
      MS_LOG(ERROR) << "Memcpy output " << i << " from " << (is_run_on_device_ ? "host" : "device")
                    << " to host failed, memory size " << info.buffer_size;
      // The freshly allocated buffer was never handed to the tensor; release it here.
      if (need_new_buffer) {
        free(output_addr);
      }
      return lite::RET_ERROR;
    }
    data->addr = output_addr;
    data->size = info.buffer_size;
  }
  return lite::RET_OK;
}
|
||||
|
|
|
@ -66,6 +66,7 @@ class ModelProcess {
|
|||
uint32_t model_id() const { return model_id_; }
|
||||
std::set<uint64_t> GetDynamicBatch();
|
||||
std::set<std::pair<uint64_t, uint64_t>> GetDynamicImage();
|
||||
std::vector<Format> GetInputFormat();
|
||||
|
||||
private:
|
||||
STATUS CreateDataBuffer(void **data_mem_buffer, size_t buffer_size, aclmdlDataset *dataset);
|
||||
|
@ -73,6 +74,7 @@ class ModelProcess {
|
|||
STATUS CheckTensorByTensorInfo(const std::vector<KernelTensorPtr> &tensor,
|
||||
const std::vector<AclTensorInfo> &tensor_info);
|
||||
STATUS GetOutputs(const std::vector<KernelTensorPtr> &outputs);
|
||||
void UpdateOutputInfo(const std::vector<KernelTensorPtr> &outputs);
|
||||
STATUS ConstructTensor(const std::vector<KernelTensorPtr> &outputs);
|
||||
STATUS SetBatchSize(const std::vector<KernelTensorPtr> &inputs);
|
||||
STATUS SetImageSize(const std::vector<KernelTensorPtr> &inputs);
|
||||
|
|
|
@ -19,8 +19,10 @@
|
|||
|
||||
#include <string>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include <memory>
|
||||
#include "mindapi/base/format.h"
|
||||
|
||||
namespace mindspore::kernel {
|
||||
namespace acl {
|
||||
|
@ -29,6 +31,7 @@ struct AclModelOptions {
|
|||
std::string dump_cfg_path;
|
||||
std::set<uint64_t> batch_size;
|
||||
std::set<std::pair<uint64_t, uint64_t>> image_size;
|
||||
std::vector<Format> input_format;
|
||||
|
||||
AclModelOptions() : device_id(0) {}
|
||||
};
|
||||
|
|
|
@ -35,6 +35,7 @@ AscendKernelPlugin::AscendKernelPlugin() : handle_(nullptr), create_kernel_map_(
|
|||
void AscendKernelPlugin::Register() {
|
||||
#if !defined(_WIN32)
|
||||
if (is_registered_) {
|
||||
MS_LOG(INFO) << "Create kernel map has been created.";
|
||||
return;
|
||||
}
|
||||
std::string ascend_kernel_plugin_path;
|
||||
|
@ -82,6 +83,7 @@ void AscendKernelPlugin::DestroyAscendKernelMap() {
|
|||
return;
|
||||
}
|
||||
destroy_map_func(create_kernel_map_);
|
||||
is_registered_ = false;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
@ -88,6 +88,7 @@ bool CustomAscendKernelMod::InitParam(const std::vector<KernelTensorPtr> &inputs
|
|||
MS_LOG(ERROR) << "Input " << idx << " is invalid.";
|
||||
return false;
|
||||
}
|
||||
// buffer deep copy
|
||||
Buffer om_data(inputs[idx]->GetData()->addr, inputs[idx]->GetData()->size);
|
||||
model_infer_ = std::make_shared<ModelInfer>(om_data, acl_options_);
|
||||
if (model_infer_ == nullptr) {
|
||||
|
@ -100,6 +101,11 @@ bool CustomAscendKernelMod::InitParam(const std::vector<KernelTensorPtr> &inputs
|
|||
MS_LOG(ERROR) << "Create DynShapeProcess failed.";
|
||||
return false;
|
||||
}
|
||||
if (inputs[idx]->GetData()->addr != nullptr) {
|
||||
free(inputs[idx]->GetData()->addr);
|
||||
inputs[idx]->GetData()->addr = nullptr;
|
||||
inputs[idx]->GetData()->size = 0;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -141,6 +147,7 @@ int CustomAscendKernelMod::LoadModel() {
|
|||
}
|
||||
acl_options_->batch_size = model_infer_->GetDynamicBatch();
|
||||
acl_options_->image_size = model_infer_->GetDynamicImage();
|
||||
acl_options_->input_format = model_infer_->GetInputFormat();
|
||||
|
||||
MS_LOG(INFO) << "Load om data success.";
|
||||
return lite::RET_OK;
|
||||
|
@ -172,18 +179,22 @@ int CustomAscendKernelMod::SetInputAndOutputAddr(const std::vector<AddressPtr> &
|
|||
return lite::RET_ERROR;
|
||||
}
|
||||
for (size_t i = 0; i < inputs_.size(); ++i) {
|
||||
if (inputs[i] == nullptr || inputs_[i] == nullptr) {
|
||||
MS_LOG(ERROR) << "Input " << i << " is nullptr.";
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
if (inputs[i]->addr == nullptr || inputs[i]->size == 0) {
|
||||
MS_LOG(ERROR) << "Input " << i << " addr is invalid.";
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
inputs_[i]->SetData(inputs[i]);
|
||||
}
|
||||
for (size_t j = 0; j < outputs_.size(); ++j) {
|
||||
if (outputs[j]->addr == nullptr || outputs[j]->size == 0) {
|
||||
MS_LOG(ERROR) << "Output " << j << " addr is invalid.";
|
||||
for (size_t i = 0; i < outputs_.size(); ++i) {
|
||||
if (outputs[i] == nullptr || outputs_[i] == nullptr) {
|
||||
MS_LOG(ERROR) << "Output " << i << " is nullptr.";
|
||||
return lite::RET_ERROR;
|
||||
}
|
||||
outputs_[j]->SetData(outputs[j]);
|
||||
outputs_[i]->SetData(outputs[i]);
|
||||
}
|
||||
return lite::RET_OK;
|
||||
}
|
||||
|
@ -206,6 +217,10 @@ bool CustomAscendKernelMod::Launch(const std::vector<AddressPtr> &inputs, const
|
|||
MS_LOG(ERROR) << "Custom kernel execute failed.";
|
||||
return false;
|
||||
}
|
||||
for (size_t i = 0; i < outputs.size(); ++i) {
|
||||
outputs[i]->addr = outputs_[i]->GetData()->addr;
|
||||
outputs[i]->size = outputs_[i]->GetData()->size;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
|
@ -48,6 +48,8 @@ class CustomAscendKernelMod : public kernel::KernelMod {
|
|||
bool Launch(const std::vector<AddressPtr> &inputs, const std::vector<AddressPtr> &workspace,
|
||||
const std::vector<AddressPtr> &outputs, void *stream_ptr) override;
|
||||
|
||||
// Returns a copy of the outputs_ vector (the kernel's output tensors).
std::vector<KernelTensorPtr> RetrieveOutputShape() { return outputs_; }
|
||||
|
||||
private:
|
||||
void RecordInputDataIndex(const std::vector<KernelTensorPtr> &inputs);
|
||||
void SetDeviceId();
|
||||
|
|
|
@ -45,8 +45,10 @@ Status SingleOpInferSession::AscendInit(const std::shared_ptr<Context> &context)
|
|||
auto ascend_device_info = device_info->Cast<mindspore::AscendDeviceInfo>();
|
||||
MS_EXCEPTION_IF_NULL(ascend_device_info);
|
||||
device_id_ = ascend_device_info->GetDeviceID();
|
||||
return kSuccess;
|
||||
}
|
||||
}
|
||||
MS_LOG(DEBUG) << "There is no ascend device info, no need to register ascend plugin.";
|
||||
return kSuccess;
|
||||
}
|
||||
|
||||
|
@ -167,6 +169,7 @@ Status SingleOpInferSession::RunGraph(const std::vector<tensor::TensorPtr> &inpu
|
|||
bool ret = true;
|
||||
try {
|
||||
ret = kernel_mod->Launch(kernel_inputs, kernel_workspaces, kernel_outputs, 0);
|
||||
RuntimeUtils::UpdateKernelNodeOutputInfo(kernel_node, kernel_outputs);
|
||||
} catch (std::exception &e) {
|
||||
MS_LOG(EXCEPTION) << e.what();
|
||||
}
|
||||
|
|
|
@ -28,7 +28,11 @@
|
|||
#include "backend/common/session/anf_runtime_algorithm.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace {
|
||||
constexpr auto kNameCustomAscend = "CustomAscend";
|
||||
const size_t tensor_max_size_utils = 0x1000000;
|
||||
} // namespace
|
||||
|
||||
void *RuntimeUtils::GetAddressPtr(device::DeviceAddressPtr address_ptr) {
|
||||
MS_EXCEPTION_IF_NULL(address_ptr);
|
||||
return address_ptr->ptr_;
|
||||
|
@ -73,6 +77,10 @@ void RuntimeUtils::CopyInputTensorsToKernelGraph(const std::vector<tensor::Tenso
|
|||
auto input = inputs[i];
|
||||
auto graph_input = graph_inputs[i];
|
||||
auto graph_input_addr = AnfAlgo::GetMutableOutputAddr(graph_input, 0);
|
||||
if (graph_input_addr->ptr_ == nullptr) {
|
||||
MS_LOG(EXCEPTION) << "Output_idx" << i << " of input " << graph_input->DebugString()
|
||||
<< " output addr ptr is nullptr.";
|
||||
}
|
||||
memcpy(graph_input_addr->ptr_, input->data_c(), graph_input_addr->size_);
|
||||
}
|
||||
}
|
||||
|
@ -219,4 +227,59 @@ device::DeviceAddressPtr RuntimeUtils::CreateDeviceAddress(void *device_ptr, siz
|
|||
TypeId type_id) {
|
||||
return std::make_shared<InferDeviceAddress>(device_ptr, device_size, format, type_id);
|
||||
}
|
||||
|
||||
void RuntimeUtils::UpdateKernelNodeOutputInfo(const AnfNodePtr &kernel_node,
|
||||
const std::vector<kernel::AddressPtr> &output_addrs) {
|
||||
std::string kernel_name = common::AnfAlgo::GetCNodeName(kernel_node);
|
||||
if (kernel_name == kNameCustomAscend) {
|
||||
size_t output_num = common::AnfAlgo::GetOutputTensorNum(kernel_node);
|
||||
if (output_addrs.size() != output_num) {
|
||||
MS_LOG(ERROR) << "Output addr size[" << output_addrs.size() << "] is not equal to node outputs size["
|
||||
<< output_num << "]";
|
||||
return;
|
||||
}
|
||||
// update output addr
|
||||
bool is_update_shape = false;
|
||||
for (size_t i = 0; i < output_num; ++i) {
|
||||
auto device_address = AnfAlgo::GetMutableOutputAddr(kernel_node, i);
|
||||
auto addr_ptr = device_address->GetMutablePtr();
|
||||
if (addr_ptr != nullptr && output_addrs[i]->addr != addr_ptr) {
|
||||
free(addr_ptr);
|
||||
device_address->set_ptr(output_addrs[i]->addr);
|
||||
device_address->SetSize(output_addrs[i]->size);
|
||||
is_update_shape = true;
|
||||
}
|
||||
}
|
||||
if (!is_update_shape) {
|
||||
MS_LOG(DEBUG) << "There is no need to update output shape.";
|
||||
return;
|
||||
}
|
||||
// update output shape
|
||||
auto kernel_mod = AnfAlgo::GetKernelMod(kernel_node);
|
||||
MS_EXCEPTION_IF_NULL(kernel_mod);
|
||||
auto kernel_tensors = kernel_mod->RetrieveOutputShape();
|
||||
if (kernel_tensors.empty()) {
|
||||
MS_LOG(ERROR) << "The output shape size of custom ascend is empty.";
|
||||
return;
|
||||
}
|
||||
auto abstract = kernel_node->abstract();
|
||||
MS_EXCEPTION_IF_NULL(abstract);
|
||||
if (utils::isa<abstract::AbstractTuplePtr>(abstract)) {
|
||||
auto abstract_tuple = abstract->cast<abstract::AbstractTuplePtr>();
|
||||
MS_EXCEPTION_IF_NULL(abstract_tuple);
|
||||
if (abstract_tuple->elements().size() != kernel_tensors.size()) {
|
||||
MS_LOG(ERROR) << "Abstract size[" << abstract_tuple->elements().size() << "] is not equal to output shape size["
|
||||
<< kernel_tensors.size() << "]";
|
||||
return;
|
||||
}
|
||||
for (size_t i = 0; i < abstract_tuple->elements().size(); ++i) {
|
||||
auto tmp_abstract = abstract_tuple->elements()[i];
|
||||
MS_EXCEPTION_IF_NULL(tmp_abstract);
|
||||
tmp_abstract->set_shape(std::make_shared<abstract::Shape>(kernel_tensors[i]->GetShapeVector()));
|
||||
}
|
||||
} else {
|
||||
abstract->set_shape(std::make_shared<abstract::Shape>(kernel_tensors[0]->GetShapeVector()));
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -46,6 +46,8 @@ class RuntimeUtils {
|
|||
static void AssignKernelOutputAddress(KernelGraphPtr kernel_graph);
|
||||
static device::DeviceAddressPtr CreateDeviceAddress(void *device_ptr, size_t device_size, const string &format,
|
||||
TypeId type_id);
|
||||
static void UpdateKernelNodeOutputInfo(const AnfNodePtr &kernel_node,
|
||||
const std::vector<kernel::AddressPtr> &output_addrs);
|
||||
};
|
||||
} // namespace mindspore
|
||||
|
||||
|
|
|
@ -3,27 +3,27 @@
|
|||
|
||||
# Run in Ascend
|
||||
#onnx
|
||||
#hdc_resnet_1w_class.onnx 3
|
||||
#googlenet-9.onnx;1:data_0;1,3,224,224;;offline_resize 5
|
||||
#cbg_ai_gender_resnet34_mutiscal_v1_1012.onnx;1:input.1;1,1,300,64;;offline_resize 5
|
||||
hdc_resnet_1w_class.onnx 3
|
||||
googlenet-9.onnx;1:data_0;1,3,224,224;;offline_resize 5
|
||||
cbg_ai_gender_resnet34_mutiscal_v1_1012.onnx;1:input.1;1,1,300,64;;offline_resize 5
|
||||
hdc_efficientnet_b3_1w_class.onnx;1:input.1;1,3,224,224;;offline_resize 5
|
||||
|
||||
#mindir
|
||||
mindspore_uniir_mobilenetv2.mindir 0.5
|
||||
|
||||
#caffe
|
||||
#hdc_resnet 4
|
||||
#machine_vision_mobile_net101_resnet 5
|
||||
#ml_video_edit_img_segment 5
|
||||
#mtk_face_recognition_v1 5
|
||||
hdc_resnet 4
|
||||
machine_vision_mobile_net101_resnet 5
|
||||
ml_video_edit_img_segment 5
|
||||
mtk_face_recognition_v1 5
|
||||
|
||||
#tf
|
||||
#cbg_ai_ocr_direction.pb;1:input;1,32,32,1;;offline_resize 2
|
||||
#cbg_ai_ocr_detect_straight.pb;1:input;1,32,32,3;;offline_resize 5
|
||||
#cbg_ai_ocr_inception_curve.pb;1:input;1,960,960,3;;offline_resize 5
|
||||
#cbg_ai_ocr_language_classify.pb;1:input_0;2,32,512,1;;offline_resize 5
|
||||
#cbg_ai_ocr_recognize_latin.pb;1:input_0;1,64,64,1;;offline_resize 5
|
||||
#open_source_inception_v3.pb;1:input;2,299,299,3;;offline_resize 5
|
||||
#open_source_mobilenet_v2.pb;1:Placeholder;1,224,224,3;;offline_resize 5
|
||||
#open_source_squeeze_net.pb;1:Placeholder;2,224,224,3;;offline_resize 5
|
||||
#open_source_densenet.pb;1:Placeholder;2,224,224,3;;offline_resize 5
|
||||
cbg_ai_ocr_direction.pb;1:input;1,32,32,1;;offline_resize 2
|
||||
cbg_ai_ocr_detect_straight.pb;1:input;1,32,32,3;;offline_resize 5
|
||||
cbg_ai_ocr_inception_curve.pb;1:input;1,960,960,3;;offline_resize 5
|
||||
cbg_ai_ocr_language_classify.pb;1:input_0;2,32,512,1;;offline_resize 5
|
||||
cbg_ai_ocr_recognize_latin.pb;1:input_0;1,64,64,1;;offline_resize 5
|
||||
open_source_inception_v3.pb;1:input;2,299,299,3;;offline_resize 5
|
||||
open_source_mobilenet_v2.pb;1:Placeholder;1,224,224,3;;offline_resize 5
|
||||
open_source_squeeze_net.pb;1:Placeholder;2,224,224,3;;offline_resize 5
|
||||
open_source_densenet.pb;1:Placeholder;2,224,224,3;;offline_resize 5
|
||||
|
|
|
@ -131,7 +131,12 @@ function Convert() {
|
|||
fi
|
||||
if [ $? = 0 ]; then
|
||||
converter_result='converter '${model_type}''${quant_type}' '${model_name}' pass';echo ${converter_result} >> $5
|
||||
model_size=`ls ${output_file}.ms -l|awk -F ' ' '{print $5}'`
|
||||
local model_size
|
||||
if [[ ${export_mindir} =~ "MINDIR" ]]; then
|
||||
model_size=`ls ${output_file}.mindir -l|awk -F ' ' '{print $5}'`
|
||||
else
|
||||
model_size=`ls ${output_file}.ms -l|awk -F ' ' '{print $5}'`
|
||||
fi
|
||||
let calib_final_size=${calib_size}+50
|
||||
if [[ -n ${calib_size} ]];then
|
||||
if [ ${model_size} -gt ${calib_final_size} ]; then
|
||||
|
|
|
@ -276,7 +276,7 @@ class MS_API BenchmarkBase {
|
|||
}
|
||||
|
||||
CheckTensor *calibTensor = iter->second;
|
||||
if (calibTensor->shape != castedMSShape) {
|
||||
if (!CheckShapeValid(calibTensor->shape, castedMSShape)) {
|
||||
std::ostringstream oss;
|
||||
oss << "Shape of mslite output(";
|
||||
for (auto dim : castedMSShape) {
|
||||
|
@ -380,7 +380,7 @@ class MS_API BenchmarkBase {
|
|||
}
|
||||
|
||||
CheckTensor *calibTensor = iter->second;
|
||||
if (calibTensor->shape != castedMSShape) {
|
||||
if (!CheckShapeValid(calibTensor->shape, castedMSShape)) {
|
||||
std::ostringstream oss;
|
||||
oss << "Shape of mslite output(";
|
||||
for (auto dim : castedMSShape) {
|
||||
|
@ -433,6 +433,34 @@ class MS_API BenchmarkBase {
|
|||
[&]() { return static_cast<T>(distribution(random_engine_)); });
|
||||
}
|
||||
|
||||
// Returns true when the calibration shape and the real output shape describe the same
// tensor layout up to trailing dimensions of size 1, e.g. (1, 225) matches (1, 225, 1, 1).
// Identical shapes (including two empty shapes) always match.
bool CheckShapeValid(const std::vector<size_t> &calib_output_shape, const std::vector<size_t> &real_output_shape) {
  const bool calib_is_longer = calib_output_shape.size() > real_output_shape.size();
  const std::vector<size_t> &longer_shape = calib_is_longer ? calib_output_shape : real_output_shape;
  const std::vector<size_t> &shorter_shape = calib_is_longer ? real_output_shape : calib_output_shape;

  // Walk the longer shape once: inside the common prefix the dims must be equal,
  // past the end of the shorter shape every remaining dim must be exactly 1.
  for (size_t pos = 0; pos < longer_shape.size(); ++pos) {
    const size_t required_dim = pos < shorter_shape.size() ? shorter_shape[pos] : 1;
    if (longer_shape[pos] != required_dim) {
      return false;
    }
  }
  return true;
}
|
||||
|
||||
int CheckThreadNumValid();
|
||||
|
||||
int CheckModelValid();
|
||||
|
|
Loading…
Reference in New Issue