fix bug in AscendHostQueue, replace data node with const in init_graph, and set the heterogeneous flag in GeDeviceContext::Initialize
xiao_yao1994 2022-11-03 15:14:32 +08:00
parent 7262ee8ac9
commit efe85bb667
7 changed files with 63 additions and 115 deletions


@@ -546,6 +546,7 @@ bool AscendHostQueue::SerializeDataItemInfos(std::vector<DataItemInfo> *items, v
sizeof(DataItemInfo::ItemInfo) + (*items)[i].item_info.dim_num * sizeof(int64_t) + (*items)[i].item_info.data_len;
}
total_size += sizeof(RuntimeTensorDesc);
auto errno_ret = rtMbufAlloc(buff, total_size);
if (errno_ret != ACL_RT_SUCCESS) {
MS_LOG(ERROR) << "Call rtMbufAlloc with size[" << total_size << "] failed, ret = " << errno_ret;
@@ -571,7 +572,7 @@ bool AscendHostQueue::SerializeDataItemInfos(std::vector<DataItemInfo> *items, v
if ((head_buf != nullptr) && (head_size > kMbufHeadEndOfSequencePos)) {
MS_LOG(DEBUG) << "Host queue set end_of_sequence mbuf head.";
}
data = ::ge::ValueToPtr(::ge::PtrToValue(data) + sizeof(RuntimeTensorDesc));
size_t offset = 0UL;
for (size_t i = 0UL; i < count; ++i) {
errno_ret = memcpy_s(::ge::ValueToPtr(::ge::PtrToValue(data) + offset), sizeof(DataItemInfo::ItemInfo),
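The fix above reserves space for a RuntimeTensorDesc header at the front of the mbuf: the allocation size now includes it, and the serialized item infos are copied in starting after it instead of at offset zero. A minimal, self-contained sketch of that layout, using a stub type and plain memcpy rather than the MindSpore/ACL calls:

#include <cstdint>
#include <cstring>
#include <vector>

// Stand-in for the packed descriptor declared in the data-queue header change below.
struct RuntimeTensorDescStub {
  uint8_t bytes[1024];
};

int main() {
  std::vector<std::vector<uint8_t>> items = {{1, 2, 3}, {4, 5}};
  size_t total_size = sizeof(RuntimeTensorDescStub);  // reserve the header up front
  for (const auto &item : items) {
    total_size += item.size();
  }
  std::vector<uint8_t> buff(total_size);
  // Skip past the descriptor, then pack the serialized items back to back.
  size_t offset = sizeof(RuntimeTensorDescStub);
  for (const auto &item : items) {
    std::memcpy(buff.data() + offset, item.data(), item.size());
    offset += item.size();
  }
  return offset == total_size ? 0 : 1;
}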


@@ -103,6 +103,20 @@ class AscendHostQueue : public DataQueue {
DataQueueStatus Front(std::vector<DataQueueItem> *data) const override { return DataQueueStatus::SUCCESS; }
DataQueueStatus Pop() override { return DataQueueStatus::SUCCESS; }
static constexpr int64_t kMaxDimSize = 32;
#pragma pack(push, 1)
struct RuntimeTensorDesc {
uint64_t data_addr;
int64_t data_offset_size;
int64_t dtype;
int64_t shape[kMaxDimSize + 1];
int64_t original_shape[kMaxDimSize + 1];
int64_t format;
int64_t sub_format;
uint8_t reserved[456];
};
#pragma pack(pop)
struct DataItemInfo {
struct ItemInfo {
int32_t version;
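A quick arithmetic check on the packed struct added above (my own sanity check, not part of the commit): 3*8 bytes for the scalar fields, 2*(32+1)*8 bytes for the two shape arrays, 2*8 bytes for the formats and 456 reserved bytes come to exactly 1024 bytes, which a standalone translation unit can assert:

#include <cstdint>

constexpr int64_t kMaxDimSize = 32;

#pragma pack(push, 1)
struct RuntimeTensorDesc {
  uint64_t data_addr;
  int64_t data_offset_size;
  int64_t dtype;
  int64_t shape[kMaxDimSize + 1];
  int64_t original_shape[kMaxDimSize + 1];
  int64_t format;
  int64_t sub_format;
  uint8_t reserved[456];
};
#pragma pack(pop)

// 24 + 264 + 264 + 8 + 8 + 456 = 1024
static_assert(sizeof(RuntimeTensorDesc) == 1024, "RuntimeTensorDesc layout changed");

int main() { return 0; }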


@@ -25,8 +25,6 @@
#include "plugin/device/ascend/hal/device/tensorprint_utils.h"
#include "acl/acl_tdt.h"
#include "acl/acl_base.h"
#include "runtime/dev.h"
#include "runtime/config.h"
#include "toolchain/plog.h"
#include "framework/common/helper/model_helper.h"
#include "common/util/error_manager/error_manager.h"
@@ -226,11 +224,6 @@ void AscendDeprecatedInterface::DumpProfileParallelStrategy(const FuncGraphPtr &
bool AscendDeprecatedInterface::OpenTsd(const std::shared_ptr<MsContext> &ms_context_ptr) {
MS_EXCEPTION_IF_NULL(ms_context_ptr);
// set MS_CTX_ENABLE_GE_HETEROGENOUS true if ge heterogeneous mode
int32_t is_heterogeneous = 0;
(void)rtGetIsHeterogenous(&is_heterogeneous);
ms_context_ptr->set_param<bool>(MS_CTX_ENABLE_GE_HETEROGENOUS, is_heterogeneous == 1);
if (ms_context_ptr->get_param<bool>(MS_CTX_IS_PYNATIVE_GE_INIT)) {
return true;
}


@@ -35,6 +35,8 @@
#include "runtime/hardware/device_context_manager.h"
#include "plugin/device/ascend/hal/hccl_adapter/hccl_adapter.h"
#include "plugin/device/ascend/optimizer/ge_optimization.h"
#include "runtime/config.h"
#include "runtime/dev.h"
namespace mindspore {
namespace device {
@@ -93,41 +95,6 @@ transform::TensorOrderMap GetParams(const FuncGraphPtr &anf_graph) {
return res;
}
std::tuple<std::vector<transform::GeTensorPtr>, std::vector<transform::GeTensorPtr>> GetInputTensor(
const FuncGraphPtr &anf_graph) {
MS_EXCEPTION_IF_NULL(anf_graph);
transform::TensorOrderMap init_input_map;
std::vector<tensor::TensorPtr> init_input;
std::vector<tensor::TensorPtr> compute_input;
for (auto &anf_node : anf_graph->parameters()) {
MS_EXCEPTION_IF_NULL(anf_node);
auto para = anf_node->cast<ParameterPtr>();
MS_EXCEPTION_IF_NULL(para);
if (para->has_default()) {
auto value = para->default_param();
MS_EXCEPTION_IF_NULL(value);
init_input_map.emplace(para->name(), value->cast<std::shared_ptr<tensor::Tensor>>());
} else {
auto abstract = para->abstract();
MS_EXCEPTION_IF_NULL(abstract);
auto undetermined_abstract = abstract->cast<std::shared_ptr<abstract::AbstractUndetermined>>();
MS_EXCEPTION_IF_NULL(undetermined_abstract);
MS_EXCEPTION_IF_NULL(undetermined_abstract->element());
auto base_shape = para->Shape();
MS_EXCEPTION_IF_NULL(base_shape);
auto type = undetermined_abstract->element()->BuildType();
MS_EXCEPTION_IF_NULL(type);
auto shape = base_shape->cast<abstract::ShapePtr>();
compute_input.emplace_back(
std::make_shared<tensor::Tensor>(type->type_id(), (shape != nullptr ? shape->shape() : ShapeVector{})));
}
}
(void)std::transform(init_input_map.begin(), init_input_map.end(), std::back_inserter(init_input),
[](const std::pair<std::string, tensor::TensorPtr> &item) { return item.second; });
return {transform::ConvertInputTensors(init_input, kOpFormat_NCHW),
transform::ConvertInputTensors(compute_input, kOpFormat_NCHW)};
}
bool AddDFGraph(const FuncGraphPtr &anf_graph, const transform::TensorOrderMap &init_inputs_map, bool export_air) {
MS_EXCEPTION_IF_NULL(anf_graph);
auto converter = transform::NewConverter(anf_graph);
@@ -164,6 +131,25 @@ bool AddDFGraph(const FuncGraphPtr &anf_graph, const transform::TensorOrderMap &
return true;
}
std::vector<transform::GeTensorPtr> GetInputTensors(const FuncGraphPtr &anf_graph) {
MS_EXCEPTION_IF_NULL(anf_graph);
transform::TensorOrderMap init_input_map;
std::vector<tensor::TensorPtr> init_input;
for (auto &anf_node : anf_graph->parameters()) {
MS_EXCEPTION_IF_NULL(anf_node);
auto para = anf_node->cast<ParameterPtr>();
MS_EXCEPTION_IF_NULL(para);
if (para->has_default()) {
auto value = para->default_param();
MS_EXCEPTION_IF_NULL(value);
init_input_map.emplace(para->name(), value->cast<std::shared_ptr<tensor::Tensor>>());
}
}
(void)std::transform(init_input_map.begin(), init_input_map.end(), std::back_inserter(init_input),
[](const std::pair<std::string, tensor::TensorPtr> &item) { return item.second; });
return transform::ConvertInputTensors(init_input, kOpFormat_NCHW);
}
void RunGEInitGraph(const FuncGraphPtr &anf_graph) {
MS_LOG(DEBUG) << "ExecInitGraph start.";
@@ -182,7 +168,6 @@ void RunGEInitGraph(const FuncGraphPtr &anf_graph) {
}
std::vector<transform::GeTensorPtr> ge_tensors;
std::tie(ge_tensors, std::ignore) = GetInputTensor(anf_graph);
{
// Release GIL before calling into (potentially long-running) C++ code
mindspore::ScopedLongRunning long_running;
@@ -196,6 +181,7 @@ void RunGEInitGraph(const FuncGraphPtr &anf_graph) {
if ((ConfigManager::GetInstance().parallel_strategy() == ParallelStrategy::DISTRIBUTION) &&
(transform::GetGraphByName(BROADCAST_GRAPH_NAME) != nullptr)) {
run_options.name = BROADCAST_GRAPH_NAME;
ge_tensors = GetInputTensors(anf_graph);
ret = transform::RunGraph(graph_runner, run_options, ge_tensors, &ge_outputs);
if (ret != transform::Status::SUCCESS) {
MS_LOG(EXCEPTION) << "Exec BROADCAST_GRAPH_NAME failed.";
@@ -412,13 +398,18 @@ void GeDeviceContext::Initialize() {
MS_EXCEPTION_IF_NULL(device_res_manager_);
device_res_manager_->Initialize();
MS_EXCEPTION_IF_NULL(MsContext::GetInstance());
InitGe(MsContext::GetInstance());
auto ms_context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(ms_context);
// set MS_CTX_ENABLE_GE_HETEROGENOUS true according to heterogeneous mode
int32_t is_heterogenous = 0;
(void)rtGetIsHeterogenous(&is_heterogenous);
ms_context->set_param<bool>(MS_CTX_ENABLE_GE_HETEROGENOUS, is_heterogenous == 1);
InitGe(ms_context);
std::string rank_id = common::GetEnv("RANK_ID");
std::string rank_table_file = common::GetEnv("RANK_TABLE_FILE");
if (!rank_id.empty() && !rank_table_file.empty()) {
(void)hccl::HcclAdapter::GetInstance().InitHccl(MsContext::GetInstance()->get_param<uint32_t>(MS_CTX_DEVICE_ID),
rank_id, rank_table_file, hccl::HcclMode::kGraph);
(void)hccl::HcclAdapter::GetInstance().InitHccl(ms_context->get_param<uint32_t>(MS_CTX_DEVICE_ID), rank_id,
rank_table_file, hccl::HcclMode::kGraph);
}
initialized_ = true;
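For context: this is where the heterogeneous check removed from OpenTsd (see the earlier hunk) now lives, so the flag is determined once before InitGe runs. A small sketch of the query itself, assuming the runtime headers added in this diff (runtime/config.h / runtime/dev.h) declare rtGetIsHeterogenous:

#include <cstdint>
#include "runtime/config.h"  // assumption: one of these two headers declares rtGetIsHeterogenous
#include "runtime/dev.h"

// Ask the Ascend runtime whether GE heterogeneous mode is active; the caller then
// caches the result in MsContext under MS_CTX_ENABLE_GE_HETEROGENOUS.
bool QueryGeHeterogenous() {
  int32_t is_heterogenous = 0;
  (void)rtGetIsHeterogenous(&is_heterogenous);  // return code deliberately ignored, as in the diff
  return is_heterogenous == 1;
}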


@@ -315,8 +315,7 @@ void DfGraphConvertor::SetupBroadcast(const std::shared_ptr<HcomBroadcast> &broa
this->broadcast_graph_ = broadcast_graph;
}
void DfGraphConvertor::InitParamWithData(const TensorOrderMap &tensors) {
int index = 0;
void DfGraphConvertor::InitParamWithConst(const TensorOrderMap &tensors) {
std::vector<Operator> init_input;
for (auto it : tensors) {
std::string name = it.first;
@@ -335,50 +334,36 @@ void DfGraphConvertor::InitParamWithData(const TensorOrderMap &tensors) {
if (op_itor == op_cache_.end()) {
MS_LOG(EXCEPTION) << "Can not find op for node " << node->ToString() << ".";
}
auto adpt = FindAdapter(kNameParam, training_);
if (adpt == nullptr) {
auto adpt_const = FindAdapter(kNameConst, training_);
if (adpt_const == nullptr) {
continue;
}
auto param_op = adpt->generate(name + "_data");
MS_LOG(INFO) << "Add parameter " << name << " as input, index " << index << ".";
auto const_op = adpt_const->generate(name + "_const");
(void)adpt_const->setAttr(const_op, "value", it.second);
auto desc = TransformUtil::GetGeTensorDesc(it.second->shape_c(), it.second->data_type(), kOpFormat_NCHW);
if (desc == nullptr) {
MS_LOG(WARNING) << "Create const " << name << " output descriptor failed!";
continue;
}
(void)std::static_pointer_cast<Constant>(const_op)->update_output_desc_y(*desc);
if (!training_) {
auto adpt_const = FindAdapter(kNameConst, training_);
if (adpt_const == nullptr) {
continue;
}
auto const_op = adpt_const->generate(name + "_const");
(void)adpt_const->setAttr(const_op, "value", it.second);
auto const_op_desc = TransformUtil::GetGeTensorDesc(it.second->shape_c(), it.second->data_type(), kOpFormat_NCHW);
if (const_op_desc == nullptr) {
MS_LOG(WARNING) << "Create variable " << name << " output descriptor failed!";
continue;
}
(void)std::static_pointer_cast<Constant>(const_op)->update_output_desc_y(*const_op_desc);
const_op_to_value_[const_op] = it.second;
vars_[name] = const_op;
op_itor->second = const_op;
continue;
}
// create tensor descriptor for output descriptor
auto desc = TransformUtil::GetGeTensorDesc(it.second->shape_c(), it.second->data_type(), kOpFormat_NCHW);
if (desc == nullptr) {
MS_LOG(ERROR) << "Create variable " << name << " output descriptor failed!";
continue;
}
// we need three variable ops for each graph with same name
// build init subgraph
if (it.second->is_init() == 0) {
(void)std::static_pointer_cast<Data>(param_op)->set_attr_index(index++);
auto init_var = std::make_shared<Variable>(name);
auto assign_op = std::make_shared<Assign>("assign_" + name);
(void)init_var->update_output_desc_y(*desc);
(void)assign_op->set_input_ref(*init_var).set_input_value(*param_op);
(void)assign_op->set_input_ref(*init_var).set_input_value(*const_op);
init_input.push_back(*init_var);
init_ops_.push_back(param_op);
init_ops_.push_back(const_op);
init_ops_.push_back(assign_op);
init_ops_.push_back(init_var);
}
@@ -419,7 +404,7 @@ DfGraphConvertor &DfGraphConvertor::InitParam(const TensorOrderMap &tensors) {
}
}
}
InitParamWithData(tensors);
InitParamWithConst(tensors);
init_sout_ << "}" << endl;
return *this;
}
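To summarize the DfGraphConvertor change: InitParamWithConst no longer creates an indexed Data node per parameter; every parameter value becomes a Const op named name + "_const" with the tensor stored in its "value" attribute, and for tensors with is_init() == 0 an Assign feeds that Const into the corresponding Variable of the init subgraph. A toy, self-contained sketch of the resulting wiring, using stand-in types rather than the real GE operator classes:

#include <string>
#include <vector>

// Minimal stand-in for a GE operator node.
struct Op {
  std::string type;  // "Const", "Variable" or "Assign"
  std::string name;
};

// Build the ops contributed to the init subgraph for one parameter.
std::vector<Op> BuildInitOpsForParam(const std::string &param_name) {
  Op const_op{"Const", param_name + "_const"};     // holds the initial tensor as an attribute
  Op init_var{"Variable", param_name};             // persistent variable of the same name
  Op assign_op{"Assign", "assign_" + param_name};  // assign(ref = init_var, value = const_op)
  return {const_op, init_var, assign_op};
}

int main() {
  auto ops = BuildInitOpsForParam("conv1.weight");  // hypothetical parameter name
  return ops.size() == 3 ? 0 : 1;
}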


@@ -138,7 +138,7 @@ class DfGraphConvertor {
DfGraphConvertor &InitParam(const TensorOrderMap &tensors);
DfGraphConvertor &GenerateCheckpointGraph();
DfGraphConvertor &GenerateBroadcastGraph(const TensorOrderMap &tensors);
void InitParamWithData(const TensorOrderMap &tensors);
void InitParamWithConst(const TensorOrderMap &tensors);
OutHandler GetNormalOpInput(const AnfNodePtr &node, const AnfNodePtr &pred);
void DrawOpInput(const AnfNodePtr &node, const AnfNodePtr &pred, size_t i);
void SetOpInput(const OpAdapterPtr &adpt, const CNodePtr &node);


@@ -135,41 +135,6 @@ void CreateSessionAndGraphRunner() {
}
}
std::tuple<std::vector<transform::GeTensorPtr>, std::vector<transform::GeTensorPtr>> GetInputTensor(
const FuncGraphPtr &anf_graph) {
MS_EXCEPTION_IF_NULL(anf_graph);
transform::TensorOrderMap init_input_map;
std::vector<tensor::TensorPtr> init_input;
std::vector<tensor::TensorPtr> compute_input;
for (auto &anf_node : anf_graph->parameters()) {
MS_EXCEPTION_IF_NULL(anf_node);
auto para = anf_node->cast<ParameterPtr>();
MS_EXCEPTION_IF_NULL(para);
if (para->has_default()) {
auto value = para->default_param();
MS_EXCEPTION_IF_NULL(value);
init_input_map.emplace(para->name(), value->cast<std::shared_ptr<tensor::Tensor>>());
} else {
auto abstract = para->abstract();
MS_EXCEPTION_IF_NULL(abstract);
auto undetermined_abstract = abstract->cast<std::shared_ptr<abstract::AbstractUndetermined>>();
MS_EXCEPTION_IF_NULL(undetermined_abstract);
MS_EXCEPTION_IF_NULL(undetermined_abstract->element());
auto base_shape = para->Shape();
MS_EXCEPTION_IF_NULL(base_shape);
auto type = undetermined_abstract->element()->BuildType();
MS_EXCEPTION_IF_NULL(type);
auto shape = base_shape->cast<abstract::ShapePtr>();
compute_input.emplace_back(
std::make_shared<tensor::Tensor>(type->type_id(), (shape != nullptr ? shape->shape() : ShapeVector{})));
}
}
(void)std::transform(init_input_map.begin(), init_input_map.end(), std::back_inserter(init_input),
[](const std::pair<std::string, tensor::TensorPtr> &item) { return item.second; });
return {transform::ConvertInputTensors(init_input, kOpFormat_NCHW),
transform::ConvertInputTensors(compute_input, kOpFormat_NCHW)};
}
void RunGeInitGraph(const FuncGraphPtr &anf_graph) {
MS_LOG(DEBUG) << "ExecInitGraph start.";
@@ -188,7 +153,6 @@ void RunGeInitGraph(const FuncGraphPtr &anf_graph) {
}
std::vector<transform::GeTensorPtr> ge_tensors;
std::tie(ge_tensors, std::ignore) = GetInputTensor(anf_graph);
{
// Release GIL before calling into (potentially long-running) C++ code
mindspore::ScopedLongRunning long_running;