fix bug in AscendHostQueue, replace data node with const in init_graph, and set heterogenous

This commit is contained in:
parent 7262ee8ac9
commit efe85bb667
@@ -546,6 +546,7 @@ bool AscendHostQueue::SerializeDataItemInfos(std::vector<DataItemInfo> *items, v
         sizeof(DataItemInfo::ItemInfo) + (*items)[i].item_info.dim_num * sizeof(int64_t) + (*items)[i].item_info.data_len;
   }

+  total_size += sizeof(RuntimeTensorDesc);
   auto errno_ret = rtMbufAlloc(buff, total_size);
   if (errno_ret != ACL_RT_SUCCESS) {
     MS_LOG(ERROR) << "Call rtMbufAlloc with size[" << total_size << "] failed, ret = " << errno_ret;
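Taken together with the header change further down, the allocation now reserves one fixed-size tensor descriptor ahead of the per-item records. A minimal sketch of the size arithmetic with simplified stand-in types (field subset only; the real structs appear in the header hunk below):

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// Simplified stand-ins (field subset only); the real structs are in the
// header hunk below, where RuntimeTensorDesc is a packed 1024-byte record.
struct RuntimeTensorDesc { uint8_t bytes[1024]; };
struct DataItemInfo {
  struct ItemInfo {
    int64_t dim_num;
    uint64_t data_len;
  };
  ItemInfo item_info;
};

// Mirrors the allocation arithmetic: each item contributes its fixed ItemInfo,
// one int64_t per dimension, and its payload; the fix adds one leading
// RuntimeTensorDesc so rtMbufAlloc reserves room for the tensor descriptor.
size_t ComputeTotalSize(const std::vector<DataItemInfo> &items) {
  size_t total_size = 0;
  for (const auto &item : items) {
    total_size += sizeof(DataItemInfo::ItemInfo) +
                  static_cast<size_t>(item.item_info.dim_num) * sizeof(int64_t) + item.item_info.data_len;
  }
  total_size += sizeof(RuntimeTensorDesc);  // the line this hunk adds
  return total_size;
}
```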
@@ -571,7 +572,7 @@ bool AscendHostQueue::SerializeDataItemInfos(std::vector<DataItemInfo> *items, v
   if ((head_buf != nullptr) && (head_size > kMbufHeadEndOfSequencePos)) {
     MS_LOG(DEBUG) << "Host queue set end_of_sequence mbuf head.";
   }

+  data = ::ge::ValueToPtr(::ge::PtrToValue(data) + sizeof(RuntimeTensorDesc));
   size_t offset = 0UL;
   for (size_t i = 0UL; i < count; ++i) {
     errno_ret = memcpy_s(::ge::ValueToPtr(::ge::PtrToValue(data) + offset), sizeof(DataItemInfo::ItemInfo),
@@ -103,6 +103,20 @@ class AscendHostQueue : public DataQueue {
   DataQueueStatus Front(std::vector<DataQueueItem> *data) const override { return DataQueueStatus::SUCCESS; }
   DataQueueStatus Pop() override { return DataQueueStatus::SUCCESS; }

+  static constexpr int64_t kMaxDimSize = 32;
+#pragma pack(push, 1)
+  struct RuntimeTensorDesc {
+    uint64_t data_addr;
+    int64_t data_offset_size;
+    int64_t dtype;
+    int64_t shape[kMaxDimSize + 1];
+    int64_t original_shape[kMaxDimSize + 1];
+    int64_t format;
+    int64_t sub_format;
+    uint8_t reserved[456];
+  };
+#pragma pack(pop)

   struct DataItemInfo {
     struct ItemInfo {
       int32_t version;
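With #pragma pack(1) there is no padding, so the descriptor's size is exactly the sum of its fields: five 8-byte scalars, two 33-element int64_t arrays (the extra slot conventionally carries the dim count), and 456 reserved bytes, for 1024 bytes total. A compile-time check, assuming the fields shown above are the complete struct:

```cpp
#include <cstdint>

static constexpr int64_t kMaxDimSize = 32;

#pragma pack(push, 1)
struct RuntimeTensorDesc {
  uint64_t data_addr;
  int64_t data_offset_size;
  int64_t dtype;
  int64_t shape[kMaxDimSize + 1];           // one extra slot, conventionally the dim count
  int64_t original_shape[kMaxDimSize + 1];
  int64_t format;
  int64_t sub_format;
  uint8_t reserved[456];
};
#pragma pack(pop)

// 5 * 8 + 2 * 33 * 8 + 456 = 40 + 528 + 456 = 1024
static_assert(sizeof(RuntimeTensorDesc) == 1024, "descriptor must stay exactly 1 KiB");
```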
@@ -25,8 +25,6 @@
 #include "plugin/device/ascend/hal/device/tensorprint_utils.h"
 #include "acl/acl_tdt.h"
 #include "acl/acl_base.h"
-#include "runtime/dev.h"
-#include "runtime/config.h"
 #include "toolchain/plog.h"
 #include "framework/common/helper/model_helper.h"
 #include "common/util/error_manager/error_manager.h"
@@ -226,11 +224,6 @@ void AscendDeprecatedInterface::DumpProfileParallelStrategy(const FuncGraphPtr &

 bool AscendDeprecatedInterface::OpenTsd(const std::shared_ptr<MsContext> &ms_context_ptr) {
   MS_EXCEPTION_IF_NULL(ms_context_ptr);
-  // set MS_CTX_ENABLE_GE_HETEROGENOUS true if ge heterogeneous mode
-  int32_t is_heterogeneous = 0;
-  (void)rtGetIsHeterogenous(&is_heterogeneous);
-  ms_context_ptr->set_param<bool>(MS_CTX_ENABLE_GE_HETEROGENOUS, is_heterogeneous == 1);

   if (ms_context_ptr->get_param<bool>(MS_CTX_IS_PYNATIVE_GE_INIT)) {
     return true;
   }
@@ -35,6 +35,8 @@
 #include "runtime/hardware/device_context_manager.h"
 #include "plugin/device/ascend/hal/hccl_adapter/hccl_adapter.h"
 #include "plugin/device/ascend/optimizer/ge_optimization.h"
+#include "runtime/config.h"
+#include "runtime/dev.h"

 namespace mindspore {
 namespace device {
@@ -93,41 +95,6 @@ transform::TensorOrderMap GetParams(const FuncGraphPtr &anf_graph) {
   return res;
 }

-std::tuple<std::vector<transform::GeTensorPtr>, std::vector<transform::GeTensorPtr>> GetInputTensor(
-  const FuncGraphPtr &anf_graph) {
-  MS_EXCEPTION_IF_NULL(anf_graph);
-  transform::TensorOrderMap init_input_map;
-  std::vector<tensor::TensorPtr> init_input;
-  std::vector<tensor::TensorPtr> compute_input;
-  for (auto &anf_node : anf_graph->parameters()) {
-    MS_EXCEPTION_IF_NULL(anf_node);
-    auto para = anf_node->cast<ParameterPtr>();
-    MS_EXCEPTION_IF_NULL(para);
-    if (para->has_default()) {
-      auto value = para->default_param();
-      MS_EXCEPTION_IF_NULL(value);
-      init_input_map.emplace(para->name(), value->cast<std::shared_ptr<tensor::Tensor>>());
-    } else {
-      auto abstract = para->abstract();
-      MS_EXCEPTION_IF_NULL(abstract);
-      auto undetermined_abstract = abstract->cast<std::shared_ptr<abstract::AbstractUndetermined>>();
-      MS_EXCEPTION_IF_NULL(undetermined_abstract);
-      MS_EXCEPTION_IF_NULL(undetermined_abstract->element());
-      auto base_shape = para->Shape();
-      MS_EXCEPTION_IF_NULL(base_shape);
-      auto type = undetermined_abstract->element()->BuildType();
-      MS_EXCEPTION_IF_NULL(type);
-      auto shape = base_shape->cast<abstract::ShapePtr>();
-      compute_input.emplace_back(
-        std::make_shared<tensor::Tensor>(type->type_id(), (shape != nullptr ? shape->shape() : ShapeVector{})));
-    }
-  }
-  (void)std::transform(init_input_map.begin(), init_input_map.end(), std::back_inserter(init_input),
-                       [](const std::pair<std::string, tensor::TensorPtr> &item) { return item.second; });
-  return {transform::ConvertInputTensors(init_input, kOpFormat_NCHW),
-          transform::ConvertInputTensors(compute_input, kOpFormat_NCHW)};
-}

 bool AddDFGraph(const FuncGraphPtr &anf_graph, const transform::TensorOrderMap &init_inputs_map, bool export_air) {
   MS_EXCEPTION_IF_NULL(anf_graph);
   auto converter = transform::NewConverter(anf_graph);
@@ -164,6 +131,25 @@ bool AddDFGraph(const FuncGraphPtr &anf_graph, const transform::TensorOrderMap &
   return true;
 }

+std::vector<transform::GeTensorPtr> GetInputTensors(const FuncGraphPtr &anf_graph) {
+  MS_EXCEPTION_IF_NULL(anf_graph);
+  transform::TensorOrderMap init_input_map;
+  std::vector<tensor::TensorPtr> init_input;
+  for (auto &anf_node : anf_graph->parameters()) {
+    MS_EXCEPTION_IF_NULL(anf_node);
+    auto para = anf_node->cast<ParameterPtr>();
+    MS_EXCEPTION_IF_NULL(para);
+    if (para->has_default()) {
+      auto value = para->default_param();
+      MS_EXCEPTION_IF_NULL(value);
+      init_input_map.emplace(para->name(), value->cast<std::shared_ptr<tensor::Tensor>>());
+    }
+  }
+  (void)std::transform(init_input_map.begin(), init_input_map.end(), std::back_inserter(init_input),
+                       [](const std::pair<std::string, tensor::TensorPtr> &item) { return item.second; });
+  return transform::ConvertInputTensors(init_input, kOpFormat_NCHW);
+}

 void RunGEInitGraph(const FuncGraphPtr &anf_graph) {
   MS_LOG(DEBUG) << "ExecInitGraph start.";
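Compared with the removed two-output GetInputTensor, the new helper keeps only the branch for parameters that carry default values (the weights the init graph consumes); placeholder tensors for the remaining graph inputs are no longer materialized here, since those parameters now become Const operators rather than Data nodes. A toy illustration of just that selection logic, with hypothetical minimal types rather than the MindSpore API:

```cpp
#include <map>
#include <memory>
#include <string>
#include <vector>

// Toy stand-ins; only the selection logic is illustrated here.
struct Tensor {};
struct Parameter {
  std::string name;
  std::shared_ptr<Tensor> default_value;  // non-null iff the parameter has a default
};

// Keep only parameters with defaults, ordered by name the way the
// name-keyed TensorOrderMap in this diff appears to order them.
std::vector<std::shared_ptr<Tensor>> CollectInitInputs(const std::vector<Parameter> &params) {
  std::map<std::string, std::shared_ptr<Tensor>> ordered;
  for (const auto &p : params) {
    if (p.default_value != nullptr) {
      (void)ordered.emplace(p.name, p.default_value);
    }
  }
  std::vector<std::shared_ptr<Tensor>> init_inputs;
  init_inputs.reserve(ordered.size());
  for (const auto &kv : ordered) {
    init_inputs.push_back(kv.second);
  }
  return init_inputs;
}
```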
@@ -182,7 +168,6 @@ void RunGEInitGraph(const FuncGraphPtr &anf_graph) {
   }

   std::vector<transform::GeTensorPtr> ge_tensors;
-  std::tie(ge_tensors, std::ignore) = GetInputTensor(anf_graph);
   {
     // Release GIL before calling into (potentially long-running) C++ code
     mindspore::ScopedLongRunning long_running;
@@ -196,6 +181,7 @@ void RunGEInitGraph(const FuncGraphPtr &anf_graph) {
   if ((ConfigManager::GetInstance().parallel_strategy() == ParallelStrategy::DISTRIBUTION) &&
       (transform::GetGraphByName(BROADCAST_GRAPH_NAME) != nullptr)) {
     run_options.name = BROADCAST_GRAPH_NAME;
+    ge_tensors = GetInputTensors(anf_graph);
     ret = transform::RunGraph(graph_runner, run_options, ge_tensors, &ge_outputs);
     if (ret != transform::Status::SUCCESS) {
       MS_LOG(EXCEPTION) << "Exec BROADCAST_GRAPH_NAME failed.";
@@ -412,13 +398,18 @@ void GeDeviceContext::Initialize() {
   MS_EXCEPTION_IF_NULL(device_res_manager_);
   device_res_manager_->Initialize();

-  MS_EXCEPTION_IF_NULL(MsContext::GetInstance());
-  InitGe(MsContext::GetInstance());
+  auto ms_context = MsContext::GetInstance();
+  MS_EXCEPTION_IF_NULL(ms_context);
+  // set MS_CTX_ENABLE_GE_HETEROGENOUS true according to heterogeneous mode
+  int32_t is_heterogenous = 0;
+  (void)rtGetIsHeterogenous(&is_heterogenous);
+  ms_context->set_param<bool>(MS_CTX_ENABLE_GE_HETEROGENOUS, is_heterogenous == 1);
+  InitGe(ms_context);
   std::string rank_id = common::GetEnv("RANK_ID");
   std::string rank_table_file = common::GetEnv("RANK_TABLE_FILE");
   if (!rank_id.empty() && !rank_table_file.empty()) {
-    (void)hccl::HcclAdapter::GetInstance().InitHccl(MsContext::GetInstance()->get_param<uint32_t>(MS_CTX_DEVICE_ID),
-                                                    rank_id, rank_table_file, hccl::HcclMode::kGraph);
+    (void)hccl::HcclAdapter::GetInstance().InitHccl(ms_context->get_param<uint32_t>(MS_CTX_DEVICE_ID), rank_id,
+                                                    rank_table_file, hccl::HcclMode::kGraph);
   }

   initialized_ = true;
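The heterogeneous probe moves from OpenTsd (removed above) into GeDeviceContext::Initialize so the context flag is set before InitGe runs. A standalone sketch of the sequence; the helper name is ours, and the include assumptions mirror the headers this diff already adds:

```cpp
#include <cstdint>
#include <memory>
// Assumed available, mirroring headers this diff touches:
//   "utils/ms_context.h"  -> MsContext, MS_CTX_ENABLE_GE_HETEROGENOUS
//   "runtime/dev.h"       -> rtGetIsHeterogenous

// Hypothetical helper: probe the Ascend runtime once and mirror the answer
// into the MindSpore context, the same sequence Initialize() now performs
// before InitGe.
void ProbeHeterogenous(const std::shared_ptr<MsContext> &ms_context) {
  MS_EXCEPTION_IF_NULL(ms_context);
  int32_t is_heterogenous = 0;
  // Return value deliberately ignored: if the query fails, the flag keeps
  // its default of false.
  (void)rtGetIsHeterogenous(&is_heterogenous);
  ms_context->set_param<bool>(MS_CTX_ENABLE_GE_HETEROGENOUS, is_heterogenous == 1);
}
```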
@@ -315,8 +315,7 @@ void DfGraphConvertor::SetupBroadcast(const std::shared_ptr<HcomBroadcast> &broa
   this->broadcast_graph_ = broadcast_graph;
 }

-void DfGraphConvertor::InitParamWithData(const TensorOrderMap &tensors) {
-  int index = 0;
+void DfGraphConvertor::InitParamWithConst(const TensorOrderMap &tensors) {
   std::vector<Operator> init_input;
   for (auto it : tensors) {
     std::string name = it.first;
@@ -335,50 +334,36 @@ void DfGraphConvertor::InitParamWithData(const TensorOrderMap &tensors) {
     if (op_itor == op_cache_.end()) {
       MS_LOG(EXCEPTION) << "Can not find op for node " << node->ToString() << ".";
     }
-    auto adpt = FindAdapter(kNameParam, training_);
-    if (adpt == nullptr) {
+    auto adpt_const = FindAdapter(kNameConst, training_);
+    if (adpt_const == nullptr) {
       continue;
     }
-    auto param_op = adpt->generate(name + "_data");
-    MS_LOG(INFO) << "Add parameter " << name << " as input, index " << index << ".";
+    auto const_op = adpt_const->generate(name + "_const");
+    (void)adpt_const->setAttr(const_op, "value", it.second);

+    auto desc = TransformUtil::GetGeTensorDesc(it.second->shape_c(), it.second->data_type(), kOpFormat_NCHW);
+    if (desc == nullptr) {
+      MS_LOG(WARNING) << "Create const " << name << " output descriptor failed!";
+      continue;
+    }
+    (void)std::static_pointer_cast<Constant>(const_op)->update_output_desc_y(*desc);
-    if (!training_) {
-      auto adpt_const = FindAdapter(kNameConst, training_);
-      if (adpt_const == nullptr) {
-        continue;
-      }
-      auto const_op = adpt_const->generate(name + "_const");
-      (void)adpt_const->setAttr(const_op, "value", it.second);

-      auto const_op_desc = TransformUtil::GetGeTensorDesc(it.second->shape_c(), it.second->data_type(), kOpFormat_NCHW);
-      if (const_op_desc == nullptr) {
-        MS_LOG(WARNING) << "Create variable " << name << " output descriptor failed!";
-        continue;
-      }
-      (void)std::static_pointer_cast<Constant>(const_op)->update_output_desc_y(*const_op_desc);
     const_op_to_value_[const_op] = it.second;
     vars_[name] = const_op;
     op_itor->second = const_op;
-      continue;
-    }

-    // create tensor descriptor for output descriptor
-    auto desc = TransformUtil::GetGeTensorDesc(it.second->shape_c(), it.second->data_type(), kOpFormat_NCHW);
-    if (desc == nullptr) {
-      MS_LOG(ERROR) << "Create variable " << name << " output descriptor failed!";
-      continue;
-    }

-    // we need three variable ops for each graph with same name
     // build init subgraph
     if (it.second->is_init() == 0) {
-      (void)std::static_pointer_cast<Data>(param_op)->set_attr_index(index++);
       auto init_var = std::make_shared<Variable>(name);
       auto assign_op = std::make_shared<Assign>("assign_" + name);
       (void)init_var->update_output_desc_y(*desc);
-      (void)assign_op->set_input_ref(*init_var).set_input_value(*param_op);
+      (void)assign_op->set_input_ref(*init_var).set_input_value(*const_op);
       init_input.push_back(*init_var);
-      init_ops_.push_back(param_op);
+      init_ops_.push_back(const_op);
       init_ops_.push_back(assign_op);
       init_ops_.push_back(init_var);
     }
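Net effect of this hunk: for every uninitialized weight the converter now wires a Const (instead of the old Data node) as the value input of the Assign that initializes the named Variable. A self-contained toy sketch of that wiring and of the registration order; Op is a stand-in, not GE's operator class:

```cpp
#include <memory>
#include <string>
#include <vector>

// Toy operator node; GE's Constant/Variable/Assign stand behind the real thing.
struct Op {
  std::string name;
  std::vector<std::shared_ptr<Op>> inputs;
};

// One weight's init wiring: Const("name_const") feeds the Assign whose other
// input is the Variable("name"); previously the value input was Data("name_data").
std::vector<std::shared_ptr<Op>> BuildInitTriple(const std::string &name) {
  auto const_op = std::make_shared<Op>(Op{name + "_const", {}});
  auto init_var = std::make_shared<Op>(Op{name, {}});
  auto assign_op = std::make_shared<Op>(Op{"assign_" + name, {init_var, const_op}});
  return {const_op, assign_op, init_var};  // the order init_ops_ receives them above
}
```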
@@ -419,7 +404,7 @@ DfGraphConvertor &DfGraphConvertor::InitParam(const TensorOrderMap &tensors) {
       }
     }
   }
-  InitParamWithData(tensors);
+  InitParamWithConst(tensors);
   init_sout_ << "}" << endl;
   return *this;
 }
@@ -138,7 +138,7 @@ class DfGraphConvertor {
   DfGraphConvertor &InitParam(const TensorOrderMap &tensors);
   DfGraphConvertor &GenerateCheckpointGraph();
   DfGraphConvertor &GenerateBroadcastGraph(const TensorOrderMap &tensors);
-  void InitParamWithData(const TensorOrderMap &tensors);
+  void InitParamWithConst(const TensorOrderMap &tensors);
   OutHandler GetNormalOpInput(const AnfNodePtr &node, const AnfNodePtr &pred);
   void DrawOpInput(const AnfNodePtr &node, const AnfNodePtr &pred, size_t i);
   void SetOpInput(const OpAdapterPtr &adpt, const CNodePtr &node);
@@ -135,41 +135,6 @@ void CreateSessionAndGraphRunner() {
   }
 }

-std::tuple<std::vector<transform::GeTensorPtr>, std::vector<transform::GeTensorPtr>> GetInputTensor(
-  const FuncGraphPtr &anf_graph) {
-  MS_EXCEPTION_IF_NULL(anf_graph);
-  transform::TensorOrderMap init_input_map;
-  std::vector<tensor::TensorPtr> init_input;
-  std::vector<tensor::TensorPtr> compute_input;
-  for (auto &anf_node : anf_graph->parameters()) {
-    MS_EXCEPTION_IF_NULL(anf_node);
-    auto para = anf_node->cast<ParameterPtr>();
-    MS_EXCEPTION_IF_NULL(para);
-    if (para->has_default()) {
-      auto value = para->default_param();
-      MS_EXCEPTION_IF_NULL(value);
-      init_input_map.emplace(para->name(), value->cast<std::shared_ptr<tensor::Tensor>>());
-    } else {
-      auto abstract = para->abstract();
-      MS_EXCEPTION_IF_NULL(abstract);
-      auto undetermined_abstract = abstract->cast<std::shared_ptr<abstract::AbstractUndetermined>>();
-      MS_EXCEPTION_IF_NULL(undetermined_abstract);
-      MS_EXCEPTION_IF_NULL(undetermined_abstract->element());
-      auto base_shape = para->Shape();
-      MS_EXCEPTION_IF_NULL(base_shape);
-      auto type = undetermined_abstract->element()->BuildType();
-      MS_EXCEPTION_IF_NULL(type);
-      auto shape = base_shape->cast<abstract::ShapePtr>();
-      compute_input.emplace_back(
-        std::make_shared<tensor::Tensor>(type->type_id(), (shape != nullptr ? shape->shape() : ShapeVector{})));
-    }
-  }
-  (void)std::transform(init_input_map.begin(), init_input_map.end(), std::back_inserter(init_input),
-                       [](const std::pair<std::string, tensor::TensorPtr> &item) { return item.second; });
-  return {transform::ConvertInputTensors(init_input, kOpFormat_NCHW),
-          transform::ConvertInputTensors(compute_input, kOpFormat_NCHW)};
-}

 void RunGeInitGraph(const FuncGraphPtr &anf_graph) {
   MS_LOG(DEBUG) << "ExecInitGraph start.";
@@ -188,7 +153,6 @@ void RunGeInitGraph(const FuncGraphPtr &anf_graph) {
   }

   std::vector<transform::GeTensorPtr> ge_tensors;
-  std::tie(ge_tensors, std::ignore) = GetInputTensor(anf_graph);
   {
     // Release GIL before calling into (potentially long-running) C++ code
     mindspore::ScopedLongRunning long_running;