forked from mindspore-Ecosystem/mindspore

commit 091d88849d (parent cdaaf38206)
commit message: codex && code review
@@ -28,6 +28,7 @@ PluginIntf *ProfilingEngineImpl::CreatePlugin() {
 int ProfilingEngineImpl::ReleasePlugin(PluginIntf *plugin) {
   if (plugin != nullptr) {
     delete plugin;
+    plugin = nullptr;
   }
   return 0;
 }
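Review note on the added `plugin = nullptr;`: `plugin` is passed by value, so the assignment clears only the callee's local copy; the caller's pointer still dangles after the delete. A minimal standalone sketch of that behavior (the types and main() are illustrative, not MindSpore code):

#include <cstdio>

struct PluginIntf {
  virtual ~PluginIntf() = default;
};

// Mirrors the patched ReleasePlugin: nulling the by-value parameter is
// harmless but affects only this function's copy of the pointer.
int ReleasePlugin(PluginIntf *plugin) {
  if (plugin != nullptr) {
    delete plugin;
    plugin = nullptr;
  }
  return 0;
}

int main() {
  PluginIntf *p = new PluginIntf();
  ReleasePlugin(p);
  // p still holds the freed address here; nulling the caller's copy would
  // require a PluginIntf ** or PluginIntf *& parameter.
  std::printf("released\n");
  return 0;
}

Passing the pointer by reference would let the function clear the caller's copy too; as written, callers must simply treat the pointer as dead after the call.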
@@ -15,11 +15,8 @@
  */

 #include "device/ascend/profiling/profiling_manager.h"

 #include <stdlib.h>
 #include <vector>

-#include <nlohmann/json.hpp>
 #include "securec/include/securec.h"
 #include "./prof_mgr_core.h"
 #include "device/ascend/profiling/plugin_impl.h"
@@ -30,9 +27,6 @@
 #include "utils/convert_utils.h"
 #include "runtime/base.h"

-using std::vector;
-using Json = nlohmann::json;
-
 namespace mindspore {
 namespace device {
 namespace ascend {
@@ -124,35 +118,43 @@ bool ProfilingManager::StartupProfiling(uint32_t device_id) {
   auto context = MsContext::GetInstance();
   MS_EXCEPTION_IF_NULL(context);
   const string prof_options_str = context->profiling_options();
-  vector<string> opts = Split(prof_options_str, ':');
+  std::vector<string> opts = Split(prof_options_str, ':');
   if (opts.empty()) {
     MS_LOG(WARNING) << "Profiling is enabled, but profiling option is not set!";
     return true;
   }
   // current one docker only use one device
-  Json p_device;
+  nlohmann::json p_device;
   // JOBID
   auto job_id = GetJobId();
   p_device["jobID"] = std::to_string(job_id);
   // device_id
   p_device["deviceID"] = std::to_string(device_id);
   // features:'training_trace', 'task_trace' etc
-  Json features;
-  for (vector<string>::size_type i = 0; i < opts.size(); i++) {
-    Json f;
+  nlohmann::json features;
+  for (std::vector<string>::size_type i = 0; i < opts.size(); i++) {
+    nlohmann::json f;
     f["name"] = opts[i];
     features[i] = f;
   }
   p_device["features"] = features;
   // only one device, but sProfMgrStartUp API require for device list
-  Json devices;
+  nlohmann::json devices;
   devices[0] = p_device;
-  Json startCfg;
+  nlohmann::json startCfg;
   startCfg["startCfg"] = devices;

+  if (!ProfStartUp(NOT_NULL(&startCfg))) {
+    MS_LOG(ERROR) << "ProfMgrStartUp failed.";
+    return false;
+  }
+  return true;
+}
+
+bool ProfilingManager::ProfStartUp(NotNull<nlohmann::json *> startCfg) {
   // convert json to string
   std::stringstream ss;
-  ss << startCfg;
+  ss << *startCfg;
   std::string cfg = ss.str();
   MS_LOG(INFO) << "profiling config " << cfg;
   auto ret = rtProfilerStart();
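For reference, a self-contained sketch of the configuration object this hunk assembles and hands to ProfStartUp (the job id, device id, and feature names below are made-up sample values):

#include <iostream>
#include <sstream>
#include <string>
#include <vector>

#include <nlohmann/json.hpp>

int main() {
  std::vector<std::string> opts = {"training_trace", "task_trace"};  // sample options
  nlohmann::json p_device;
  p_device["jobID"] = std::to_string(255);   // sample job id
  p_device["deviceID"] = std::to_string(0);  // sample device id
  nlohmann::json features;
  for (std::vector<std::string>::size_type i = 0; i < opts.size(); i++) {
    nlohmann::json f;
    f["name"] = opts[i];
    features[i] = f;
  }
  p_device["features"] = features;
  nlohmann::json devices;  // sProfMgrStartUp wants a device list, so wrap the one device
  devices[0] = p_device;
  nlohmann::json start_cfg;
  start_cfg["startCfg"] = devices;
  std::stringstream ss;
  ss << start_cfg;  // same serialization path ProfStartUp uses
  std::cout << ss.str() << std::endl;
  return 0;
}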
@@ -20,18 +20,15 @@
 #include <cstring>
 #include <string>
 #include <memory>
+#include <nlohmann/json.hpp>
+#include "utils/contract.h"
 #include "utils/context/ms_context.h"

 using std::map;
 using std::string;

 namespace mindspore {
 namespace device {
 namespace ascend {
-// PROFILING_CUSTOM_LOGID_START 3
-const uint64_t kProfilingFpStartLogId = 1;
-const uint64_t kProfilingBpEndLogId = 2;
-const uint64_t kProfilingIterEndLogId = 255;
-
 class ProfilingEngineImpl;
 class ProfilingManager {
  public:
@@ -52,6 +49,7 @@ class ProfilingManager {
   ~ProfilingManager() { prof_handle_ = nullptr; }

  private:
+  bool ProfStartUp(NotNull<nlohmann::json *> json);
   std::shared_ptr<ProfilingEngineImpl> engine_0_;
   uint32_t device_id_;
   void *prof_handle_;
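The new private ProfStartUp takes NotNull<nlohmann::json *> from utils/contract.h, so the null check happens once at the call site (the NOT_NULL(&startCfg) above) instead of inside the callee. An illustrative re-implementation of the idea; MindSpore's actual NotNull may differ:

#include <cassert>
#include <string>

template <typename T>  // T is a pointer type, e.g. nlohmann::json *
class NotNull {
 public:
  explicit NotNull(T ptr) : ptr_(ptr) { assert(ptr_ != nullptr); }
  auto &operator*() const { return *ptr_; }  // callees dereference, as in ss << *startCfg
  T operator->() const { return ptr_; }

 private:
  T ptr_;
};

// Hypothetical callee: dereferencing is safe by construction.
bool ProfStartUpSketch(NotNull<std::string *> cfg) {
  *cfg += "{}";
  return true;
}

int main() {
  std::string cfg = "startCfg:";
  return ProfStartUpSketch(NotNull<std::string *>(&cfg)) ? 0 : 1;
}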
@@ -33,6 +33,10 @@ constexpr char kCustomNode[] = "PROFILING_CUSTOM_";
 constexpr char kFpStartNode[] = "PROFILING_FP_START";
 constexpr char kBpEndNode[] = "PROFILING_BP_END";
 constexpr char kIterEndNode[] = "PROFILING_ITER_END";
+// PROFILING_CUSTOM_LOGID_START 3
+constexpr uint64_t kProfilingFpStartLogId = 1;
+constexpr uint64_t kProfilingBpEndLogId = 2;
+constexpr uint64_t kProfilingIterEndLogId = 255;
 std::map<uint32_t, std::vector<CNodePtr>> ProfilingUtils::graph_profiling_cnode_;
 std::map<uint32_t, std::vector<std::string>> ProfilingUtils::graph_kernel_name_;
 std::map<uint32_t, std::vector<std::shared_ptr<ProfDesc>>> ProfilingUtils::graph_point_;
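These log ids move out of profiling_manager.h (previous hunk) into the one translation unit that uses them, switching from const to constexpr along the way. Both spellings give a namespace-scope integer internal linkage, so the move mainly stops every includer of the header from carrying its own copy; a minimal standalone illustration (not the real file):

#include <cstdint>
#include <iostream>

// Visible only in this .cc, like the constants above.
constexpr uint64_t kProfilingFpStartLogId = 1;
constexpr uint64_t kProfilingBpEndLogId = 2;
constexpr uint64_t kProfilingIterEndLogId = 255;

int main() {
  std::cout << kProfilingFpStartLogId << " " << kProfilingBpEndLogId << " "
            << kProfilingIterEndLogId << std::endl;
  return 0;
}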
@@ -58,9 +58,9 @@ bool RuntimeUtils::HcomDistribute(const std::shared_ptr<HcclTaskInfo> &task_info
   if (task_info->hccl_type() == kBroadcastOpName) {
     // call hcom broadcast interface to run op
     const string tag_broadcast = kHcomBroadcast + std::to_string(task_counter++) + kUnderline + std::to_string(0);
-    ret = hcom_broadcast(tag_broadcast.c_str(), reinterpret_cast<void *>(task_info->input_data_addr()),
-                         static_cast<u64>(task_info->count()), static_cast<hcclDataType_t>(task_info->data_type()),
-                         static_cast<u32>(task_info->root_id()), hccl_group.c_str(), stream);
+    ret = hcom_broadcast(tag_broadcast.c_str(), task_info->input_data_addr(), static_cast<u64>(task_info->count()),
+                         static_cast<hcclDataType_t>(task_info->data_type()), static_cast<u32>(task_info->root_id()),
+                         hccl_group.c_str(), stream);
     if (ret != HCCL_SUCCESS) {
       MS_LOG(ERROR) << "hcom_broadcast fail, return ret: " << static_cast<int>(ret);
       return false;
@@ -68,9 +68,9 @@ bool RuntimeUtils::HcomDistribute(const std::shared_ptr<HcclTaskInfo> &task_info
   } else if (task_info->hccl_type() == kAllGatherOpName) {
     // call hcom allgather interface to run op
     const string tag_all_gather = kHcomAllGather + std::to_string(task_counter++) + kUnderline + std::to_string(0);
-    ret = hcom_all_gather(tag_all_gather.c_str(), reinterpret_cast<void *>(task_info->input_data_addr()),
-                          reinterpret_cast<void *>(task_info->output_data_addr()), static_cast<u64>(task_info->count()),
-                          static_cast<hcclDataType_t>(task_info->data_type()), hccl_group.c_str(), stream);
+    ret = hcom_all_gather(tag_all_gather.c_str(), task_info->input_data_addr(), task_info->output_data_addr(),
+                          static_cast<u64>(task_info->count()), static_cast<hcclDataType_t>(task_info->data_type()),
+                          hccl_group.c_str(), stream);
     if (ret != HCCL_SUCCESS) {
       MS_LOG(ERROR) << "hcom_all_gather fail, return ret: " << ret;
       return false;
@@ -78,9 +78,8 @@ bool RuntimeUtils::HcomDistribute(const std::shared_ptr<HcclTaskInfo> &task_info
   } else if (task_info->hccl_type() == kAllReduceOpName) {
     // call hcom allreduce interface to run op
     const string tag_all_reduce = kHcomAllReduce + std::to_string(task_counter++) + kUnderline + std::to_string(0);
-    ret = hcom_all_reduce(tag_all_reduce.c_str(), reinterpret_cast<void *>(task_info->input_data_addr()),
-                          reinterpret_cast<void *>(task_info->output_data_addr()), static_cast<u64>(task_info->count()),
-                          static_cast<hcclDataType_t>(task_info->data_type()),
+    ret = hcom_all_reduce(tag_all_reduce.c_str(), task_info->input_data_addr(), task_info->output_data_addr(),
+                          static_cast<u64>(task_info->count()), static_cast<hcclDataType_t>(task_info->data_type()),
                           static_cast<hcclRedOp_t>(task_info->op_type()), hccl_group.c_str(), stream);
     if (ret != HCCL_SUCCESS) {
       MS_LOG(ERROR) << "hcom_all_reduce fail, return ret: " << ret;
@@ -90,8 +89,7 @@ bool RuntimeUtils::HcomDistribute(const std::shared_ptr<HcclTaskInfo> &task_info
     // call hcom reducescatter interface to run op
     const string tag_reduce_scatter =
       kHcomReduceScatter + std::to_string(task_counter++) + kUnderline + std::to_string(0);
-    ret = hcom_reduce_scatter(tag_reduce_scatter.c_str(), reinterpret_cast<void *>(task_info->input_data_addr()),
-                              reinterpret_cast<void *>(task_info->output_data_addr()),
+    ret = hcom_reduce_scatter(tag_reduce_scatter.c_str(), task_info->input_data_addr(), task_info->output_data_addr(),
                               static_cast<u64>(task_info->count()), static_cast<hcclDataType_t>(task_info->data_type()),
                               static_cast<hcclRedOp_t>(task_info->op_type()), hccl_group.c_str(), stream);
     if (ret != HCCL_SUCCESS) {
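All four hcom hunks make the same cleanup: the reinterpret_cast<void *> wrappers are dropped, a pure no-op removal if input_data_addr()/output_data_addr() already return void * (assumed here; the real HcclTaskInfo accessors may differ). A small sketch of why the cast added nothing:

#include <cstdint>
#include <iostream>

// Stand-in for HcclTaskInfo with an accessor that already returns void *.
struct HcclTaskInfoSketch {
  void *input_data_addr() const { return addr_; }
  void *addr_ = nullptr;
};

void Launch(void *buf) { std::cout << buf << '\n'; }

int main() {
  HcclTaskInfoSketch info;
  uint8_t storage[16] = {0};
  info.addr_ = storage;
  Launch(reinterpret_cast<void *>(info.input_data_addr()));  // before: redundant cast
  Launch(info.input_data_addr());                            // after: same pointer, clearer call
  return 0;
}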
@@ -40,39 +40,46 @@ bool TaskGenerator::GenTasks(const std::vector<CNodePtr> &anf_node_list, std::ve
   return true;
 }

+void TaskGenerator::LaunchAddrCleanAkgKernel(const CNodePtr &anf_node_ptr, AddressPtrList *kernel_inputs) {
+  MS_EXCEPTION_IF_NULL(anf_node_ptr);
+  MS_EXCEPTION_IF_NULL(kernel_inputs);
+  // akg process
+  // set atomic clean addr
+  if (AnfAlgo::HasNodeAttr(kAttrAtomicOutputIndexs, anf_node_ptr)) {
+    auto clean_output_indexs = AnfAlgo::GetNodeAttr<std::vector<size_t>>(anf_node_ptr, kAttrAtomicOutputIndexs);
+    auto graph = anf_node_ptr->func_graph();
+    MS_EXCEPTION_IF_NULL(graph);
+    auto manager = graph->manager();
+    MS_EXCEPTION_IF_NULL(manager);
+    auto node_users = manager->node_users();
+    if (node_users[anf_node_ptr].empty()) {
+      MS_LOG(EXCEPTION) << "Node users of " << anf_node_ptr->ToString() << " is empty.";
+    }
+    auto depend_node = node_users[anf_node_ptr].pop().first;
+    if (!IsPrimitiveCNode(depend_node, prim::kPrimDepend)) {
+      MS_LOG(EXCEPTION) << "Checking Depend node failed";
+    }
+    if (node_users[depend_node].empty()) {
+      MS_LOG(EXCEPTION) << "Node users of " << depend_node->ToString() << " is empty.";
+    }
+    auto post_node = node_users[depend_node].pop().first;
+    for (auto index : clean_output_indexs) {
+      auto device_address = AnfAlgo::GetOutputAddr(post_node, index);
+      kernel::AddressPtr input = std::make_shared<kernel::Address>();
+      MS_EXCEPTION_IF_NULL(input);
+      input->addr = device_address->ptr_;
+      input->size = device_address->size_;
+      kernel_inputs->push_back(input);
+    }
+    MS_LOG(DEBUG) << "AtomicAddClean clean output size: " << clean_output_indexs.size();
+  }
+}
+
 void TaskGenerator::LaunchAddrCleanKernel(const CNodePtr &anf_node_ptr, AddressPtrList *kernel_inputs) {
   MS_EXCEPTION_IF_NULL(anf_node_ptr);
+  MS_EXCEPTION_IF_NULL(kernel_inputs);
   if (anf_node_ptr->inputs().size() != 2) {
-    // akg process
-    // set atomic clean addr
-    if (AnfAlgo::HasNodeAttr(kAttrAtomicOutputIndexs, anf_node_ptr)) {
-      auto clean_output_indexs = AnfAlgo::GetNodeAttr<std::vector<size_t>>(anf_node_ptr, kAttrAtomicOutputIndexs);
-      auto graph = anf_node_ptr->func_graph();
-      MS_EXCEPTION_IF_NULL(graph);
-      auto manager = graph->manager();
-      MS_EXCEPTION_IF_NULL(manager);
-      auto node_users = manager->node_users();
-      if (node_users[anf_node_ptr].empty()) {
-        MS_LOG(EXCEPTION) << "Node users of " << anf_node_ptr->ToString() << " is empty.";
-      }
-      auto depend_node = node_users[anf_node_ptr].pop().first;
-      if (!IsPrimitiveCNode(depend_node, prim::kPrimDepend)) {
-        MS_LOG(EXCEPTION) << "Checking Depend node failed";
-      }
-      if (node_users[depend_node].empty()) {
-        MS_LOG(EXCEPTION) << "Node users of " << depend_node->ToString() << " is empty.";
-      }
-      auto post_node = node_users[depend_node].pop().first;
-      for (auto index : clean_output_indexs) {
-        auto device_address = AnfAlgo::GetOutputAddr(post_node, index);
-        kernel::AddressPtr input = std::make_shared<kernel::Address>();
-        input->addr = device_address->ptr_;
-        MS_EXCEPTION_IF_NULL(input->addr);
-        input->size = device_address->size_;
-        kernel_inputs->push_back(input);
-      }
-      MS_LOG(DEBUG) << "AtomicAddClean clean output size: " << clean_output_indexs.size();
-    }
+    LaunchAddrCleanAkgKernel(anf_node_ptr, kernel_inputs);
     return;
   }
   MS_EXCEPTION_IF_NULL(anf_node_ptr->inputs()[1]);
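The extracted LaunchAddrCleanAkgKernel walks the use chain atomic-clean node -> Depend user -> post node, then collects the post node's output addresses. A simplified model of that walk with plain containers (the Node/Users types are stand-ins, not MindSpore's AnfNode/FuncGraphManager API; taking the first user models node_users[n].pop().first):

#include <iostream>
#include <map>
#include <set>
#include <stdexcept>
#include <string>
#include <utility>

using Node = std::string;
using Users = std::map<Node, std::set<std::pair<Node, int>>>;  // node -> (user, input index)

Node FirstUser(Users &users, const Node &n) {
  if (users[n].empty()) {
    throw std::runtime_error("Node users of " + n + " is empty.");
  }
  return users[n].begin()->first;
}

int main() {
  Users users;
  users["AtomicClean"] = {{"Depend", 1}};
  users["Depend"] = {{"Conv2D", 0}};
  Node depend = FirstUser(users, "AtomicClean");
  if (depend != "Depend") {
    throw std::runtime_error("Checking Depend node failed");
  }
  Node post = FirstUser(users, depend);  // the node whose outputs get cleaned
  std::cout << "clean outputs of: " << post << std::endl;
  return 0;
}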
@@ -48,6 +48,7 @@ class TaskGenerator {

  private:
   static void LaunchAddrCleanKernel(const CNodePtr &anf_node_ptr, AddressPtrList *kernel_inputs);
+  static void LaunchAddrCleanAkgKernel(const CNodePtr &anf_node_ptr, AddressPtrList *kernel_inputs);
   static bool LaunchKernel(const CNodePtr &anf_node_ptr, uint32_t stream_id, std::vector<TaskInfoPtr> *task_info_list);
   static bool LaunchAllKernel(const std::vector<CNodePtr> &anf_node_list, std::vector<TaskInfoPtr> *task_info_list,
                               uint32_t graph_id);
@@ -79,6 +79,7 @@ void CPUKernelRuntime::AssignValueNodeAddress(session::KernelGraph *kernel_graph
     std::vector<int> data_shape = tensor->shape();
     size_t tensor_size = std::accumulate(data_shape.begin(), data_shape.end(), type_size, std::multiplies<size_t>());
     DeviceAddressPtr address = CreateDeviceAddress(nullptr, tensor_size, kOpFormat_DEFAULT, kNumberTypeFloat32);
+    MS_EXCEPTION_IF_NULL(address);
     if (tensor->data_type() == kNumberTypeFloat32 || tensor->data_type() == kNumberTypeInt32) {
       address->ptr_ = tensor->data_c(false);
     } else {
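The tensor_size line above folds the shape into a byte count by seeding std::accumulate with the element size; a standalone check with sample numbers:

#include <cstddef>
#include <functional>
#include <iostream>
#include <numeric>
#include <vector>

int main() {
  std::vector<int> data_shape = {2, 3, 4};  // sample shape
  std::size_t type_size = 4;                // e.g. sizeof(float) for float32
  std::size_t tensor_size =
      std::accumulate(data_shape.begin(), data_shape.end(), type_size, std::multiplies<std::size_t>());
  std::cout << tensor_size << std::endl;  // 2 * 3 * 4 * 4 = 96 bytes
  return 0;
}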