forked from mindspore-Ecosystem/mindspore
profiling feature enhancement
This commit is contained in:
parent
7798c85e70
commit
89f0b3b1bb
|
@ -702,7 +702,7 @@ void AscendStreamAssign::PrintGraphExeOrders(const shared_ptr<mindspore::session
|
|||
<< AnfAlgo::GetStreamId(cur_cnode_ptr) << "], event_id["
|
||||
<< GetValue<uint32_t>(primitive->GetAttr(kAttrEventId)) << "]";
|
||||
} else {
|
||||
MS_LOG(INFO) << "node name[" << AnfAlgo::GetCNodeName(cur_cnode_ptr) << "], logic id["
|
||||
MS_LOG(INFO) << "node name[" << cur_cnode_ptr->fullname_with_scope() << "], logic id["
|
||||
<< AnfAlgo::GetStreamDistinctionLabel(cur_cnode_ptr.get()) << "], stream id["
|
||||
<< AnfAlgo::GetStreamId(cur_cnode_ptr) << "]";
|
||||
}
|
||||
|
|
|
@ -29,10 +29,6 @@ namespace ascend {
|
|||
// PROFILING_CUSTOM_LOGID_START 3
|
||||
const uint64_t kProfilingFpStartLogId = 1;
|
||||
const uint64_t kProfilingBpEndLogId = 2;
|
||||
const uint64_t kProfilingAllReduce1Start = 3;
|
||||
const uint64_t kProfilingAllReduce1End = 4;
|
||||
const uint64_t kProfilingAllReduce2Start = 5;
|
||||
const uint64_t kProfilingAllReduce2End = 6;
|
||||
const uint64_t kProfilingIterEndLogId = 255;
|
||||
|
||||
class ProfilingEngineImpl;
|
||||
|
|
|
@ -14,10 +14,8 @@
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "device/ascend/profiling/profiling_utils.h"
|
||||
|
||||
#include <map>
|
||||
|
||||
#include "device/ascend/profiling/profiling_utils.h"
|
||||
#include "kernel/kernel.h"
|
||||
#include "device/ascend/profiling/profiling_manager.h"
|
||||
#include "session/anf_runtime_algorithm.h"
|
||||
|
@ -27,82 +25,61 @@
|
|||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace ascend {
|
||||
const char ProfilingUtils::kProfiling[] = "Profiling";
|
||||
const char ProfilingUtils::kNotify[] = "notify";
|
||||
const char ProfilingUtils::kProfilerTraceId[] = "profiler_trace_id";
|
||||
const char ProfilingUtils::kFlags[] = "flags";
|
||||
constexpr uint32_t kMaxProfilingNodeNum = 100;
|
||||
constexpr char kCustomNode[] = "PROFILING_CUSTOM_";
|
||||
constexpr char kFpStartNode[] = "PROFILING_FP_START";
|
||||
constexpr char kBpEndNode[] = "PROFILING_BP_END";
|
||||
constexpr char kIterEndNode[] = "PROFILING_ITER_END";
|
||||
std::unordered_map<uint32_t, std::vector<std::string>> ProfilingUtils::graph_kernel_name_;
|
||||
bool ProfilingUtils::GetProfilingTraceInfo(const std::shared_ptr<session::KernelGraph> &graph_ptr,
|
||||
ProfilingTraceInfo *profiling_trace_info) {
|
||||
MS_EXCEPTION_IF_NULL(profiling_trace_info);
|
||||
MS_EXCEPTION_IF_NULL(graph_ptr);
|
||||
bool find_begin = false;
|
||||
bool first_allreduce = true;
|
||||
for (const auto &anf_node : graph_ptr->execution_order()) {
|
||||
if (anf_node->isa<CNode>()) {
|
||||
const std::string kernel_name = AnfAlgo::GetCNodeName(anf_node);
|
||||
if ((kernel_name == "Cast" || kernel_name == "Four2Five") && !find_begin) {
|
||||
profiling_trace_info->profiling_trace_begin = anf_node->fullname_with_scope();
|
||||
find_begin = true;
|
||||
}
|
||||
if (kernel_name == "Conv2DBackpropFilter") {
|
||||
profiling_trace_info->profiling_trace_bp_end = anf_node->fullname_with_scope();
|
||||
}
|
||||
if (kernel_name == kFusedMulApplyMomentumOpName || kernel_name == kApplyMomentumOpName) {
|
||||
profiling_trace_info->profiling_trace_netoutput = anf_node->fullname_with_scope();
|
||||
}
|
||||
if (kernel_name == kAllReduceOpName) {
|
||||
if (first_allreduce) {
|
||||
profiling_trace_info->profiling_allreduce1_start = anf_node->fullname_with_scope();
|
||||
profiling_trace_info->profiling_allreduce1_end = anf_node->fullname_with_scope();
|
||||
first_allreduce = false;
|
||||
} else {
|
||||
profiling_trace_info->profiling_allreduce2_start = anf_node->fullname_with_scope();
|
||||
profiling_trace_info->profiling_allreduce2_end = anf_node->fullname_with_scope();
|
||||
}
|
||||
}
|
||||
uint32_t ProfilingUtils::custom_node_index_ = 1;
|
||||
|
||||
ProfilingTraceInfo ProfilingUtils::GetProfilingTraceFromEnv(NotNull<session::KernelGraph *> graph_ptr) {
|
||||
MS_LOG(INFO) << "get env start";
|
||||
custom_node_index_ = 1;
|
||||
auto &cnode_exec_order = graph_ptr->execution_order();
|
||||
ProfilingTraceInfo profiling_trace;
|
||||
profiling_trace.trace_begin = GetTraceBegin(cnode_exec_order);
|
||||
profiling_trace.trace_bp_end = GetTraceBpEnd();
|
||||
profiling_trace.trace_netoutput = GetTraceNetoutput(cnode_exec_order);
|
||||
|
||||
MS_LOG(INFO) << "[profiling] trace_begin:" << profiling_trace.trace_begin
|
||||
<< " trace_bp_end:" << profiling_trace.trace_bp_end
|
||||
<< " trace_netoutput:" << profiling_trace.trace_netoutput;
|
||||
|
||||
for (uint32_t i = 1; i <= kMaxProfilingNodeNum; ++i) {
|
||||
std::string env_str = std::string(kCustomNode) + std::to_string(i);
|
||||
const char *node_full_name = std::getenv(env_str.c_str());
|
||||
if (node_full_name == nullptr) {
|
||||
break;
|
||||
}
|
||||
MS_LOG(INFO) << "Get profiling node:" << node_full_name;
|
||||
profiling_trace.trace_custom_node.insert(node_full_name);
|
||||
}
|
||||
MS_LOG(INFO) << "[profiling]begin:" << profiling_trace_info->profiling_trace_begin
|
||||
<< ", net_output:" << profiling_trace_info->profiling_trace_netoutput
|
||||
<< ", end:" << profiling_trace_info->profiling_trace_bp_end
|
||||
<< ", allreduce1:" << profiling_trace_info->profiling_allreduce1_start
|
||||
<< ", allreduce2:" << profiling_trace_info->profiling_allreduce2_start;
|
||||
return profiling_trace_info->IsValid();
|
||||
MS_LOG(INFO) << "get env end";
|
||||
return profiling_trace;
|
||||
}
|
||||
|
||||
bool ProfilingUtils::GetNetOutput(AnfNodePtr anf_node, std::string *profiling_trace_net_output) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
MS_EXCEPTION_IF_NULL(profiling_trace_net_output);
|
||||
MS_LOG(INFO) << "[profiling]Anf node's full name with scope:" << anf_node->fullname_with_scope();
|
||||
if (!profiling_trace_net_output->empty()) {
|
||||
MS_LOG(INFO) << "[profiling]Has got the net_output:" << profiling_trace_net_output->c_str();
|
||||
return true;
|
||||
}
|
||||
|
||||
if (AnfAlgo::IsRealKernel(anf_node)) {
|
||||
*profiling_trace_net_output = anf_node->fullname_with_scope();
|
||||
return true;
|
||||
}
|
||||
|
||||
auto cnode = anf_node->cast<CNodePtr>();
|
||||
if (cnode == nullptr) {
|
||||
MS_LOG(ERROR) << "[profiling]Anf node should be a CNode";
|
||||
return false;
|
||||
}
|
||||
|
||||
auto inputs = cnode->inputs();
|
||||
auto input_size = inputs.size();
|
||||
if (input_size < 2) {
|
||||
MS_LOG(ERROR) << "[profiling]Anf node' input size(" << input_size << ") < 2, don't support get apply kernel node.";
|
||||
return false;
|
||||
}
|
||||
return GetNetOutput(inputs[1], profiling_trace_net_output);
|
||||
std::string ProfilingUtils::GetTraceBegin(const std::vector<CNodePtr> &cnode_exec_order) {
|
||||
const char *trace_begin = std::getenv(kFpStartNode);
|
||||
auto &first_cnode = cnode_exec_order.front();
|
||||
MS_EXCEPTION_IF_NULL(first_cnode);
|
||||
return trace_begin == nullptr ? first_cnode->fullname_with_scope() : std::string(trace_begin);
|
||||
}
|
||||
|
||||
CNodePtr ProfilingUtils::CreateProfilingCNode(const std::shared_ptr<session::KernelGraph> &graph_ptr, bool notify,
|
||||
uint64_t profiler_trace_id, uint32_t flags) {
|
||||
MS_EXCEPTION_IF_NULL(graph_ptr);
|
||||
std::string ProfilingUtils::GetTraceBpEnd() {
|
||||
const char *trace_bp_end = std::getenv(kBpEndNode);
|
||||
return trace_bp_end == nullptr ? "" : std::string(trace_bp_end);
|
||||
}
|
||||
|
||||
std::string ProfilingUtils::GetTraceNetoutput(const std::vector<CNodePtr> &cnode_exec_order) {
|
||||
const char *trace_netoutput = std::getenv(kIterEndNode);
|
||||
auto &last_cnode = cnode_exec_order.back();
|
||||
MS_EXCEPTION_IF_NULL(last_cnode);
|
||||
return trace_netoutput == nullptr ? last_cnode->fullname_with_scope() : std::string(trace_netoutput);
|
||||
}
|
||||
|
||||
NotNull<CNodePtr> ProfilingUtils::CreateProfilingCNode(const ProfilingContent &profiling_content,
|
||||
NotNull<session::KernelGraph *> graph_ptr) {
|
||||
kernel::KernelBuildInfo::KernelBuildInfoBuilder selected_kernel_builder;
|
||||
selected_kernel_builder.SetInputsFormat({kOpFormat_DEFAULT, kOpFormat_DEFAULT});
|
||||
selected_kernel_builder.SetInputsDeviceType({TypeId::kNumberTypeInt32, TypeId::kNumberTypeInt32});
|
||||
|
@ -118,75 +95,79 @@ CNodePtr ProfilingUtils::CreateProfilingCNode(const std::shared_ptr<session::Ker
|
|||
AnfAlgo::SetSelectKernelBuildInfo(selected_kernel_builder.Build(), cnode_ptr.get());
|
||||
cnode_ptr->set_abstract(type_none_abstract);
|
||||
// set attr
|
||||
ValuePtr notify_value = MakeValue(notify);
|
||||
ValuePtr trace_id_value = MakeValue(profiler_trace_id);
|
||||
ValuePtr flags_value = MakeValue(flags);
|
||||
ValuePtr notify_value = MakeValue(profiling_content.notify);
|
||||
ValuePtr trace_id_value = MakeValue(profiling_content.profiler_trace_id);
|
||||
ValuePtr flags_value = MakeValue(profiling_content.flags);
|
||||
AnfAlgo::SetNodeAttr(ProfilingUtils::kNotify, notify_value, cnode_ptr);
|
||||
AnfAlgo::SetNodeAttr(ProfilingUtils::kProfilerTraceId, trace_id_value, cnode_ptr);
|
||||
AnfAlgo::SetNodeAttr(ProfilingUtils::kFlags, flags_value, cnode_ptr);
|
||||
return cnode_ptr;
|
||||
return NOT_NULL(cnode_ptr);
|
||||
}
|
||||
|
||||
void ProfilingUtils::ProfilingTraceFpStart(const std::shared_ptr<mindspore::session::KernelGraph> &graph_ptr,
|
||||
const mindspore::AnfNodePtr &anf_node,
|
||||
const mindspore::device::ascend::ProfilingTraceInfo &profiling_trace_info,
|
||||
std::vector<mindspore::CNodePtr> *kernel_list) {
|
||||
if (profiling_trace_info.IsValid() && profiling_trace_info.profiling_trace_begin == anf_node->fullname_with_scope()) {
|
||||
if (graph_ptr == nullptr || kernel_list == nullptr || anf_node == nullptr) {
|
||||
MS_LOG(ERROR) << "[profiling]input param invalid";
|
||||
return;
|
||||
}
|
||||
void ProfilingUtils::ProfilingTraceFpStart(const mindspore::AnfNodePtr &anf_node,
|
||||
const ProfilingTraceInfo &profiling_trace_info,
|
||||
NotNull<session::KernelGraph *> graph_ptr,
|
||||
NotNull<std::vector<mindspore::CNodePtr> *> kernel_list) {
|
||||
if (profiling_trace_info.trace_begin == anf_node->fullname_with_scope()) {
|
||||
auto job_id = ProfilingManager::GetInstance().GetJobId();
|
||||
// job task info
|
||||
CNodePtr job_kernel_ptr = CreateProfilingCNode(graph_ptr, false, job_id, 0);
|
||||
AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(anf_node.get()), job_kernel_ptr.get());
|
||||
AnfAlgo::SetStreamId(AnfAlgo::GetStreamId(anf_node), job_kernel_ptr.get());
|
||||
// fp task info
|
||||
CNodePtr start_kernel_ptr = CreateProfilingCNode(graph_ptr, false, kProfilingFpStartLogId, 0);
|
||||
AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(anf_node.get()), start_kernel_ptr.get());
|
||||
AnfAlgo::SetStreamId(AnfAlgo::GetStreamId(anf_node), start_kernel_ptr.get());
|
||||
kernel_list->emplace_back(job_kernel_ptr);
|
||||
kernel_list->emplace_back(start_kernel_ptr);
|
||||
ProfilingContent job_profiling_context = {false, job_id, 0};
|
||||
auto job_profiling_node = CreateProfilingCNodeWithStream(anf_node, job_profiling_context, graph_ptr);
|
||||
kernel_list->emplace_back(job_profiling_node);
|
||||
|
||||
ProfilingContent fp_profiling_content = {false, kProfilingFpStartLogId, 0};
|
||||
auto fp_profiling_node = CreateProfilingCNodeWithStream(anf_node, fp_profiling_content, graph_ptr);
|
||||
kernel_list->emplace_back(fp_profiling_node);
|
||||
}
|
||||
}
|
||||
|
||||
void ProfilingUtils::ProfilingAllReduce(const std::shared_ptr<session::KernelGraph> &graph_ptr,
|
||||
const AnfNodePtr &anf_node, int job_id, const std::string &profiling_node_name,
|
||||
std::vector<CNodePtr> *kernel_list) {
|
||||
MS_EXCEPTION_IF_NULL(graph_ptr);
|
||||
// Create a profiling node from profiling_content and give it the same stream distinction label
// and stream id as anf_node.
CNodePtr ProfilingUtils::CreateProfilingCNodeWithStream(const mindspore::AnfNodePtr &anf_node,
                                                        const ProfilingContent &profiling_content,
                                                        NotNull<session::KernelGraph *> graph_ptr) {
  CNodePtr node_on_stream = CreateProfilingCNode(profiling_content, graph_ptr);

  // Copy the stream distinction label from the reference node.
  const auto distinction_label = AnfAlgo::GetStreamDistinctionLabel(anf_node.get());
  AnfAlgo::SetStreamDistinctionLabel(distinction_label, node_on_stream.get());

  // Copy the stream id from the reference node.
  const auto stream_id = AnfAlgo::GetStreamId(anf_node);
  AnfAlgo::SetStreamId(stream_id, node_on_stream.get());

  return node_on_stream;
}
|
||||
|
||||
void ProfilingUtils::ProfilingCustomOp(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info,
|
||||
NotNull<session::KernelGraph *> graph_ptr,
|
||||
NotNull<std::vector<CNodePtr> *> kernel_list) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
auto iter = profiling_trace_info.trace_custom_node.find(anf_node->fullname_with_scope());
|
||||
if (iter == profiling_trace_info.trace_custom_node.end()) {
|
||||
return;
|
||||
}
|
||||
// custom op profiling job start from 3.
|
||||
ProfilingContent front_profiling_content = {false, 2 * custom_node_index_ + 1, 0};
|
||||
CNodePtr front_node = CreateProfilingCNodeWithStream(anf_node, front_profiling_content, graph_ptr);
|
||||
kernel_list->insert(kernel_list->end() - 1, front_node);
|
||||
|
||||
ProfilingContent back_profiling_content = {false, 2 * custom_node_index_ + 2, 0};
|
||||
CNodePtr back_node = CreateProfilingCNodeWithStream(anf_node, back_profiling_content, graph_ptr);
|
||||
kernel_list->insert(kernel_list->end(), back_node);
|
||||
++custom_node_index_;
|
||||
}
|
||||
|
||||
void ProfilingUtils::ProfilingTraceBpEnd(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info,
|
||||
NotNull<session::KernelGraph *> graph_ptr,
|
||||
NotNull<std::vector<CNodePtr> *> kernel_list) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
if (profiling_trace_info.trace_bp_end == anf_node->fullname_with_scope()) {
|
||||
ProfilingContent bp_end_profiling_content = {false, kProfilingBpEndLogId, 0};
|
||||
CNodePtr bp_end_node = CreateProfilingCNodeWithStream(anf_node, bp_end_profiling_content, graph_ptr);
|
||||
kernel_list->emplace_back(bp_end_node);
|
||||
}
|
||||
}
|
||||
|
||||
void ProfilingUtils::ProfilingTraceEnd(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info,
|
||||
NotNull<session::KernelGraph *> graph_ptr,
|
||||
NotNull<std::vector<mindspore::CNodePtr> *> kernel_list) {
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
MS_EXCEPTION_IF_NULL(kernel_list);
|
||||
auto full_scope_name = anf_node->fullname_with_scope();
|
||||
if (profiling_node_name == full_scope_name) {
|
||||
CNodePtr allreduce_kernel_ptr = CreateProfilingCNode(graph_ptr, false, job_id, 0);
|
||||
AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(anf_node.get()), allreduce_kernel_ptr.get());
|
||||
AnfAlgo::SetStreamId(AnfAlgo::GetStreamId(anf_node), allreduce_kernel_ptr.get());
|
||||
kernel_list->emplace_back(allreduce_kernel_ptr);
|
||||
}
|
||||
}
|
||||
|
||||
void ProfilingUtils::ProfilingTraceEnd(const std::shared_ptr<mindspore::session::KernelGraph> &graph_ptr,
|
||||
const mindspore::AnfNodePtr &anf_node,
|
||||
const mindspore::device::ascend::ProfilingTraceInfo &profiling_trace_info,
|
||||
std::vector<mindspore::CNodePtr> *kernel_list) {
|
||||
MS_EXCEPTION_IF_NULL(graph_ptr);
|
||||
MS_EXCEPTION_IF_NULL(anf_node);
|
||||
MS_EXCEPTION_IF_NULL(kernel_list);
|
||||
if (profiling_trace_info.IsValid()) {
|
||||
auto full_scope_name = anf_node->fullname_with_scope();
|
||||
if (profiling_trace_info.profiling_trace_netoutput == full_scope_name) {
|
||||
CNodePtr bp_kernel_ptr = CreateProfilingCNode(graph_ptr, true, kProfilingIterEndLogId, 0);
|
||||
AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(anf_node.get()), bp_kernel_ptr.get());
|
||||
AnfAlgo::SetStreamId(AnfAlgo::GetStreamId(anf_node), bp_kernel_ptr.get());
|
||||
kernel_list->emplace_back(bp_kernel_ptr);
|
||||
}
|
||||
|
||||
if (profiling_trace_info.profiling_trace_bp_end == full_scope_name) {
|
||||
CNodePtr end_task_info = CreateProfilingCNode(graph_ptr, false, kProfilingBpEndLogId, 0);
|
||||
AnfAlgo::SetStreamDistinctionLabel(AnfAlgo::GetStreamDistinctionLabel(anf_node.get()), end_task_info.get());
|
||||
AnfAlgo::SetStreamId(AnfAlgo::GetStreamId(anf_node), end_task_info.get());
|
||||
kernel_list->emplace_back(end_task_info);
|
||||
}
|
||||
if (profiling_trace_info.trace_netoutput == full_scope_name) {
|
||||
ProfilingContent bp_end_profiling_content = {true, kProfilingIterEndLogId, 0};
|
||||
CNodePtr bp_kernel_ptr = CreateProfilingCNodeWithStream(anf_node, bp_end_profiling_content, graph_ptr);
|
||||
kernel_list->emplace_back(bp_kernel_ptr);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -19,63 +19,102 @@
|
|||
#include <memory>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <set>
|
||||
#include <unordered_map>
|
||||
#include "session/kernel_graph.h"
|
||||
#include "utils/contract.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace device {
|
||||
namespace ascend {
|
||||
struct ProfilingTraceInfo {
|
||||
// execute order's first execute op(like: Cast or Four2Five ...), except tdt op(GetNext ...)
|
||||
std::string profiling_trace_begin;
|
||||
std::string trace_begin;
|
||||
// get first net_output(apply kernel) from graph outputs: fp ->net_output<- bp
|
||||
std::string profiling_trace_bp_end;
|
||||
std::string trace_bp_end;
|
||||
// execute order's end execute (like: Conv2DBackpropFilter)
|
||||
std::string profiling_trace_netoutput;
|
||||
std::string trace_netoutput;
|
||||
|
||||
std::string profiling_allreduce1_start;
|
||||
|
||||
std::string profiling_allreduce1_end;
|
||||
|
||||
std::string profiling_allreduce2_start;
|
||||
|
||||
std::string profiling_allreduce2_end;
|
||||
// profiling specific op, such as AllReduce;
|
||||
std::set<std::string> trace_custom_node;
|
||||
|
||||
// 1. insert profiling_trace_begin if profiling_trace_bp_end is not empty.
|
||||
// 2. op launch gets task info with callback func.
|
||||
// 3. insert profiling_trace_bp_end.
|
||||
// 4. insert profiling_trace_net_output if profiling_trace_bp_end is not empty.
|
||||
|
||||
bool IsValid() const { return !(profiling_trace_begin.empty() || profiling_trace_bp_end.empty()); }
|
||||
bool IsValid() const { return !(trace_begin.empty() || trace_bp_end.empty() || trace_netoutput.empty()); }
|
||||
};
|
||||
|
||||
// Values attached to one profiling node as its notify / profiler_trace_id / flags attributes.
// Instances are brace-initialised positionally (e.g. {false, job_id, 0}), so the field order matters.
struct ProfilingContent {
|
||||
// true - send data from device to host and finish profiling
|
||||
bool notify;
|
||||
// Trace id written to the node's profiler_trace_id attribute.
uint64_t profiler_trace_id;
|
||||
// Value written to the node's flags attribute.
uint32_t flags;
|
||||
};
|
||||
|
||||
class ProfilingUtils {
|
||||
public:
|
||||
ProfilingUtils() = default;
|
||||
~ProfilingUtils() = default;
|
||||
static bool GetProfilingTraceInfo(const std::shared_ptr<session::KernelGraph> &graph_ptr,
|
||||
ProfilingTraceInfo *profiling_trace_info);
|
||||
static void ProfilingTraceFpStart(const std::shared_ptr<session::KernelGraph> &graph_ptr, const AnfNodePtr &anf_node,
|
||||
const ProfilingTraceInfo &profiling_trace_info, std::vector<CNodePtr> *kernel_list);
|
||||
static void ProfilingAllReduce(const std::shared_ptr<session::KernelGraph> &graph_ptr, const AnfNodePtr &anf_node,
|
||||
int job_id, const std::string &profiling_node_name,
|
||||
std::vector<CNodePtr> *kernel_list);
|
||||
static void ProfilingTraceEnd(const std::shared_ptr<session::KernelGraph> &graph_ptr, const AnfNodePtr &anf_node,
|
||||
const ProfilingTraceInfo &profiling_trace_info, std::vector<CNodePtr> *kernel_list);
|
||||
|
||||
// Insert job_id profiling node and fp_start profiling node.
|
||||
// Job_id is read from envs and should be a number greater than 255.
|
||||
// Fp_start node should be inserted at the start of a network, and the log_id is hard-coded to 1.
|
||||
static void ProfilingTraceFpStart(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info,
|
||||
NotNull<session::KernelGraph *> graph_ptr,
|
||||
NotNull<std::vector<CNodePtr> *> kernel_list);
|
||||
|
||||
// Insert net output profiling node, which tells the device to stop profiling.
|
||||
// The notify in struct ProfilingContent should be 'true', which tells the device to send data to host.
|
||||
static void ProfilingTraceEnd(const AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info,
|
||||
NotNull<session::KernelGraph *> graph_ptr,
|
||||
NotNull<std::vector<CNodePtr> *> kernel_list);
|
||||
|
||||
// Insert bp_end profiling node, which should be inserted after the last backpropagation CNode in the network.
|
||||
static void ProfilingTraceBpEnd(const mindspore::AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info,
|
||||
NotNull<session::KernelGraph *> graph_ptr,
|
||||
NotNull<std::vector<mindspore::CNodePtr> *> kernel_list);
|
||||
|
||||
// Mapping graph id and the kernels' name in the graph
|
||||
static void SetGraphKernelName(uint32_t graph_id, const std::vector<std::string> &kernel_names);
|
||||
|
||||
// Mapping task_id and kernel name for device to generate the time cost of specific kernel.
|
||||
// Device calculate the time cost of the task which is marked by task id.
|
||||
// But we need data of (kernel name , time cost)
|
||||
static void ReportProfilingData(uint32_t graph_id, const std::vector<uint32_t> &task_ids);
|
||||
|
||||
static const char kProfiling[];
|
||||
static const char kNotify[];
|
||||
static const char kProfilerTraceId[];
|
||||
static const char kFlags[];
|
||||
// Get profiling trace point from envs.
|
||||
// export PROFILING_FP_START='full name of the first cnode to execute'
|
||||
// export PROFILING_BP_END='full name of the last backpropagation cnode to execute'
|
||||
// export PROFILING_ITER_END='full name of last cnode in graph to execute'
|
||||
// And other cnode, like AllReduce, export PROFILING_CUSTOM_1='full name of AllReduce cnode'
|
||||
// GetNext, export PROFILING_CUSTOM_2='full name of GetNext cnode'
|
||||
// The variable i in PROFILING_CUSTOM_i should start from 1 without interruption.
|
||||
static ProfilingTraceInfo GetProfilingTraceFromEnv(NotNull<session::KernelGraph *> graph_ptr);
|
||||
|
||||
// Insert two profiling trace points, one in front and one behind
|
||||
static void ProfilingCustomOp(const mindspore::AnfNodePtr &anf_node, const ProfilingTraceInfo &profiling_trace_info,
|
||||
NotNull<session::KernelGraph *> graph_ptr,
|
||||
NotNull<std::vector<mindspore::CNodePtr> *> kernel_list);
|
||||
|
||||
inline static constexpr char kProfiling[] = "Profiling";
|
||||
inline static constexpr char kNotify[] = "notify";
|
||||
inline static constexpr char kProfilerTraceId[] = "profiler_trace_id";
|
||||
inline static constexpr char kFlags[] = "flags";
|
||||
|
||||
private:
|
||||
static bool GetNetOutput(AnfNodePtr anf_node, std::string *profiling_trace_net_output);
|
||||
static CNodePtr CreateProfilingCNode(const std::shared_ptr<session::KernelGraph> &graph_ptr, bool notify,
|
||||
uint64_t profiler_trace_id, uint32_t flags);
|
||||
static NotNull<CNodePtr> CreateProfilingCNode(const ProfilingContent &profiling_content,
|
||||
NotNull<session::KernelGraph *> graph_ptr);
|
||||
static CNodePtr CreateProfilingCNodeWithStream(const AnfNodePtr &anf_node, const ProfilingContent &profiling_content,
|
||||
NotNull<session::KernelGraph *> graph_ptr);
|
||||
static std::string GetTraceBegin(const std::vector<CNodePtr> &cnode_exec_order);
|
||||
static std::string GetTraceBpEnd();
|
||||
static std::string GetTraceNetoutput(const std::vector<CNodePtr> &cnode_exec_order);
|
||||
|
||||
// graph id --> (kernel name list)
|
||||
static std::unordered_map<uint32_t, std::vector<std::string>> graph_kernel_name_;
|
||||
static uint32_t custom_node_index_;
|
||||
};
|
||||
} // namespace ascend
|
||||
} // namespace device
|
||||
|
|
|
@ -438,23 +438,22 @@ void KernelAdjust::LoadSwitchInputs(std::vector<tensor::TensorPtr> *inputs) {
|
|||
MS_LOG(INFO) << "---------------- LoadSwitchInputs End--";
|
||||
}
|
||||
|
||||
void KernelAdjust::Profiling(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr) {
|
||||
void KernelAdjust::Profiling(NotNull<session::KernelGraph *> kernel_graph_ptr) {
|
||||
if (!ascend::ProfilingManager::GetInstance().IsProfiling()) {
|
||||
MS_LOG(INFO) << "No need to profiling";
|
||||
return;
|
||||
}
|
||||
ProfilingTraceInfo profiling_trace_info;
|
||||
if (ProfilingUtils::GetProfilingTraceInfo(kernel_graph_ptr, &profiling_trace_info)) {
|
||||
InsertProfilingKernel(kernel_graph_ptr, profiling_trace_info);
|
||||
} else {
|
||||
MS_LOG(WARNING) << "[profiling] GetProfilingTraceInfo failed";
|
||||
ProfilingTraceInfo profiling_trace_info = ProfilingUtils::GetProfilingTraceFromEnv(kernel_graph_ptr);
|
||||
if (!profiling_trace_info.IsValid()) {
|
||||
MS_LOG(WARNING) << "[profiling] no profiling node found!";
|
||||
return;
|
||||
}
|
||||
InsertProfilingKernel(profiling_trace_info, kernel_graph_ptr);
|
||||
}
|
||||
|
||||
void KernelAdjust::InsertProfilingKernel(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr,
|
||||
const ProfilingTraceInfo &profiling_trace_info) {
|
||||
void KernelAdjust::InsertProfilingKernel(const ProfilingTraceInfo &profiling_trace_info,
|
||||
NotNull<session::KernelGraph *> kernel_graph_ptr) {
|
||||
MS_LOG(INFO) << "[profiling] Insert profiling kernel start";
|
||||
MS_EXCEPTION_IF_NULL(kernel_graph_ptr);
|
||||
if (!profiling_trace_info.IsValid()) {
|
||||
MS_LOG(WARNING) << "Profiling trace point not found";
|
||||
return;
|
||||
|
@ -462,18 +461,12 @@ void KernelAdjust::InsertProfilingKernel(const std::shared_ptr<session::KernelGr
|
|||
std::vector<CNodePtr> new_cnode_list;
|
||||
std::vector<CNodePtr> cnode_ptr_list = kernel_graph_ptr->execution_order();
|
||||
for (const auto &cnode_ptr : cnode_ptr_list) {
|
||||
ProfilingUtils::ProfilingTraceFpStart(kernel_graph_ptr, cnode_ptr, profiling_trace_info, &new_cnode_list);
|
||||
ProfilingUtils::ProfilingAllReduce(kernel_graph_ptr, cnode_ptr, ascend::kProfilingAllReduce1Start,
|
||||
profiling_trace_info.profiling_allreduce1_start, &new_cnode_list);
|
||||
ProfilingUtils::ProfilingAllReduce(kernel_graph_ptr, cnode_ptr, ascend::kProfilingAllReduce2Start,
|
||||
profiling_trace_info.profiling_allreduce2_start, &new_cnode_list);
|
||||
ProfilingUtils::ProfilingTraceFpStart(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list));
|
||||
new_cnode_list.emplace_back(cnode_ptr);
|
||||
|
||||
ProfilingUtils::ProfilingAllReduce(kernel_graph_ptr, cnode_ptr, ascend::kProfilingAllReduce1End,
|
||||
profiling_trace_info.profiling_allreduce1_end, &new_cnode_list);
|
||||
ProfilingUtils::ProfilingAllReduce(kernel_graph_ptr, cnode_ptr, ascend::kProfilingAllReduce2End,
|
||||
profiling_trace_info.profiling_allreduce2_end, &new_cnode_list);
|
||||
ProfilingUtils::ProfilingTraceEnd(kernel_graph_ptr, cnode_ptr, profiling_trace_info, &new_cnode_list);
|
||||
ProfilingUtils::ProfilingCustomOp(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list));
|
||||
ProfilingUtils::ProfilingTraceBpEnd(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list));
|
||||
ProfilingUtils::ProfilingTraceEnd(cnode_ptr, profiling_trace_info, kernel_graph_ptr, NOT_NULL(&new_cnode_list));
|
||||
}
|
||||
kernel_graph_ptr->set_execution_order(new_cnode_list);
|
||||
}
|
||||
|
|
|
@ -48,7 +48,7 @@ class KernelAdjust {
|
|||
void SetStreamSwitchOps(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr);
|
||||
bool StepLoadCtrlInputs(const std::shared_ptr<session::Context> &context,
|
||||
const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr);
|
||||
void Profiling(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr);
|
||||
void Profiling(NotNull<session::KernelGraph *> kernel_graph_ptr);
|
||||
static bool NeedInsertSwitch();
|
||||
CNodePtr CreateSteamActiveOp(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr);
|
||||
|
||||
|
@ -66,8 +66,8 @@ class KernelAdjust {
|
|||
kernel::KernelBuildInfo::KernelBuildInfoBuilder CreateMngKernelBuilder(const std::vector<std::string> &formats,
|
||||
const std::vector<TypeId> &type_ids);
|
||||
void LoadSwitchInputs(std::vector<tensor::TensorPtr> *inputs);
|
||||
void InsertProfilingKernel(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr,
|
||||
const ProfilingTraceInfo &profiling_trace_info);
|
||||
void InsertProfilingKernel(const ProfilingTraceInfo &profiling_trace_info,
|
||||
NotNull<session::KernelGraph *> kernel_graph_ptr);
|
||||
};
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -246,7 +246,7 @@ void AscendBackendOptimization(const std::shared_ptr<session::KernelGraph> &kern
|
|||
kernel_graph->SetExecOrderByDefault();
|
||||
if (save_graphs) {
|
||||
std::string file_path = save_graphs_path + "/" + "hwopt_d_end.ir";
|
||||
DumpIR(file_path, kernel_graph);
|
||||
DumpIR(file_path, kernel_graph, true);
|
||||
DumpIRProto(kernel_graph, "after_hwopt");
|
||||
}
|
||||
}
|
||||
|
|
|
@ -136,7 +136,7 @@ void AscendSession::BuildGraph(GraphId graph_id) {
|
|||
// Assign streams for control sink and hccl and so on
|
||||
AssignStream(graph);
|
||||
|
||||
device::KernelAdjust::GetInstance().Profiling(graph);
|
||||
device::KernelAdjust::GetInstance().Profiling(NOT_NULL(graph.get()));
|
||||
// build kernel if node is cnode
|
||||
BuildKernel(graph);
|
||||
auto ms_context = MsContext::GetInstance();
|
||||
|
|
|
@ -42,6 +42,6 @@ bool KernelAdjust::StepLoadCtrlInputs(const std::shared_ptr<session::Context> &c
|
|||
return true;
|
||||
}
|
||||
// Unconditionally reports that a switch needs to be inserted.
bool KernelAdjust::NeedInsertSwitch() {
  return true;
}
|
||||
void KernelAdjust::Profiling(const std::shared_ptr<session::KernelGraph> &kernel_graph_ptr) { return; }
|
||||
// This definition performs no work; kernel_graph_ptr is left untouched.
void KernelAdjust::Profiling(NotNull<session::KernelGraph *> kernel_graph_ptr) {}
|
||||
} // namespace device
|
||||
} // namespace mindspore
|
||||
|
|
Loading…
Reference in New Issue