Report parallel strategy data to Ascend Profiler.
This commit is contained in:
parent
8f0b175207
commit
3b45ae8d7c
|
@ -186,6 +186,8 @@ Status ProfilingManager::ProfHandleStart() {
|
||||||
}
|
}
|
||||||
|
|
||||||
Status ProfilingManager::ProfHandleStop() {
|
Status ProfilingManager::ProfHandleStop() {
|
||||||
|
// Report MindSpore Framework data to Ascend Profiler before stopping
|
||||||
|
ProfilingUtils::ReportMindSporeFrameworkData();
|
||||||
MS_LOG(INFO) << "Begin to stop profiling. Current profiling state is " << cur_state_;
|
MS_LOG(INFO) << "Begin to stop profiling. Current profiling state is " << cur_state_;
|
||||||
cur_state_ = kProfilingStop;
|
cur_state_ = kProfilingStop;
|
||||||
return PROF_SUCCESS;
|
return PROF_SUCCESS;
|
||||||
|
|
|
@ -22,6 +22,7 @@
|
||||||
#include "backend/common/session/kernel_graph.h"
|
#include "backend/common/session/kernel_graph.h"
|
||||||
#include "plugin/device/ascend/hal/common/ascend_utils.h"
|
#include "plugin/device/ascend/hal/common/ascend_utils.h"
|
||||||
#include "plugin/device/ascend/hal/profiler/ascend_profiling.h"
|
#include "plugin/device/ascend/hal/profiler/ascend_profiling.h"
|
||||||
|
#include "plugin/device/ascend/hal/profiler/parallel_strategy_profiling.h"
|
||||||
#include "plugin/device/ascend/hal/device/ascend_stream_manager.h"
|
#include "plugin/device/ascend/hal/device/ascend_stream_manager.h"
|
||||||
|
|
||||||
namespace mindspore {
|
namespace mindspore {
|
||||||
|
@ -174,6 +175,17 @@ uint32_t ProfilingReporter::GetTaskId(const string &node_name) {
|
||||||
return task_ids_[(uint32_t)index];
|
return task_ids_[(uint32_t)index];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ProfilingReporter::ReportParallelStrategy() const {
|
||||||
|
std::string parallel_data = profiler::ascend::ParallelStrategy::GetInstance()->GetParallelStrategyForReport();
|
||||||
|
if (parallel_data.empty()) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
MS_LOG(INFO) << "Start to report parallel strategy data to Ascend Profiler.";
|
||||||
|
std::string tag_name = "parallel_strategy";
|
||||||
|
ReportData(device_id_, reinterpret_cast<unsigned char *>(parallel_data.data()), parallel_data.size(), tag_name);
|
||||||
|
MS_LOG(INFO) << "Stop to report " << parallel_data.size() << "(Bytes) parallel strategy data to Ascend Profiler.";
|
||||||
|
}
|
||||||
|
|
||||||
void ProfilingReporter::ReportData(uint32_t device_id, unsigned char *data, size_t data_size,
|
void ProfilingReporter::ReportData(uint32_t device_id, unsigned char *data, size_t data_size,
|
||||||
const string &tag_name) const {
|
const string &tag_name) const {
|
||||||
ReporterData report_data{};
|
ReporterData report_data{};
|
||||||
|
@ -182,12 +194,12 @@ void ProfilingReporter::ReportData(uint32_t device_id, unsigned char *data, size
|
||||||
report_data.dataLen = data_size;
|
report_data.dataLen = data_size;
|
||||||
auto ret = memcpy_s(report_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, tag_name.c_str(), tag_name.length());
|
auto ret = memcpy_s(report_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, tag_name.c_str(), tag_name.length());
|
||||||
if (ret != 0) {
|
if (ret != 0) {
|
||||||
MS_LOG(EXCEPTION) << "Report data failed, tag is " << tag_name.c_str() << ", ret: " << ret;
|
MS_LOG(EXCEPTION) << "Report data failed, tag is " << tag_name << ", ret: " << ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto report_ret = ProfilingManager::GetInstance().CallMsprofReport(NOT_NULL(&report_data));
|
auto report_ret = ProfilingManager::GetInstance().CallMsprofReport(NOT_NULL(&report_data));
|
||||||
if (report_ret != 0) {
|
if (report_ret != 0) {
|
||||||
MS_LOG(EXCEPTION) << "Report data failed, tag is " << tag_name.c_str() << ", ret: " << ret << "."
|
MS_LOG(EXCEPTION) << "Report data failed, tag is " << tag_name << ", ret: " << report_ret << "."
|
||||||
<< GetErrorMessage(true);
|
<< GetErrorMessage(true);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -68,6 +68,7 @@ class ProfilingReporter {
|
||||||
void DynamicNodeReport(const CNodePtr &node, uint32_t stream_id, uint32_t task_id,
|
void DynamicNodeReport(const CNodePtr &node, uint32_t stream_id, uint32_t task_id,
|
||||||
const KernelType kernel_type) const;
|
const KernelType kernel_type) const;
|
||||||
void ReportStepPoint(const vector<std::shared_ptr<StepPointDesc>> &points);
|
void ReportStepPoint(const vector<std::shared_ptr<StepPointDesc>> &points);
|
||||||
|
void ReportParallelStrategy() const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
uint32_t device_id_;
|
uint32_t device_id_;
|
||||||
|
|
|
@ -429,6 +429,22 @@ void ProfilingUtils::SetReportProfilingData(const std::vector<uint32_t> &task_id
|
||||||
GraphProfilingData report_data = {task_ids, stream_ids, graph_id, rt_model_id};
|
GraphProfilingData report_data = {task_ids, stream_ids, graph_id, rt_model_id};
|
||||||
(void)report_data_.emplace_back(report_data);
|
(void)report_data_.emplace_back(report_data);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Report MindSpore Framework data to Ascend Profiler
|
||||||
|
void ProfilingUtils::ReportMindSporeFrameworkData() {
|
||||||
|
auto context = MsContext::GetInstance();
|
||||||
|
MS_EXCEPTION_IF_NULL(context);
|
||||||
|
auto device_id = context->get_param<uint32_t>(MS_CTX_DEVICE_ID);
|
||||||
|
uint32_t graph_id = 0;
|
||||||
|
uint32_t rt_model_id = 0;
|
||||||
|
std::vector<CNodePtr> cnode_list;
|
||||||
|
std::vector<uint32_t> stream_ids;
|
||||||
|
std::vector<uint32_t> task_ids;
|
||||||
|
ProfilingReporter repoter(device_id, graph_id, rt_model_id, cnode_list, stream_ids, task_ids);
|
||||||
|
MS_LOG(INFO) << "Start to report MindSpore Framework data to Ascend Profiler.";
|
||||||
|
repoter.ReportParallelStrategy();
|
||||||
|
MS_LOG(INFO) << "Stop to report MindSpore Framework data to Ascend Profiler.";
|
||||||
|
}
|
||||||
} // namespace ascend
|
} // namespace ascend
|
||||||
} // namespace device
|
} // namespace device
|
||||||
} // namespace mindspore
|
} // namespace mindspore
|
||||||
|
|
|
@ -77,6 +77,8 @@ class ProfilingUtils {
|
||||||
// Save graph information to Framework file
|
// Save graph information to Framework file
|
||||||
static void ReportProfilingData(const std::vector<uint32_t> &task_ids, const std::vector<uint32_t> &stream_ids,
|
static void ReportProfilingData(const std::vector<uint32_t> &task_ids, const std::vector<uint32_t> &stream_ids,
|
||||||
uint32_t graph_id, uint32_t rt_model_id);
|
uint32_t graph_id, uint32_t rt_model_id);
|
||||||
|
// Report MindSpore Framework data to Ascend Profiler
|
||||||
|
static void ReportMindSporeFrameworkData();
|
||||||
// Generate profiling trace
|
// Generate profiling trace
|
||||||
static ProfilingTraceInfo GenerateProfilingTrace(const session::KernelGraph &kernel_graph);
|
static ProfilingTraceInfo GenerateProfilingTrace(const session::KernelGraph &kernel_graph);
|
||||||
|
|
||||||
|
|
|
@ -24,6 +24,7 @@
|
||||||
#include "plugin/device/ascend/hal/profiler/parallel_strategy_profiling.h"
|
#include "plugin/device/ascend/hal/profiler/parallel_strategy_profiling.h"
|
||||||
|
|
||||||
#include "google/protobuf/util/json_util.h"
|
#include "google/protobuf/util/json_util.h"
|
||||||
|
#include "nlohmann/json.hpp"
|
||||||
|
|
||||||
#ifdef WITH_BACKEND
|
#ifdef WITH_BACKEND
|
||||||
#include "ps/ps_context.h"
|
#include "ps/ps_context.h"
|
||||||
|
@ -186,6 +187,34 @@ void ParallelStrategy::SaveParallelStrategyToFile() {
|
||||||
|
|
||||||
MS_LOG(INFO) << "Save profile parallel strategy success.";
|
MS_LOG(INFO) << "Save profile parallel strategy success.";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::string ParallelStrategy::GetParallelStrategyForReport() {
|
||||||
|
bool parallel_data_save_status = has_got_parallel_strategy_data;
|
||||||
|
std::string report_data;
|
||||||
|
irpb::ProfilingParallel profiling_parallel;
|
||||||
|
if (has_got_parallel_strategy_data) {
|
||||||
|
profiling_parallel = cache_profiling_parallel_pb;
|
||||||
|
} else {
|
||||||
|
FuncGraphPtr func_graph = nullptr;
|
||||||
|
profiling_parallel = GetProfilingParallel(func_graph);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto parallel_context = parallel::ParallelContext::GetInstance();
|
||||||
|
MS_EXCEPTION_IF_NULL(parallel_context);
|
||||||
|
(void)google::protobuf::util::MessageToJsonString(profiling_parallel, &report_data);
|
||||||
|
try {
|
||||||
|
nlohmann::json report_content = nlohmann::json::parse(report_data);
|
||||||
|
report_content["config"]["ai_framework_type"] = "MindSpore";
|
||||||
|
report_content["config"]["stage_num"] = parallel_context->pipeline_stage_split_num();
|
||||||
|
report_data = report_content.dump();
|
||||||
|
} catch (nlohmann::json::exception &e) {
|
||||||
|
MS_LOG(ERROR) << e.what();
|
||||||
|
report_data = "";
|
||||||
|
}
|
||||||
|
|
||||||
|
has_got_parallel_strategy_data = parallel_data_save_status;
|
||||||
|
return report_data;
|
||||||
|
}
|
||||||
} // namespace ascend
|
} // namespace ascend
|
||||||
} // namespace profiler
|
} // namespace profiler
|
||||||
} // namespace mindspore
|
} // namespace mindspore
|
||||||
|
|
|
@ -34,6 +34,7 @@ class ParallelStrategy {
|
||||||
~ParallelStrategy() {}
|
~ParallelStrategy() {}
|
||||||
BACKEND_EXPORT void DumpProfileParallelStrategy(const FuncGraphPtr &func_graph);
|
BACKEND_EXPORT void DumpProfileParallelStrategy(const FuncGraphPtr &func_graph);
|
||||||
void SaveParallelStrategyToFile();
|
void SaveParallelStrategyToFile();
|
||||||
|
std::string GetParallelStrategyForReport();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
irpb::ProfilingParallel GetProfilingParallel(const FuncGraphPtr &func_graph);
|
irpb::ProfilingParallel GetProfilingParallel(const FuncGraphPtr &func_graph);
|
||||||
|
|
|
@ -73,7 +73,7 @@ def test_ascend_profiling():
|
||||||
add = Net()
|
add = Net()
|
||||||
add(Tensor(x), Tensor(y))
|
add(Tensor(x), Tensor(y))
|
||||||
profiler.analyse()
|
profiler.analyse()
|
||||||
assert len(glob.glob(f"{tmpdir}/profiler*/*PROF*/device_*/data/Framework*")) == 4
|
assert len(glob.glob(f"{tmpdir}/profiler*/*PROF*/device_*/data/Framework*")) == 6
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.level0
|
@pytest.mark.level0
|
||||||
|
|
|
@ -265,6 +265,7 @@ if(ENABLE_SECURITY)
|
||||||
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/plugin/device/ascend/hal/profiler/options.cc")
|
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/plugin/device/ascend/hal/profiler/options.cc")
|
||||||
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/debug/data_dump/dump_json_parser.cc")
|
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/debug/data_dump/dump_json_parser.cc")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
list(REMOVE_ITEM MINDSPORE_SRC_LIST
|
list(REMOVE_ITEM MINDSPORE_SRC_LIST
|
||||||
"../../../mindspore/ccsrc/plugin/device/ascend/hal/profiler/parallel_strategy_profiling.cc")
|
"../../../mindspore/ccsrc/plugin/device/ascend/hal/profiler/parallel_strategy_profiling.cc")
|
||||||
|
|
||||||
|
|
|
@ -27,6 +27,12 @@ std::shared_ptr<ParallelStrategy> &ParallelStrategy::GetInstance(){
|
||||||
}
|
}
|
||||||
|
|
||||||
void ParallelStrategy::SaveParallelStrategyToFile(){}
|
void ParallelStrategy::SaveParallelStrategyToFile(){}
|
||||||
|
|
||||||
|
// Stub implementation: returns a fixed JSON document instead of a real parallel strategy.
// The framework key is spelled "ai_framework_type" so that it matches the key written by
// the non-stub implementation of this method.
std::string ParallelStrategy::GetParallelStrategyForReport() {
  return "{\"config\": {\"ai_framework_type\": \"MindSpore\",\"parallelType\":"
         "\"semi_auto_parallel\",\"rankId\":0}}";
}
|
||||||
} // namespace ascend
|
} // namespace ascend
|
||||||
} // namespace profiler
|
} // namespace profiler
|
||||||
} // namespace mindspore
|
} // namespace mindspore
|
||||||
|
|
Loading…
Reference in New Issue