Report parallel strategy data to Ascend Profiler.

This commit is contained in:
qiukaida 2022-11-01 20:09:04 +08:00
parent 8f0b175207
commit 3b45ae8d7c
10 changed files with 73 additions and 3 deletions

View File

@ -186,6 +186,8 @@ Status ProfilingManager::ProfHandleStart() {
} }
Status ProfilingManager::ProfHandleStop() { Status ProfilingManager::ProfHandleStop() {
// Report MindSpore Framework data to Ascend Profiler before Stop
ProfilingUtils::ReportMindSporeFrameworkData();
MS_LOG(INFO) << "Begin to stop profiling. Current profiling state is " << cur_state_; MS_LOG(INFO) << "Begin to stop profiling. Current profiling state is " << cur_state_;
cur_state_ = kProfilingStop; cur_state_ = kProfilingStop;
return PROF_SUCCESS; return PROF_SUCCESS;

View File

@ -22,6 +22,7 @@
#include "backend/common/session/kernel_graph.h" #include "backend/common/session/kernel_graph.h"
#include "plugin/device/ascend/hal/common/ascend_utils.h" #include "plugin/device/ascend/hal/common/ascend_utils.h"
#include "plugin/device/ascend/hal/profiler/ascend_profiling.h" #include "plugin/device/ascend/hal/profiler/ascend_profiling.h"
#include "plugin/device/ascend/hal/profiler/parallel_strategy_profiling.h"
#include "plugin/device/ascend/hal/device/ascend_stream_manager.h" #include "plugin/device/ascend/hal/device/ascend_stream_manager.h"
namespace mindspore { namespace mindspore {
@ -174,6 +175,17 @@ uint32_t ProfilingReporter::GetTaskId(const string &node_name) {
return task_ids_[(uint32_t)index]; return task_ids_[(uint32_t)index];
} }
// Report the parallel strategy data to Ascend Profiler.
// The data is obtained from ParallelStrategy::GetParallelStrategyForReport() and forwarded through ReportData()
// under the tag "parallel_strategy". Nothing is reported when the obtained string is empty.
void ProfilingReporter::ReportParallelStrategy() const {
  std::string strategy_json = profiler::ascend::ParallelStrategy::GetInstance()->GetParallelStrategyForReport();
  if (!strategy_json.empty()) {
    MS_LOG(INFO) << "Start to report parallel strategy data to Ascend Profiler.";
    const std::string tag("parallel_strategy");
    // ReportData() takes a mutable byte pointer, so expose the string buffer as unsigned char.
    auto *payload = reinterpret_cast<unsigned char *>(strategy_json.data());
    ReportData(device_id_, payload, strategy_json.size(), tag);
    MS_LOG(INFO) << "Stop to report " << strategy_json.size() << "(Bytes) parallel strategy data to Ascend Profiler.";
  }
}
void ProfilingReporter::ReportData(uint32_t device_id, unsigned char *data, size_t data_size, void ProfilingReporter::ReportData(uint32_t device_id, unsigned char *data, size_t data_size,
const string &tag_name) const { const string &tag_name) const {
ReporterData report_data{}; ReporterData report_data{};
@ -182,12 +194,12 @@ void ProfilingReporter::ReportData(uint32_t device_id, unsigned char *data, size
report_data.dataLen = data_size; report_data.dataLen = data_size;
auto ret = memcpy_s(report_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, tag_name.c_str(), tag_name.length()); auto ret = memcpy_s(report_data.tag, MSPROF_ENGINE_MAX_TAG_LEN + 1, tag_name.c_str(), tag_name.length());
if (ret != 0) { if (ret != 0) {
MS_LOG(EXCEPTION) << "Report data failed, tag is " << tag_name.c_str() << ", ret: " << ret; MS_LOG(EXCEPTION) << "Report data failed, tag is " << tag_name << ", ret: " << ret;
} }
auto report_ret = ProfilingManager::GetInstance().CallMsprofReport(NOT_NULL(&report_data)); auto report_ret = ProfilingManager::GetInstance().CallMsprofReport(NOT_NULL(&report_data));
if (report_ret != 0) { if (report_ret != 0) {
MS_LOG(EXCEPTION) << "Report data failed, tag is " << tag_name.c_str() << ", ret: " << ret << "." MS_LOG(EXCEPTION) << "Report data failed, tag is " << tag_name << ", ret: " << report_ret << "."
<< GetErrorMessage(true); << GetErrorMessage(true);
} }
} }

View File

@ -68,6 +68,7 @@ class ProfilingReporter {
void DynamicNodeReport(const CNodePtr &node, uint32_t stream_id, uint32_t task_id, void DynamicNodeReport(const CNodePtr &node, uint32_t stream_id, uint32_t task_id,
const KernelType kernel_type) const; const KernelType kernel_type) const;
void ReportStepPoint(const vector<std::shared_ptr<StepPointDesc>> &points); void ReportStepPoint(const vector<std::shared_ptr<StepPointDesc>> &points);
void ReportParallelStrategy() const;
private: private:
uint32_t device_id_; uint32_t device_id_;

View File

@ -429,6 +429,22 @@ void ProfilingUtils::SetReportProfilingData(const std::vector<uint32_t> &task_id
GraphProfilingData report_data = {task_ids, stream_ids, graph_id, rt_model_id}; GraphProfilingData report_data = {task_ids, stream_ids, graph_id, rt_model_id};
(void)report_data_.emplace_back(report_data); (void)report_data_.emplace_back(report_data);
} }
// Report MindSpore Framework data to Ascend Profiler
void ProfilingUtils::ReportMindSporeFrameworkData() {
auto context = MsContext::GetInstance();
MS_EXCEPTION_IF_NULL(context);
auto device_id = context->get_param<uint32_t>(MS_CTX_DEVICE_ID);
uint32_t graph_id = 0;
uint32_t rt_model_id = 0;
std::vector<CNodePtr> cnode_list;
std::vector<uint32_t> stream_ids;
std::vector<uint32_t> task_ids;
ProfilingReporter repoter(device_id, graph_id, rt_model_id, cnode_list, stream_ids, task_ids);
MS_LOG(INFO) << "Start to report MindSpore Framework data to Ascend Profiler.";
repoter.ReportParallelStrategy();
MS_LOG(INFO) << "Stop to report MindSpore Framework data to Ascend Profiler.";
}
} // namespace ascend } // namespace ascend
} // namespace device } // namespace device
} // namespace mindspore } // namespace mindspore

View File

@ -77,6 +77,8 @@ class ProfilingUtils {
// Save graph information to Framework file // Save graph information to Framework file
static void ReportProfilingData(const std::vector<uint32_t> &task_ids, const std::vector<uint32_t> &stream_ids, static void ReportProfilingData(const std::vector<uint32_t> &task_ids, const std::vector<uint32_t> &stream_ids,
uint32_t graph_id, uint32_t rt_model_id); uint32_t graph_id, uint32_t rt_model_id);
// Report MindSpore Framework data to Ascend Profiler
static void ReportMindSporeFrameworkData();
// Generate profiling trace // Generate profiling trace
static ProfilingTraceInfo GenerateProfilingTrace(const session::KernelGraph &kernel_graph); static ProfilingTraceInfo GenerateProfilingTrace(const session::KernelGraph &kernel_graph);

View File

@ -24,6 +24,7 @@
#include "plugin/device/ascend/hal/profiler/parallel_strategy_profiling.h" #include "plugin/device/ascend/hal/profiler/parallel_strategy_profiling.h"
#include "google/protobuf/util/json_util.h" #include "google/protobuf/util/json_util.h"
#include "nlohmann/json.hpp"
#ifdef WITH_BACKEND #ifdef WITH_BACKEND
#include "ps/ps_context.h" #include "ps/ps_context.h"
@ -186,6 +187,34 @@ void ParallelStrategy::SaveParallelStrategyToFile() {
MS_LOG(INFO) << "Save profile parallel strategy success."; MS_LOG(INFO) << "Save profile parallel strategy success.";
} }
std::string ParallelStrategy::GetParallelStrategyForReport() {
bool parallel_data_save_status = has_got_parallel_strategy_data;
std::string report_data;
irpb::ProfilingParallel profiling_parallel;
if (has_got_parallel_strategy_data) {
profiling_parallel = cache_profiling_parallel_pb;
} else {
FuncGraphPtr func_graph = nullptr;
profiling_parallel = GetProfilingParallel(func_graph);
}
auto parallel_context = parallel::ParallelContext::GetInstance();
MS_EXCEPTION_IF_NULL(parallel_context);
(void)google::protobuf::util::MessageToJsonString(profiling_parallel, &report_data);
try {
nlohmann::json report_content = nlohmann::json::parse(report_data);
report_content["config"]["ai_framework_type"] = "MindSpore";
report_content["config"]["stage_num"] = parallel_context->pipeline_stage_split_num();
report_data = report_content.dump();
} catch (nlohmann::json::exception &e) {
MS_LOG(ERROR) << e.what();
report_data = "";
}
has_got_parallel_strategy_data = parallel_data_save_status;
return report_data;
}
} // namespace ascend } // namespace ascend
} // namespace profiler } // namespace profiler
} // namespace mindspore } // namespace mindspore

View File

@ -34,6 +34,7 @@ class ParallelStrategy {
~ParallelStrategy() {} ~ParallelStrategy() {}
BACKEND_EXPORT void DumpProfileParallelStrategy(const FuncGraphPtr &func_graph); BACKEND_EXPORT void DumpProfileParallelStrategy(const FuncGraphPtr &func_graph);
void SaveParallelStrategyToFile(); void SaveParallelStrategyToFile();
std::string GetParallelStrategyForReport();
private: private:
irpb::ProfilingParallel GetProfilingParallel(const FuncGraphPtr &func_graph); irpb::ProfilingParallel GetProfilingParallel(const FuncGraphPtr &func_graph);

View File

@ -73,7 +73,7 @@ def test_ascend_profiling():
add = Net() add = Net()
add(Tensor(x), Tensor(y)) add(Tensor(x), Tensor(y))
profiler.analyse() profiler.analyse()
assert len(glob.glob(f"{tmpdir}/profiler*/*PROF*/device_*/data/Framework*")) == 4 assert len(glob.glob(f"{tmpdir}/profiler*/*PROF*/device_*/data/Framework*")) == 6
@pytest.mark.level0 @pytest.mark.level0

View File

@ -265,6 +265,7 @@ if(ENABLE_SECURITY)
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/plugin/device/ascend/hal/profiler/options.cc") list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/plugin/device/ascend/hal/profiler/options.cc")
list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/debug/data_dump/dump_json_parser.cc") list(REMOVE_ITEM MINDSPORE_SRC_LIST "../../../mindspore/ccsrc/debug/data_dump/dump_json_parser.cc")
endif() endif()
list(REMOVE_ITEM MINDSPORE_SRC_LIST list(REMOVE_ITEM MINDSPORE_SRC_LIST
"../../../mindspore/ccsrc/plugin/device/ascend/hal/profiler/parallel_strategy_profiling.cc") "../../../mindspore/ccsrc/plugin/device/ascend/hal/profiler/parallel_strategy_profiling.cc")

View File

@ -27,6 +27,12 @@ std::shared_ptr<ParallelStrategy> &ParallelStrategy::GetInstance(){
} }
void ParallelStrategy::SaveParallelStrategyToFile(){} void ParallelStrategy::SaveParallelStrategyToFile(){}
// Stub of ParallelStrategy::GetParallelStrategyForReport(): returns a fixed JSON string.
// The framework key is "ai_framework_type", the same key the real implementation writes under "config".
std::string ParallelStrategy::GetParallelStrategyForReport() {
  std::string data = "{\"config\": {\"ai_framework_type\": \"MindSpore\",\"parallelType\":"
                     "\"semi_auto_parallel\",\"rankId\":0}}";
  return data;
}
} // namespace ascend } // namespace ascend
} // namespace profiler } // namespace profiler
} // namespace mindspore } // namespace mindspore