forked from mindspore-Ecosystem/mindspore
Add analysis of profiling to warn if dataset parallel_workers should be increased
This message will currently only be printed if profiling of minddata is enabled. Profiling of minddata is not enabled by default.
This commit is contained in:
parent
7838f8570b
commit
c616150fd6
|
@ -156,7 +156,8 @@ Status MapOp::operator()() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// The operator class just starts off threads by calling the tree_ function
|
// The operator class just starts off threads by calling the tree_ function
|
||||||
rc = tree_->LaunchWorkers(num_workers_, std::bind(&MapOp::WorkerEntry, this, std::placeholders::_1), NameWithID());
|
rc =
|
||||||
|
tree_->LaunchWorkers(num_workers_, std::bind(&MapOp::WorkerEntry, this, std::placeholders::_1), NameWithID(), id());
|
||||||
// Synchronize with TaskManager
|
// Synchronize with TaskManager
|
||||||
TaskManager::FindMe()->Post();
|
TaskManager::FindMe()->Post();
|
||||||
RETURN_IF_NOT_OK(rc);
|
RETURN_IF_NOT_OK(rc);
|
||||||
|
|
|
@ -109,5 +109,7 @@ Status ConnectorSize::Init(const std::string &dir_path, const std::string &devic
|
||||||
file_path_ = (Path(dir_path) / Path("pipeline_profiling_" + device_id + ".json")).toString();
|
file_path_ = (Path(dir_path) / Path("pipeline_profiling_" + device_id + ".json")).toString();
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Analysis hook for connector size sampling.
// Performs no analysis on the collected samples and simply reports success.
Status ConnectorSize::Analyze() {
  return Status::OK();
}
|
||||||
} // namespace dataset
|
} // namespace dataset
|
||||||
} // namespace mindspore
|
} // namespace mindspore
|
||||||
|
|
|
@ -31,7 +31,7 @@ class ExecutionTree;
|
||||||
// Connector size sampling samples the output connector size of each op in the pipeline.
|
// Connector size sampling samples the output connector size of each op in the pipeline.
|
||||||
// It supports JSON serialization for external usage.
|
// It supports JSON serialization for external usage.
|
||||||
class ConnectorSize : public Sampling {
|
class ConnectorSize : public Sampling {
|
||||||
// Connecto size sampling data is stored as a 2D vector
|
// Connector size sampling data is stored as a 2D vector
|
||||||
// op_0 ... op_m
|
// op_0 ... op_m
|
||||||
// sample_0 size_0_0 ... size_m_0
|
// sample_0 size_0_0 ... size_m_0
|
||||||
// ... ... ... ...
|
// ... ... ... ...
|
||||||
|
@ -58,12 +58,14 @@ class ConnectorSize : public Sampling {
|
||||||
|
|
||||||
Status Init(const std::string &dir_path, const std::string &device_id) override;
|
Status Init(const std::string &dir_path, const std::string &device_id) override;
|
||||||
|
|
||||||
// Parse op infomation and transform to json format
|
// Parse op information and transform to json format
|
||||||
json ParseOpInfo(const DatasetOp &node, const std::vector<int32_t> &size);
|
json ParseOpInfo(const DatasetOp &node, const std::vector<int32_t> &size);
|
||||||
|
|
||||||
// Change file mode after save throughput data
|
// Change file mode after save throughput data
|
||||||
Status ChangeFileMode() { return Status::OK(); }
|
Status ChangeFileMode() { return Status::OK(); }
|
||||||
|
|
||||||
|
Status Analyze() override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
ExecutionTree *tree_ = nullptr; // ExecutionTree pointer
|
ExecutionTree *tree_ = nullptr; // ExecutionTree pointer
|
||||||
ConnectorSizeSampleTable sample_table_; // Dataset structure to store all samples of connector size sampling
|
ConnectorSizeSampleTable sample_table_; // Dataset structure to store all samples of connector size sampling
|
||||||
|
|
|
@ -150,5 +150,7 @@ Status ConnectorThroughput::ChangeFileMode() {
|
||||||
}
|
}
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Analysis hook for connector throughput sampling.
// Performs no analysis on the collected samples and simply reports success.
Status ConnectorThroughput::Analyze() {
  return Status::OK();
}
|
||||||
} // namespace dataset
|
} // namespace dataset
|
||||||
} // namespace mindspore
|
} // namespace mindspore
|
||||||
|
|
|
@ -74,6 +74,8 @@ class ConnectorThroughput : public Sampling {
|
||||||
|
|
||||||
Status ChangeFileMode() override;
|
Status ChangeFileMode() override;
|
||||||
|
|
||||||
|
Status Analyze() override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
ExecutionTree *tree_ = nullptr; // ExecutionTree pointer
|
ExecutionTree *tree_ = nullptr; // ExecutionTree pointer
|
||||||
int64_t max_rows_;
|
int64_t max_rows_;
|
||||||
|
|
|
@ -135,6 +135,27 @@ Status DeviceCpu::Collect(ExecutionTree *tree) {
|
||||||
first_collect_ = false;
|
first_collect_ = false;
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
|
// Summarize the collected device-level CPU samples as a single average utilization value.
//
// name          [out] set to "device_info" so the caller can tell which sampler produced the result.
// utilization   [out] average of (user + sys) utilization over the analyzed window, or 0 when
//               there are no samples to analyze.
// extra_message [out] not written by this sampler.
// Returns Status::OK() in all cases.
Status DeviceCpu::Analyze(std::string *name, double *utilization, std::string *extra_message) {
  *name = std::string("device_info");
  // Always publish a defined result so the caller never reads a stale value.
  *utilization = 0;

  // Only analyze the middle half of the samples.
  // Starting and ending may be impacted by startup or ending pipeline activities.
  const size_t total_samples = cpu_util_.size();
  const size_t start_analyze = total_samples / 4;
  const size_t end_analyze = total_samples - start_analyze;
  if (end_analyze <= start_analyze) {
    return Status::OK();
  }

  // Accumulate in floating point: with integer operands the average below would be truncated.
  double sum = 0;
  for (size_t i = start_analyze; i < end_analyze; i++) {
    sum += cpu_util_[i].user_utilization_;
    sum += cpu_util_[i].sys_utilization_;
  }

  // The device-level value is not divided by the number of CPUs.
  // NOTE(review): the original comment says the samples are "already in range of 0-1", while
  // CpuSampling::Analyze compares this result against 90 - confirm the unit (fraction vs. percent).
  *utilization = sum / static_cast<double>(end_analyze - start_analyze);
  return Status::OK();
}
|
||||||
|
|
||||||
Status DeviceCpu::SaveToFile(const std::string &file_path) {
|
Status DeviceCpu::SaveToFile(const std::string &file_path) {
|
||||||
Path path = Path(file_path);
|
Path path = Path(file_path);
|
||||||
|
@ -236,6 +257,8 @@ Status OperatorCpu::Collect(ExecutionTree *tree) {
|
||||||
if (first_collect_) {
|
if (first_collect_) {
|
||||||
for (auto iter = tree->begin(); iter != tree->end(); ++iter) {
|
for (auto iter = tree->begin(); iter != tree->end(); ++iter) {
|
||||||
id_count++;
|
id_count++;
|
||||||
|
op_name[iter->id()] = iter->NameWithID();
|
||||||
|
op_parallel_workers[iter->id()] = iter->num_workers();
|
||||||
}
|
}
|
||||||
#if defined(USING_LINUX)
|
#if defined(USING_LINUX)
|
||||||
cpu_processor_num = get_nprocs_conf();
|
cpu_processor_num = get_nprocs_conf();
|
||||||
|
@ -327,6 +350,37 @@ Status OperatorCpu::Collect(ExecutionTree *tree) {
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Find the operator with the highest average per-thread CPU utilization.
//
// name          [out] name of the operator with the highest per-thread utilization; left empty
//               when no operator has a utilization above 0.
// utilization   [out] per-thread utilization of that operator, or 0 when there is none.
// extra_message [out] a "<op> utilization per thread: <value>% (<n> parallel_workers); " entry is
//               appended for every operator.
// Returns Status::OK() in all cases.
Status OperatorCpu::Analyze(std::string *name, double *utilization, std::string *extra_message) {
  // Publish defined results up front so the caller never sees values left over from another sampler.
  name->clear();
  *utilization = 0;

  // Only analyze the middle half of the samples.
  // Starting and ending may be impacted by startup or ending pipeline activities.
  const int total_samples = static_cast<int>(cpu_op_util_.size());
  const int start_analyze = total_samples / 4;
  const int end_analyze = total_samples - start_analyze;
  const int num_analyzed = end_analyze - start_analyze;

  // The loop starts at op 0 because op -1 should not be analyzed.
  for (int32_t op_id = 0; op_id < id_count; op_id++) {
    // Each sample row is indexed by op_id + 1 (presumably slot 0 holds op -1 - confirm against Collect).
    const int index = op_id + 1;
    double sum = 0;
    for (int i = start_analyze; i < end_analyze; i++) {
      sum += cpu_op_util_[i][index].user_utilization_;
      sum += cpu_op_util_[i][index].sys_utilization_;
    }

    const int32_t workers = op_parallel_workers[op_id];
    const std::string &current_op_name = op_name[op_id];

    // Stays 0 when there is nothing to average, instead of being read uninitialized.
    double op_util = 0;
    if (num_analyzed > 0) {
      // Guard against a division by zero for operators that report no parallel workers.
      // NOTE(review): such an operator is treated as running on a single thread - confirm.
      const int32_t effective_workers = (workers > 0) ? workers : 1;
      // The divisor is computed in floating point so that workers * samples cannot overflow an int.
      op_util = sum * cpu_processor_num / (static_cast<double>(effective_workers) * num_analyzed);
    }

    if (op_util > *utilization) {
      *utilization = op_util;
      *name = current_op_name;
    }
    extra_message->append(current_op_name + " utilization per thread: " + std::to_string(op_util) + "% (" +
                          std::to_string(workers) + " parallel_workers); ");
  }
  return Status::OK();
}
|
||||||
|
|
||||||
Status OperatorCpu::SaveToFile(const std::string &file_path) {
|
Status OperatorCpu::SaveToFile(const std::string &file_path) {
|
||||||
Path path = Path(file_path);
|
Path path = Path(file_path);
|
||||||
json output;
|
json output;
|
||||||
|
@ -453,6 +507,26 @@ Status ProcessCpu::Collect(ExecutionTree *tree) {
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Summarize the collected process-level CPU samples as a single average utilization value.
//
// name          [out] set to "process_info" so the caller can tell which sampler produced the result.
// utilization   [out] average of (user + sys) utilization over the analyzed window, or 0 when
//               there are no samples to analyze.
// extra_message [out] not written by this sampler.
// Returns Status::OK() in all cases.
Status ProcessCpu::Analyze(std::string *name, double *utilization, std::string *extra_message) {
  *name = std::string("process_info");
  // Always publish a defined result so the caller never reads a stale value.
  *utilization = 0;

  // Only analyze the middle half of the samples.
  // Starting and ending may be impacted by startup or ending pipeline activities.
  const size_t total_samples = process_util_.size();
  const size_t start_analyze = total_samples / 4;
  const size_t end_analyze = total_samples - start_analyze;
  if (end_analyze <= start_analyze) {
    return Status::OK();
  }

  // Accumulate in floating point: with integer operands the average below would be truncated.
  double sum = 0;
  for (size_t i = start_analyze; i < end_analyze; i++) {
    sum += process_util_[i].user_utilization_;
    sum += process_util_[i].sys_utilization_;
  }

  *utilization = sum / static_cast<double>(end_analyze - start_analyze);
  return Status::OK();
}
|
||||||
|
|
||||||
Status ProcessCpu::SaveToFile(const std::string &file_path) {
|
Status ProcessCpu::SaveToFile(const std::string &file_path) {
|
||||||
Path path = Path(file_path);
|
Path path = Path(file_path);
|
||||||
json output;
|
json output;
|
||||||
|
@ -529,6 +603,37 @@ Status CpuSampling::SaveSamplingItervalToFile() {
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Analyze profiling data and output warning messages
|
||||||
|
Status CpuSampling::Analyze() {
|
||||||
|
std::string name;
|
||||||
|
double utilization = 0;
|
||||||
|
|
||||||
|
// Keep track of specific information returned by differentn CPU sampling types
|
||||||
|
double total_utilization = 0;
|
||||||
|
double max_op_utilization = 0;
|
||||||
|
std::string max_op_name;
|
||||||
|
std::string detailed_op_cpu_message;
|
||||||
|
|
||||||
|
// Save cpu information to json file
|
||||||
|
for (auto cpu : cpu_) {
|
||||||
|
std::string extra_message;
|
||||||
|
RETURN_IF_NOT_OK(cpu->Analyze(&name, &utilization, &extra_message));
|
||||||
|
if (name == "device_info") {
|
||||||
|
total_utilization = utilization;
|
||||||
|
} else if (name != "process_info") {
|
||||||
|
max_op_utilization = utilization;
|
||||||
|
max_op_name = name;
|
||||||
|
detailed_op_cpu_message = extra_message;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ((total_utilization < 90) && (max_op_utilization > 80)) {
|
||||||
|
MS_LOG(WARNING) << "Operator " << max_op_name << " is using " << max_op_utilization << "% CPU per thread. "
|
||||||
|
<< "This operator may benefit from increasing num_parallel_workers."
|
||||||
|
<< "Full Operator CPU utiliization for all operators: " << detailed_op_cpu_message << std::endl;
|
||||||
|
}
|
||||||
|
return Status::OK();
|
||||||
|
}
|
||||||
|
|
||||||
// Save profiling data to file
|
// Save profiling data to file
|
||||||
Status CpuSampling::SaveToFile() {
|
Status CpuSampling::SaveToFile() {
|
||||||
// Save time stamp to json file
|
// Save time stamp to json file
|
||||||
|
|
|
@ -71,6 +71,7 @@ class BaseCpu {
|
||||||
// Collect CPU information
|
// Collect CPU information
|
||||||
virtual Status Collect(ExecutionTree *tree) = 0;
|
virtual Status Collect(ExecutionTree *tree) = 0;
|
||||||
virtual Status SaveToFile(const std::string &file_path) = 0;
|
virtual Status SaveToFile(const std::string &file_path) = 0;
|
||||||
|
virtual Status Analyze(std::string *name, double *utilization, std::string *extra_message) = 0;
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
std::vector<CpuUtil> cpu_util_;
|
std::vector<CpuUtil> cpu_util_;
|
||||||
|
@ -90,6 +91,7 @@ class DeviceCpu : public BaseCpu {
|
||||||
~DeviceCpu() = default;
|
~DeviceCpu() = default;
|
||||||
Status Collect(ExecutionTree *tree) override;
|
Status Collect(ExecutionTree *tree) override;
|
||||||
Status SaveToFile(const std::string &file_path) override;
|
Status SaveToFile(const std::string &file_path) override;
|
||||||
|
Status Analyze(std::string *name, double *utilization, std::string *extra_message) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Get CPU information, include use/sys/idle/io utilization
|
// Get CPU information, include use/sys/idle/io utilization
|
||||||
|
@ -115,6 +117,11 @@ class OperatorCpu : public BaseCpu {
|
||||||
~OperatorCpu() = default;
|
~OperatorCpu() = default;
|
||||||
Status Collect(ExecutionTree *tree) override;
|
Status Collect(ExecutionTree *tree) override;
|
||||||
Status SaveToFile(const std::string &file_path) override;
|
Status SaveToFile(const std::string &file_path) override;
|
||||||
|
// Analyze will output the name of the metric, the avg utilization of highest
|
||||||
|
// object within the class and any extra message that would be useful for the user.
|
||||||
|
// The Higher level CPUSampling class will combine information from different classes
|
||||||
|
// to decide if warning should be output.
|
||||||
|
Status Analyze(std::string *name, double *utilization, std::string *extra_message) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Get cpu information, include use/sys/idle/io utilization
|
// Get cpu information, include use/sys/idle/io utilization
|
||||||
|
@ -131,6 +138,8 @@ class OperatorCpu : public BaseCpu {
|
||||||
|
|
||||||
// Store the id and its corresponding threads.
|
// Store the id and its corresponding threads.
|
||||||
std::unordered_map<int32_t, std::vector<pid_t>> op_thread;
|
std::unordered_map<int32_t, std::vector<pid_t>> op_thread;
|
||||||
|
std::unordered_map<int32_t, std::string> op_name;
|
||||||
|
std::unordered_map<int32_t, int32_t> op_parallel_workers;
|
||||||
std::unordered_map<int32_t, std::unordered_map<int64_t, CpuOpStat>> pre_op_stat_;
|
std::unordered_map<int32_t, std::unordered_map<int64_t, CpuOpStat>> pre_op_stat_;
|
||||||
uint64_t pre_total_stat_;
|
uint64_t pre_total_stat_;
|
||||||
int32_t id_count = 0;
|
int32_t id_count = 0;
|
||||||
|
@ -143,6 +152,7 @@ class ProcessCpu : public BaseCpu {
|
||||||
~ProcessCpu() = default;
|
~ProcessCpu() = default;
|
||||||
Status Collect(ExecutionTree *tree) override;
|
Status Collect(ExecutionTree *tree) override;
|
||||||
Status SaveToFile(const std::string &file_path) override;
|
Status SaveToFile(const std::string &file_path) override;
|
||||||
|
Status Analyze(std::string *name, double *utilization, std::string *extra_message) override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
// Get CPU information, include use/sys/idle/io utilization
|
// Get CPU information, include use/sys/idle/io utilization
|
||||||
|
@ -183,6 +193,9 @@ class CpuSampling : public Sampling {
|
||||||
// Change file mode after save CPU data
|
// Change file mode after save CPU data
|
||||||
Status ChangeFileMode() override;
|
Status ChangeFileMode() override;
|
||||||
|
|
||||||
|
// Analyze sampling data and print message to log
|
||||||
|
Status Analyze() override;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Status CollectTimeStamp();
|
Status CollectTimeStamp();
|
||||||
|
|
||||||
|
|
|
@ -45,6 +45,7 @@ Status Monitor::operator()() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Output all profiling data upon request.
|
// Output all profiling data upon request.
|
||||||
|
RETURN_IF_NOT_OK(tree_->GetProfilingManager()->Analyze());
|
||||||
RETURN_IF_NOT_OK(tree_->GetProfilingManager()->SaveProfilingData());
|
RETURN_IF_NOT_OK(tree_->GetProfilingManager()->SaveProfilingData());
|
||||||
RETURN_IF_NOT_OK(tree_->GetProfilingManager()->ChangeFileMode());
|
RETURN_IF_NOT_OK(tree_->GetProfilingManager()->ChangeFileMode());
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
|
|
|
@ -157,6 +157,16 @@ Status ProfilingManager::SaveProfilingData() {
|
||||||
MS_LOG(INFO) << "Save profiling data end.";
|
MS_LOG(INFO) << "Save profiling data end.";
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
|
// Run the analysis step of every registered sampling node.
// Does nothing and returns Status::OK() when profiling is not enabled; otherwise returns the
// first non-OK status reported by a node, or Status::OK() when all nodes succeed.
Status ProfilingManager::Analyze() {
  if (!IsProfilingEnable()) {
    return Status::OK();
  }
  MS_LOG(INFO) << "Start to analyze profiling data.";
  // Iterate by const reference so each entry of sampling_nodes_ is not copied per iteration.
  for (const auto &node : sampling_nodes_) {
    RETURN_IF_NOT_OK(node.second->Analyze());
  }
  return Status::OK();
}
|
||||||
|
|
||||||
Status ProfilingManager::ChangeFileMode() {
|
Status ProfilingManager::ChangeFileMode() {
|
||||||
if (!IsProfilingEnable()) {
|
if (!IsProfilingEnable()) {
|
||||||
|
|
|
@ -65,6 +65,7 @@ class Sampling : public Profiling {
|
||||||
// Sampling action function. This function will be invoked by performance monitor thread.
|
// Sampling action function. This function will be invoked by performance monitor thread.
|
||||||
virtual Status Sample() = 0;
|
virtual Status Sample() = 0;
|
||||||
// virtual Status TestPrint() = 0;
|
// virtual Status TestPrint() = 0;
|
||||||
|
virtual Status Analyze() = 0;
|
||||||
virtual ~Sampling() = default;
|
virtual ~Sampling() = default;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -118,6 +119,9 @@ class ProfilingManager {
|
||||||
|
|
||||||
Status ChangeFileMode();
|
Status ChangeFileMode();
|
||||||
|
|
||||||
|
// Analyze profile data and print warning messages
|
||||||
|
Status Analyze();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
std::unique_ptr<Monitor> perf_monitor_;
|
std::unique_ptr<Monitor> perf_monitor_;
|
||||||
bool enabled_;
|
bool enabled_;
|
||||||
|
|
Loading…
Reference in New Issue