forked from mindspore-Ecosystem/mindspore
!27633 Parallelize calculating tensor statistics in tensor_summary
Merge pull request !27633 from Jimmy Qi/timing
This commit is contained in:
commit
daae93cff3
|
@ -16,6 +16,7 @@
|
|||
|
||||
#include <cmath>
|
||||
#include <algorithm>
|
||||
#include <future>
|
||||
#include <limits>
|
||||
#include <memory>
|
||||
#include <bitset>
|
||||
|
@ -160,7 +161,59 @@ void TensorSummary<T>::TensorStatistics(DbgDataType dtype_value) {
|
|||
if (dtype_value == DT_BOOL) {
|
||||
is_bool_ = true;
|
||||
}
|
||||
double sum_elements = 0.0;
|
||||
const int default_threads = 32;
|
||||
const int default_elements_per_thread = 10000;
|
||||
|
||||
if (num_elements_ <= default_elements_per_thread) {
|
||||
return TensorStatisticsSingleThread();
|
||||
}
|
||||
int desired_threads = num_elements_ / default_elements_per_thread;
|
||||
int actual_threads = std::min(desired_threads, default_threads);
|
||||
int actual_elements_per_thread = num_elements_ / actual_threads;
|
||||
|
||||
// Use multithread to calculate statistic on chunks of data
|
||||
void *previous_tensor_ptr = nullptr;
|
||||
size_t offset = 0;
|
||||
std::vector<std::unique_ptr<TensorSummary<T>>> summary_vec;
|
||||
std::vector<std::future<void>> summary_future_vec;
|
||||
for (int i = 0; i < actual_threads; i++) {
|
||||
int num_elements_for_thread;
|
||||
if (i == actual_threads - 1) {
|
||||
num_elements_for_thread = num_elements_ - offset;
|
||||
} else {
|
||||
num_elements_for_thread = actual_elements_per_thread;
|
||||
}
|
||||
summary_vec.emplace_back(std::make_unique<TensorSummary<T>>(current_tensor_ptr_ + offset, previous_tensor_ptr,
|
||||
num_elements_for_thread, 0));
|
||||
summary_future_vec.emplace_back(
|
||||
std::async(std::launch::async, &TensorSummary<T>::TensorStatisticsSingleThread, summary_vec[i].get()));
|
||||
offset += num_elements_for_thread;
|
||||
}
|
||||
|
||||
// Aggregate results of all chunks
|
||||
num_elements_ = 0; // Let current tensor weight 0 in the aggregation
|
||||
for (unsigned int i = 0; i < summary_future_vec.size(); i++) {
|
||||
summary_future_vec[i].wait();
|
||||
summary_future_vec[i].get();
|
||||
auto &cur_summary = *(summary_vec[i]);
|
||||
num_elements_ += cur_summary.num_elements_;
|
||||
min_ = std::min(min_, cur_summary.min_);
|
||||
max_ = std::max(max_, cur_summary.max_);
|
||||
double avg_delta = cur_summary.avg_ - avg_;
|
||||
avg_ += avg_delta * (cur_summary.num_elements_ / num_elements_);
|
||||
neg_zero_count_ += cur_summary.neg_zero_count_;
|
||||
pos_zero_count_ += cur_summary.pos_zero_count_;
|
||||
neg_inf_count_ += cur_summary.neg_inf_count_;
|
||||
pos_inf_count_ += cur_summary.pos_inf_count_;
|
||||
inf_count_ += cur_summary.inf_count_;
|
||||
nan_count_ += cur_summary.nan_count_;
|
||||
zero_count_ += cur_summary.zero_count_;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void TensorSummary<T>::TensorStatisticsSingleThread() {
|
||||
MeanCalculator mean_calc = MeanCalculator();
|
||||
for (size_t i = 0; i < num_elements_; ++i) {
|
||||
auto current_value = static_cast<double>(current_tensor_ptr_[i]);
|
||||
if (std::isinf(current_value)) {
|
||||
|
@ -185,11 +238,10 @@ void TensorSummary<T>::TensorStatistics(DbgDataType dtype_value) {
|
|||
}
|
||||
max_ = std::max(max_, current_value);
|
||||
min_ = std::min(min_, current_value);
|
||||
sum_elements += current_value;
|
||||
mean_calc.ProcessElement(current_value);
|
||||
}
|
||||
}
|
||||
unsigned int value_count = zero_count_ + neg_zero_count_ + pos_zero_count_;
|
||||
avg_ = sum_elements / value_count;
|
||||
avg_ = mean_calc.GetMean();
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
|
|
@ -161,6 +161,7 @@ class TensorSummary : public ITensorSummary {
|
|||
double_t StatLookup(const DebugServices::watchpoint_t &);
|
||||
double_t StatLookup(const std::string &, const DebugServices::watchpoint_t &);
|
||||
double_t GetZeroValPercent();
|
||||
void TensorStatisticsSingleThread();
|
||||
void InitCalculators(const std::vector<DebugServices::watchpoint_t> &);
|
||||
};
|
||||
#ifdef ONLINE_DBG_MODE
|
||||
|
|
|
@ -27,7 +27,7 @@
|
|||
"is_bool": false,
|
||||
"max_vaue": 10.0,
|
||||
"min_value": -11.0,
|
||||
"avg_value": 0.880000114440918,
|
||||
"avg_value": 0.8800001144409179,
|
||||
"count": 6,
|
||||
"neg_zero_count": 2,
|
||||
"pos_zero_count": 3,
|
||||
|
|
Loading…
Reference in New Issue