!27633 Parallelize calculating tensor statistics in tensor_summary

Merge pull request !27633 from Jimmy Qi/timing
This commit is contained in:
i-robot 2021-12-15 07:53:58 +00:00 committed by Gitee
commit daae93cff3
3 changed files with 58 additions and 5 deletions

View File

@ -16,6 +16,7 @@
#include <cmath>
#include <algorithm>
#include <future>
#include <limits>
#include <memory>
#include <bitset>
@ -160,7 +161,59 @@ void TensorSummary<T>::TensorStatistics(DbgDataType dtype_value) {
if (dtype_value == DT_BOOL) {
is_bool_ = true;
}
double sum_elements = 0.0;
const int default_threads = 32;
const int default_elements_per_thread = 10000;
if (num_elements_ <= default_elements_per_thread) {
return TensorStatisticsSingleThread();
}
int desired_threads = num_elements_ / default_elements_per_thread;
int actual_threads = std::min(desired_threads, default_threads);
int actual_elements_per_thread = num_elements_ / actual_threads;
// Use multithread to calculate statistic on chunks of data
void *previous_tensor_ptr = nullptr;
size_t offset = 0;
std::vector<std::unique_ptr<TensorSummary<T>>> summary_vec;
std::vector<std::future<void>> summary_future_vec;
for (int i = 0; i < actual_threads; i++) {
int num_elements_for_thread;
if (i == actual_threads - 1) {
num_elements_for_thread = num_elements_ - offset;
} else {
num_elements_for_thread = actual_elements_per_thread;
}
summary_vec.emplace_back(std::make_unique<TensorSummary<T>>(current_tensor_ptr_ + offset, previous_tensor_ptr,
num_elements_for_thread, 0));
summary_future_vec.emplace_back(
std::async(std::launch::async, &TensorSummary<T>::TensorStatisticsSingleThread, summary_vec[i].get()));
offset += num_elements_for_thread;
}
// Aggregate results of all chunks
num_elements_ = 0; // Let current tensor weight 0 in the aggregation
for (unsigned int i = 0; i < summary_future_vec.size(); i++) {
summary_future_vec[i].wait();
summary_future_vec[i].get();
auto &cur_summary = *(summary_vec[i]);
num_elements_ += cur_summary.num_elements_;
min_ = std::min(min_, cur_summary.min_);
max_ = std::max(max_, cur_summary.max_);
double avg_delta = cur_summary.avg_ - avg_;
avg_ += avg_delta * (cur_summary.num_elements_ / num_elements_);
neg_zero_count_ += cur_summary.neg_zero_count_;
pos_zero_count_ += cur_summary.pos_zero_count_;
neg_inf_count_ += cur_summary.neg_inf_count_;
pos_inf_count_ += cur_summary.pos_inf_count_;
inf_count_ += cur_summary.inf_count_;
nan_count_ += cur_summary.nan_count_;
zero_count_ += cur_summary.zero_count_;
}
}
template <typename T>
void TensorSummary<T>::TensorStatisticsSingleThread() {
MeanCalculator mean_calc = MeanCalculator();
for (size_t i = 0; i < num_elements_; ++i) {
auto current_value = static_cast<double>(current_tensor_ptr_[i]);
if (std::isinf(current_value)) {
@ -185,11 +238,10 @@ void TensorSummary<T>::TensorStatistics(DbgDataType dtype_value) {
}
max_ = std::max(max_, current_value);
min_ = std::min(min_, current_value);
sum_elements += current_value;
mean_calc.ProcessElement(current_value);
}
}
unsigned int value_count = zero_count_ + neg_zero_count_ + pos_zero_count_;
avg_ = sum_elements / value_count;
avg_ = mean_calc.GetMean();
}
template <typename T>

View File

@ -161,6 +161,7 @@ class TensorSummary : public ITensorSummary {
double_t StatLookup(const DebugServices::watchpoint_t &);
double_t StatLookup(const std::string &, const DebugServices::watchpoint_t &);
double_t GetZeroValPercent();
void TensorStatisticsSingleThread();
void InitCalculators(const std::vector<DebugServices::watchpoint_t> &);
};
#ifdef ONLINE_DBG_MODE

View File

@ -27,7 +27,7 @@
"is_bool": false,
"max_vaue": 10.0,
"min_value": -11.0,
"avg_value": 0.880000114440918,
"avg_value": 0.8800001144409179,
"count": 6,
"neg_zero_count": 2,
"pos_zero_count": 3,