forked from mindspore-Ecosystem/mindspore
!27633 Parallelize calculating tensor statistics in tensor_summary
Merge pull request !27633 from Jimmy Qi/timing
This commit is contained in:
commit
daae93cff3
|
@ -16,6 +16,7 @@
|
||||||
|
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <future>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <bitset>
|
#include <bitset>
|
||||||
|
@ -160,7 +161,59 @@ void TensorSummary<T>::TensorStatistics(DbgDataType dtype_value) {
|
||||||
if (dtype_value == DT_BOOL) {
|
if (dtype_value == DT_BOOL) {
|
||||||
is_bool_ = true;
|
is_bool_ = true;
|
||||||
}
|
}
|
||||||
double sum_elements = 0.0;
|
const int default_threads = 32;
|
||||||
|
const int default_elements_per_thread = 10000;
|
||||||
|
|
||||||
|
if (num_elements_ <= default_elements_per_thread) {
|
||||||
|
return TensorStatisticsSingleThread();
|
||||||
|
}
|
||||||
|
int desired_threads = num_elements_ / default_elements_per_thread;
|
||||||
|
int actual_threads = std::min(desired_threads, default_threads);
|
||||||
|
int actual_elements_per_thread = num_elements_ / actual_threads;
|
||||||
|
|
||||||
|
// Use multiple threads to calculate statistics on chunks of data
|
||||||
|
void *previous_tensor_ptr = nullptr;
|
||||||
|
size_t offset = 0;
|
||||||
|
std::vector<std::unique_ptr<TensorSummary<T>>> summary_vec;
|
||||||
|
std::vector<std::future<void>> summary_future_vec;
|
||||||
|
for (int i = 0; i < actual_threads; i++) {
|
||||||
|
int num_elements_for_thread;
|
||||||
|
if (i == actual_threads - 1) {
|
||||||
|
num_elements_for_thread = num_elements_ - offset;
|
||||||
|
} else {
|
||||||
|
num_elements_for_thread = actual_elements_per_thread;
|
||||||
|
}
|
||||||
|
summary_vec.emplace_back(std::make_unique<TensorSummary<T>>(current_tensor_ptr_ + offset, previous_tensor_ptr,
|
||||||
|
num_elements_for_thread, 0));
|
||||||
|
summary_future_vec.emplace_back(
|
||||||
|
std::async(std::launch::async, &TensorSummary<T>::TensorStatisticsSingleThread, summary_vec[i].get()));
|
||||||
|
offset += num_elements_for_thread;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Aggregate results of all chunks
|
||||||
|
num_elements_ = 0; // Let current tensor weight 0 in the aggregation
|
||||||
|
for (unsigned int i = 0; i < summary_future_vec.size(); i++) {
|
||||||
|
summary_future_vec[i].wait();
|
||||||
|
summary_future_vec[i].get();
|
||||||
|
auto &cur_summary = *(summary_vec[i]);
|
||||||
|
num_elements_ += cur_summary.num_elements_;
|
||||||
|
min_ = std::min(min_, cur_summary.min_);
|
||||||
|
max_ = std::max(max_, cur_summary.max_);
|
||||||
|
double avg_delta = cur_summary.avg_ - avg_;
|
||||||
|
avg_ += avg_delta * (cur_summary.num_elements_ / num_elements_);
|
||||||
|
neg_zero_count_ += cur_summary.neg_zero_count_;
|
||||||
|
pos_zero_count_ += cur_summary.pos_zero_count_;
|
||||||
|
neg_inf_count_ += cur_summary.neg_inf_count_;
|
||||||
|
pos_inf_count_ += cur_summary.pos_inf_count_;
|
||||||
|
inf_count_ += cur_summary.inf_count_;
|
||||||
|
nan_count_ += cur_summary.nan_count_;
|
||||||
|
zero_count_ += cur_summary.zero_count_;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void TensorSummary<T>::TensorStatisticsSingleThread() {
|
||||||
|
MeanCalculator mean_calc = MeanCalculator();
|
||||||
for (size_t i = 0; i < num_elements_; ++i) {
|
for (size_t i = 0; i < num_elements_; ++i) {
|
||||||
auto current_value = static_cast<double>(current_tensor_ptr_[i]);
|
auto current_value = static_cast<double>(current_tensor_ptr_[i]);
|
||||||
if (std::isinf(current_value)) {
|
if (std::isinf(current_value)) {
|
||||||
|
@ -185,11 +238,10 @@ void TensorSummary<T>::TensorStatistics(DbgDataType dtype_value) {
|
||||||
}
|
}
|
||||||
max_ = std::max(max_, current_value);
|
max_ = std::max(max_, current_value);
|
||||||
min_ = std::min(min_, current_value);
|
min_ = std::min(min_, current_value);
|
||||||
sum_elements += current_value;
|
mean_calc.ProcessElement(current_value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
unsigned int value_count = zero_count_ + neg_zero_count_ + pos_zero_count_;
|
avg_ = mean_calc.GetMean();
|
||||||
avg_ = sum_elements / value_count;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
|
|
|
@ -161,6 +161,7 @@ class TensorSummary : public ITensorSummary {
|
||||||
double_t StatLookup(const DebugServices::watchpoint_t &);
|
double_t StatLookup(const DebugServices::watchpoint_t &);
|
||||||
double_t StatLookup(const std::string &, const DebugServices::watchpoint_t &);
|
double_t StatLookup(const std::string &, const DebugServices::watchpoint_t &);
|
||||||
double_t GetZeroValPercent();
|
double_t GetZeroValPercent();
|
||||||
|
void TensorStatisticsSingleThread();
|
||||||
void InitCalculators(const std::vector<DebugServices::watchpoint_t> &);
|
void InitCalculators(const std::vector<DebugServices::watchpoint_t> &);
|
||||||
};
|
};
|
||||||
#ifdef ONLINE_DBG_MODE
|
#ifdef ONLINE_DBG_MODE
|
||||||
|
|
|
@ -27,7 +27,7 @@
|
||||||
"is_bool": false,
|
"is_bool": false,
|
||||||
"max_vaue": 10.0,
|
"max_vaue": 10.0,
|
||||||
"min_value": -11.0,
|
"min_value": -11.0,
|
||||||
"avg_value": 0.880000114440918,
|
"avg_value": 0.8800001144409179,
|
||||||
"count": 6,
|
"count": 6,
|
||||||
"neg_zero_count": 2,
|
"neg_zero_count": 2,
|
||||||
"pos_zero_count": 3,
|
"pos_zero_count": 3,
|
||||||
|
|
Loading…
Reference in New Issue