!16564 adjust default monitoring sampling interval and change output save pattern
From: @ziruiwu Reviewed-by: Signed-off-by:
This commit is contained in:
commit
878fbdaf61
|
@ -35,14 +35,23 @@ Status Monitor::operator()() {
|
|||
// Keep sampling if
|
||||
// 1) Monitor Task is not interrupted by TaskManager AND
|
||||
// 2) Iterator has not received EOF
|
||||
|
||||
// this will trigger a save on 2min, 4min, 8min, 16min ... mark on top of the save per_epoch
|
||||
// The idea is whenever training is interrupted, you will get at least half of the sampling data during training
|
||||
int64_t save_interval = 1 + (120 * 1000 / sampling_interval_);
|
||||
int64_t loop_cnt = 1;
|
||||
while (!this_thread::is_interrupted() && !(tree_->isFinished()) && !(cfg->stop_profiler_status())) {
|
||||
if (tree_->IsEpochEnd()) {
|
||||
RETURN_IF_NOT_OK(tree_->GetProfilingManager()->SaveProfilingData());
|
||||
tree_->SetExecuting();
|
||||
} else if (loop_cnt % save_interval == 0) {
|
||||
RETURN_IF_NOT_OK(tree_->GetProfilingManager()->SaveProfilingData());
|
||||
}
|
||||
for (auto &node : tree_->GetProfilingManager()->GetSamplingNodes()) {
|
||||
RETURN_IF_NOT_OK(node.second->Sample());
|
||||
}
|
||||
if (loop_cnt % save_interval == 0) save_interval *= 2;
|
||||
loop_cnt += 1;
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(sampling_interval_));
|
||||
}
|
||||
|
||||
|
|
|
@ -102,8 +102,8 @@ constexpr uint32_t kCfgWorkerConnectorSize = 16;
|
|||
constexpr uint32_t kCfgOpConnectorSize = 16;
|
||||
constexpr int32_t kCfgDefaultRankId = -1;
|
||||
constexpr uint32_t kCfgDefaultSeed = std::mt19937::default_seed;
|
||||
constexpr uint32_t kCfgMonitorSamplingInterval = 10;
|
||||
constexpr uint32_t kCfgCallbackTimeout = 60; // timeout value for callback in seconds
|
||||
constexpr uint32_t kCfgMonitorSamplingInterval = 1000; // timeout value for sampling interval in milliseconds
|
||||
constexpr uint32_t kCfgCallbackTimeout = 60; // timeout value for callback in seconds
|
||||
constexpr int32_t kCfgDefaultCachePort = 50052;
|
||||
constexpr char kCfgDefaultCacheHost[] = "127.0.0.1";
|
||||
constexpr int32_t kDftPrefetchSize = 20;
|
||||
|
|
Loading…
Reference in New Issue