From 597dde543cc92b6e066e965bfc0b254cb2a4f822 Mon Sep 17 00:00:00 2001
From: jonyguo
Date: Fri, 4 Dec 2020 10:56:57 +0800
Subject: [PATCH] output warning info when memory > 95% in batchop timestamp

---
Review notes (placed after the three-dash line, so they are not part of the commit message):
 * Line breaks and indentation were rebuilt from a whitespace-collapsed copy of this patch;
   verify the context lines against the tree before applying.
 * Header names inside <...> were missing from that copy. The context include in status.cc is
   assumed to be <sstream> (TODO confirm); the added includes are the ones GetMemoryUsage() needs.
 * GetMemoryUsage() was revised during review: it no longer throws on a /proc/meminfo line without
   spaces, validates the parsed number, and tracks MemTotal / MemAvailable separately. Hunk sizes
   and the diffstat therefore differ from commit 597dde5, and the "index" lines still carry the
   original blob ids.

 .../dataset/engine/datasetops/batch_op.cc     | 10 ++++
 .../ccsrc/minddata/dataset/util/status.cc     | 59 ++++++++++++++++++++
 .../ccsrc/minddata/dataset/util/status.h      |  8 +++
 3 files changed, 77 insertions(+)

diff --git a/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.cc b/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.cc
index 60559c86409..6672d75e60c 100644
--- a/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.cc
+++ b/mindspore/ccsrc/minddata/dataset/engine/datasetops/batch_op.cc
@@ -26,6 +26,7 @@
 #include "minddata/dataset/engine/db_connector.h"
 #include "minddata/dataset/engine/opt/pass.h"
 #include "minddata/dataset/kernels/data/data_utils.h"
+#include "minddata/dataset/util/status.h"
 
 namespace mindspore {
 namespace dataset {
@@ -131,6 +132,15 @@ Status BatchOp::operator()() {
       worker_queues_[cnt++ % num_workers_]->EmplaceBack(std::make_pair(nullptr, CBatchInfo(batchCtrl::kEOE))));
     RETURN_IF_NOT_OK(GetBatchSize(&cur_batch_size, CBatchInfo(epoch_num, batch_num, cnt - epoch_num)));
     RETURN_IF_NOT_OK(child_iterator_->FetchNextTensorRow(&new_row));
+
+#if !defined(_WIN32) && !defined(_WIN64)
+    if ((num_workers_ > 1 || batch_map_func_) && GetMemoryUsage() > MAX_MEMORY_USAGE_THRESHOLD) {
+      MS_LOG(WARNING) << "Memory consumption is more than " << MAX_MEMORY_USAGE_THRESHOLD * 100 << "%, "
+                      << "which may cause oom error. Please reduce num_parallel_workers size / "
+                      << "optimize per_batch_map function / other python data preprocess function to "
+                      << "reduce memory usage.";
+    }
+#endif
   }  // end of eof_handled() == false
   RETURN_IF_NOT_OK(
     worker_queues_[cnt++ % num_workers_]->EmplaceBack(std::make_pair(nullptr, CBatchInfo(batchCtrl::kEOF))));
diff --git a/mindspore/ccsrc/minddata/dataset/util/status.cc b/mindspore/ccsrc/minddata/dataset/util/status.cc
index 43fc3ffe7ad..692676b7068 100644
--- a/mindspore/ccsrc/minddata/dataset/util/status.cc
+++ b/mindspore/ccsrc/minddata/dataset/util/status.cc
@@ -15,7 +15,12 @@
  */
 #include "minddata/dataset/util/status.h"
 #include <sstream>
+#include <cstdint>
+#include <cstdlib>
+#include <fstream>
+#include <string>
 #include "utils/ms_utils.h"
+#include "./securec.h"
 
 #ifndef ENABLE_ANDROID
 #include "minddata/dataset/util/task_manager.h"
@@ -139,5 +144,59 @@ std::ostream &operator<<(std::ostream &os, const Status &s) {
 std::string Status::ToString() const { return err_msg_; }
 
 StatusCode Status::get_code() const { return code_; }
+
+#if !defined(_WIN32) && !defined(_WIN64)
+namespace {
+// Reads the numeric value of one /proc/meminfo line such as "MemTotal:       16384256 kB".
+// Returns true and stores the number in *value only when the line starts with "<key>:" and the key is
+// followed by an unsigned decimal number; otherwise returns false and leaves *value untouched.
+bool ParseMemInfoField(const std::string &line, const std::string &key, uint64_t *value) {
+  const std::string prefix = key + ":";
+  if (line.compare(0, prefix.size(), prefix) != 0) {
+    return false;
+  }
+  // Skip the padding after the colon. Requiring a leading digit rejects empty fields and signed input,
+  // which strtoull would otherwise accept and wrap around.
+  const std::string::size_type digit_pos = line.find_first_not_of(" \t", prefix.size());
+  if (digit_pos == std::string::npos || line[digit_pos] < '0' || line[digit_pos] > '9') {
+    return false;
+  }
+  *value = strtoull(line.c_str() + digit_pos, nullptr, 10);
+  return true;
+}
+}  // namespace
+
+// Returns the fraction of system memory in use, computed as 1 - MemAvailable / MemTotal from
+// /proc/meminfo. Logs a warning and returns 0.0 when the file cannot be opened or the two fields cannot be
+// read consistently, so a failed probe never exceeds MAX_MEMORY_USAGE_THRESHOLD.
+float GetMemoryUsage() {
+  std::ifstream meminfo("/proc/meminfo");
+  if (!meminfo.is_open()) {
+    MS_LOG(WARNING) << "The meminfo file: /proc/meminfo is opened failed.";
+    return 0.0f;
+  }
+
+  uint64_t mem_total = 0;
+  uint64_t mem_available = 0;
+  bool has_total = false;
+  bool has_available = false;
+  std::string line;
+  // Stop reading as soon as both fields have been found.
+  while (!(has_total && has_available) && std::getline(meminfo, line)) {
+    if (!has_total && ParseMemInfoField(line, "MemTotal", &mem_total)) {
+      has_total = true;
+    } else if (!has_available && ParseMemInfoField(line, "MemAvailable", &mem_available)) {
+      has_available = true;
+    }
+  }
+
+  if (!has_total || !has_available || mem_total == 0 || mem_available > mem_total) {
+    MS_LOG(WARNING) << "Get memory usage failed.";
+    return 0.0f;
+  }
+
+  return static_cast<float>(1.0 - static_cast<double>(mem_available) / static_cast<double>(mem_total));
+}
+#endif
 }  // namespace dataset
 }  // namespace mindspore
diff --git a/mindspore/ccsrc/minddata/dataset/util/status.h b/mindspore/ccsrc/minddata/dataset/util/status.h
index 731143c76a8..82857e2363e 100644
--- a/mindspore/ccsrc/minddata/dataset/util/status.h
+++ b/mindspore/ccsrc/minddata/dataset/util/status.h
@@ -167,6 +167,14 @@ class Status {
   StatusCode code_;
   std::string err_msg_;
 };
+
+#if !defined(_WIN32) && !defined(_WIN64)
+// Fraction of used system memory (0.95 == 95%) above which BatchOp logs an out-of-memory warning.
+const float MAX_MEMORY_USAGE_THRESHOLD = 0.95;
+
+// Returns the used fraction of system memory read from /proc/meminfo, or 0.0 when it cannot be determined.
+float GetMemoryUsage();
+#endif
 }  // namespace dataset
 }  // namespace mindspore
 #endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_UTIL_STATUS_H_