forked from mindspore-Ecosystem/mindspore
!11582 dataset: add cpu utilization profiling
From: @ms_yan Reviewed-by: Signed-off-by:
This commit is contained in:
commit
7ec4aa92c7
|
@ -13,7 +13,6 @@
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "minddata/dataset/api/python/pybind_conversion.h"
|
||||
|
||||
namespace mindspore {
|
||||
|
@ -63,6 +62,25 @@ std::vector<std::string> toStringVector(const py::list list) {
|
|||
return vector;
|
||||
}
|
||||
|
||||
std::vector<pid_t> toIntVector(const py::list input_list) {
|
||||
std::vector<pid_t> vector;
|
||||
if (!input_list.empty()) {
|
||||
std::transform(input_list.begin(), input_list.end(), std::back_inserter(vector),
|
||||
[&](const py::handle &handle) { return static_cast<pid_t>(toInt(handle)); });
|
||||
}
|
||||
return vector;
|
||||
}
|
||||
|
||||
std::unordered_map<int32_t, std::vector<pid_t>> toIntMap(const py::dict input_dict) {
|
||||
std::unordered_map<int32_t, std::vector<pid_t>> map;
|
||||
if (!input_dict.empty()) {
|
||||
for (auto p : input_dict) {
|
||||
(void)map.emplace(toInt(p.first), toIntVector(py::reinterpret_borrow<py::list>(p.second)));
|
||||
}
|
||||
}
|
||||
return map;
|
||||
}
|
||||
|
||||
std::pair<int64_t, int64_t> toIntPair(const py::tuple tuple) {
|
||||
std::pair<int64_t, int64_t> pair;
|
||||
if (!tuple.empty()) {
|
||||
|
|
|
@ -17,11 +17,13 @@
|
|||
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_API_PYTHON_PYBIND_CONVERSION_H_
|
||||
#define MINDSPORE_CCSRC_MINDDATA_DATASET_API_PYTHON_PYBIND_CONVERSION_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include "pybind11/pybind11.h"
|
||||
#include "pybind11/stl.h"
|
||||
|
@ -53,6 +55,10 @@ std::map<std::string, int32_t> toStringMap(const py::dict dict);
|
|||
|
||||
std::vector<std::string> toStringVector(const py::list list);
|
||||
|
||||
std::vector<pid_t> toIntVector(const py::list input_list);
|
||||
|
||||
std::unordered_map<int32_t, std::vector<pid_t>> toIntMap(const py::dict input_dict);
|
||||
|
||||
std::pair<int64_t, int64_t> toIntPair(const py::tuple tuple);
|
||||
|
||||
std::vector<std::pair<int, int>> toPairVector(const py::list list);
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
#include "minddata/dataset/engine/execution_tree.h"
|
||||
#include "minddata/dataset/util/status.h"
|
||||
#include "minddata/dataset/engine/datasetops/dataset_op.h"
|
||||
#include "minddata/dataset/engine/perf/profiling.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
|
@ -185,7 +186,8 @@ Status DatasetIterator::FetchNextTensorRow(TensorRow *out_row) {
|
|||
RETURN_IF_NOT_OK(curr_buffer_->PopRow(out_row));
|
||||
if (tracing_ != nullptr) {
|
||||
cur_batch_num_++;
|
||||
tracing_->Record(CONNECTOR_DEPTH, cur_connector_capacity_, cur_batch_num_, cur_connector_size_);
|
||||
tracing_->Record(CONNECTOR_DEPTH, cur_connector_capacity_, cur_batch_num_, cur_connector_size_,
|
||||
ProfilingTime::GetCurMilliSecond());
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
|
@ -265,7 +265,7 @@ Status BatchOp::LaunchThreadsAndInitOp() {
|
|||
}
|
||||
RETURN_IF_NOT_OK(worker_queues_.Register(tree_->AllTasks()));
|
||||
RETURN_IF_NOT_OK(
|
||||
tree_->LaunchWorkers(num_workers_, std::bind(&BatchOp::WorkerEntry, this, std::placeholders::_1), Name()));
|
||||
tree_->LaunchWorkers(num_workers_, std::bind(&BatchOp::WorkerEntry, this, std::placeholders::_1), Name(), id()));
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
|
|
@ -45,8 +45,8 @@ Status BuildSentencePieceVocabOp::operator()() {
|
|||
return Status(StatusCode::kUnexpectedError, __LINE__, __FILE__, "Pipeline init failed, Execution tree not set.");
|
||||
}
|
||||
RETURN_IF_NOT_OK(sentence_queue_->Register(tree_->AllTasks()));
|
||||
RETURN_IF_NOT_OK(
|
||||
tree_->AllTasks()->CreateAsyncTask("sentenceTask", std::bind(&BuildSentencePieceVocabOp::SentenceThread, this)));
|
||||
RETURN_IF_NOT_OK(tree_->AllTasks()->CreateAsyncTask(
|
||||
"sentenceTask", std::bind(&BuildSentencePieceVocabOp::SentenceThread, this), nullptr, id()));
|
||||
TaskManager::FindMe()->Post();
|
||||
child_iterator_ = std::make_unique<ChildIterator>(this, 0, 0);
|
||||
TensorRow new_row;
|
||||
|
|
|
@ -86,8 +86,9 @@ Status BuildVocabOp::operator()() {
|
|||
RETURN_IF_NOT_OK(collector_queue_->Register(tree_->AllTasks()));
|
||||
// launch worker threads and collector thread
|
||||
RETURN_IF_NOT_OK(
|
||||
tree_->LaunchWorkers(num_workers_, std::bind(&BuildVocabOp::WorkerEntry, this, std::placeholders::_1)));
|
||||
RETURN_IF_NOT_OK(tree_->AllTasks()->CreateAsyncTask("collector", std::bind(&BuildVocabOp::CollectorThread, this)));
|
||||
tree_->LaunchWorkers(num_workers_, std::bind(&BuildVocabOp::WorkerEntry, this, std::placeholders::_1), "", id()));
|
||||
RETURN_IF_NOT_OK(
|
||||
tree_->AllTasks()->CreateAsyncTask("collector", std::bind(&BuildVocabOp::CollectorThread, this), nullptr, id()));
|
||||
TaskManager::FindMe()->Post();
|
||||
child_iterator_ = std::make_unique<ChildIterator>(this, 0, 0);
|
||||
TensorRow new_row;
|
||||
|
|
|
@ -65,7 +65,7 @@ Status CacheLookupOp::operator()() {
|
|||
RETURN_IF_NOT_OK(RegisterResources());
|
||||
// Kick off the workers
|
||||
RETURN_IF_NOT_OK(
|
||||
tree_->LaunchWorkers(num_workers_, std::bind(&CacheLookupOp::WorkerEntry, this, std::placeholders::_1)));
|
||||
tree_->LaunchWorkers(num_workers_, std::bind(&CacheLookupOp::WorkerEntry, this, std::placeholders::_1), "", id()));
|
||||
// required task group sync after launching workers
|
||||
TaskManager::FindMe()->Post();
|
||||
// We have to wait until the leaf op has handshake with us.
|
||||
|
|
|
@ -60,13 +60,14 @@ Status CacheMergeOp::operator()() {
|
|||
io_que_ = std::make_unique<Queue<row_id_type>>(queue_sz);
|
||||
RETURN_IF_NOT_OK(io_que_->Register(tree_->AllTasks()));
|
||||
RETURN_IF_NOT_OK(tree_->LaunchWorkers(
|
||||
num_workers_, std::bind(&CacheMergeOp::WorkerEntry, this, std::placeholders::_1), Name() + "::WorkerEntry"));
|
||||
num_workers_, std::bind(&CacheMergeOp::WorkerEntry, this, std::placeholders::_1), Name() + "::WorkerEntry", id()));
|
||||
RETURN_IF_NOT_OK(tree_->LaunchWorkers(num_workers_,
|
||||
std::bind(&CacheMergeOp::CacheMissWorkerEntry, this, std::placeholders::_1),
|
||||
Name() + "::CacheMissWorkerEntry"));
|
||||
Name() + "::CacheMissWorkerEntry", id()));
|
||||
// One dedicated thread to move TensorRow from the pool to the cache server
|
||||
for (auto i = 0; i < num_cleaners_; ++i) {
|
||||
RETURN_IF_NOT_OK(tree_->AllTasks()->CreateAsyncTask("Cleaner", std::bind(&CacheMergeOp::Cleaner, this)));
|
||||
RETURN_IF_NOT_OK(
|
||||
tree_->AllTasks()->CreateAsyncTask("Cleaner", std::bind(&CacheMergeOp::Cleaner, this), nullptr, id()));
|
||||
}
|
||||
TaskManager::FindMe()->Post();
|
||||
return Status::OK();
|
||||
|
|
|
@ -26,7 +26,6 @@
|
|||
#include "minddata/dataset/engine/dataset_iterator.h"
|
||||
#include "minddata/dataset/engine/datasetops/epoch_ctrl_op.h"
|
||||
#include "minddata/dataset/engine/opt/pass.h"
|
||||
#include "minddata/dataset/engine/perf/device_queue_tracing.h"
|
||||
#include "minddata/dataset/engine/perf/profiling.h"
|
||||
#include "minddata/dataset/util/status.h"
|
||||
#include "minddata/dataset/util/task_manager.h"
|
||||
|
@ -134,8 +133,8 @@ Status DeviceQueueOp::operator()() {
|
|||
Status DeviceQueueOp::SendDataToAscend() {
|
||||
MS_LOG(INFO) << "Device queue, sending data to Ascend.";
|
||||
int64_t send_batch = 0;
|
||||
double batch_start_time, end_time;
|
||||
int32_t batch_cost, tdt_cost;
|
||||
uint64_t batch_start_time, end_time;
|
||||
int32_t tdt_cost;
|
||||
int32_t connector_size = 0;
|
||||
int32_t connector_capacity;
|
||||
bool is_break_loop = false;
|
||||
|
@ -178,20 +177,8 @@ Status DeviceQueueOp::SendDataToAscend() {
|
|||
[](const std::shared_ptr<Tensor> &ts) { return std::make_pair(ts->type(), ts->shape()); });
|
||||
RETURN_IF_NOT_OK(data_info_queue_ptr_->Add(data_info));
|
||||
}
|
||||
|
||||
if (isProfilingEnable) {
|
||||
end_time = ProfilingTime::GetCurMilliSecond();
|
||||
// record push tdt time
|
||||
profiling_node->Record(TIME, TDT_PUSH_TIME, send_batch + 1, tdt_cost);
|
||||
batch_cost = (int32_t)(end_time - batch_start_time);
|
||||
// record batch time
|
||||
profiling_node->Record(TIME, BATCH_TIME, send_batch + 1, batch_cost);
|
||||
// record pipeline time
|
||||
profiling_node->Record(TIME, PIPELINE_TIME, send_batch + 1, batch_cost - tdt_cost);
|
||||
batch_start_time = end_time;
|
||||
// record connector depth
|
||||
profiling_node->Record(CONNECTOR_DEPTH, connector_capacity, send_batch + 1, connector_size);
|
||||
}
|
||||
ProfilingRecorder(isProfilingEnable, profiling_node, send_batch, tdt_cost, &batch_start_time, &end_time,
|
||||
connector_capacity, connector_size);
|
||||
send_batch++;
|
||||
|
||||
if (total_batch_ > 0 && send_batch >= total_batch_) {
|
||||
|
@ -273,9 +260,9 @@ Status DeviceQueueOp::LaunchParallelCopyThread() {
|
|||
receive_queues_.Init(num_workers_, queue_capacity_);
|
||||
RETURN_IF_NOT_OK(receive_queues_.Register(tree_->AllTasks()));
|
||||
RETURN_IF_NOT_OK(
|
||||
tree_->LaunchWorkers(num_workers_, std::bind(&DeviceQueueOp::WorkerEntry, this, std::placeholders::_1)));
|
||||
RETURN_IF_NOT_OK(
|
||||
tree_->AllTasks()->CreateAsyncTask("Push data to GPU queue", std::bind(&DeviceQueueOp::PushDataToGPU, this)));
|
||||
tree_->LaunchWorkers(num_workers_, std::bind(&DeviceQueueOp::WorkerEntry, this, std::placeholders::_1), "", id()));
|
||||
RETURN_IF_NOT_OK(tree_->AllTasks()->CreateAsyncTask("Push data to GPU queue",
|
||||
std::bind(&DeviceQueueOp::PushDataToGPU, this), nullptr, id()));
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
@ -285,8 +272,8 @@ Status DeviceQueueOp::PushDataToGPU() {
|
|||
// and will overload in distribute scenario, so don't remove this line
|
||||
cudaSetDevice(rank_id_);
|
||||
TaskManager::FindMe()->Post();
|
||||
double batch_start_time = 0.0;
|
||||
double end_time = 0.0;
|
||||
uint64_t batch_start_time = 0;
|
||||
uint64_t end_time = 0;
|
||||
int32_t batch_cost = 0;
|
||||
int32_t push_cost = 0;
|
||||
int32_t connector_size = 0;
|
||||
|
@ -345,15 +332,15 @@ Status DeviceQueueOp::PushDataToGPU() {
|
|||
if (isProfilingEnable) {
|
||||
end_time = ProfilingTime::GetCurMilliSecond();
|
||||
// record push data time
|
||||
profiling_node->Record(TIME, TDT_PUSH_TIME, send_batch, push_cost);
|
||||
profiling_node->Record(TIME, TDT_PUSH_TIME, send_batch, push_cost, end_time);
|
||||
batch_cost = (int32_t)(end_time - batch_start_time);
|
||||
// record batch time
|
||||
profiling_node->Record(TIME, BATCH_TIME, send_batch, batch_cost);
|
||||
profiling_node->Record(TIME, BATCH_TIME, send_batch, batch_cost, end_time);
|
||||
// record pipeline time
|
||||
profiling_node->Record(TIME, PIPELINE_TIME, send_batch, batch_cost - push_cost);
|
||||
profiling_node->Record(TIME, PIPELINE_TIME, send_batch, batch_cost - push_cost, end_time);
|
||||
batch_start_time = end_time;
|
||||
// record connector depth
|
||||
profiling_node->Record(CONNECTOR_DEPTH, connector_capacity, send_batch, connector_size);
|
||||
profiling_node->Record(CONNECTOR_DEPTH, connector_capacity, send_batch, connector_size, end_time);
|
||||
connector_size = gpu_item_connector_->size();
|
||||
connector_capacity = gpu_item_connector_->capacity();
|
||||
}
|
||||
|
@ -508,5 +495,23 @@ Status DeviceQueueOp::Accept(NodePass *p, bool *const modified) {
|
|||
return p->RunOnNode(shared_from_base<DeviceQueueOp>(), modified);
|
||||
}
|
||||
|
||||
void DeviceQueueOp::ProfilingRecorder(bool isProfilingEnable, std::shared_ptr<DeviceQueueTracing> profiling_node,
|
||||
int64_t send_batch, int32_t tdt_cost, uint64_t *batch_start_time,
|
||||
uint64_t *end_time, int32_t connector_capacity, int32_t connector_size) {
|
||||
// Record the pipeline profiling info
|
||||
if (isProfilingEnable) {
|
||||
*end_time = ProfilingTime::GetCurMilliSecond();
|
||||
// record push tdt time
|
||||
profiling_node->Record(TIME, TDT_PUSH_TIME, send_batch + 1, tdt_cost, *end_time);
|
||||
int32_t batch_cost = (int32_t)(*end_time - *batch_start_time);
|
||||
// record batch time
|
||||
profiling_node->Record(TIME, BATCH_TIME, send_batch + 1, batch_cost, *end_time);
|
||||
// record pipeline time
|
||||
profiling_node->Record(TIME, PIPELINE_TIME, send_batch + 1, batch_cost - tdt_cost, *end_time);
|
||||
*batch_start_time = *end_time;
|
||||
// record connector depth
|
||||
profiling_node->Record(CONNECTOR_DEPTH, connector_capacity, send_batch + 1, connector_size, *end_time);
|
||||
}
|
||||
}
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -23,6 +23,7 @@
|
|||
|
||||
#include "minddata/dataset/engine/datasetops/pipeline_op.h"
|
||||
#include "minddata/dataset/engine/datasetops/repeat_op.h"
|
||||
#include "minddata/dataset/engine/perf/device_queue_tracing.h"
|
||||
#include "minddata/dataset/util/status.h"
|
||||
|
||||
#ifdef ENABLE_TDTQUE
|
||||
|
@ -173,6 +174,11 @@ class DeviceQueueOp : public PipelineOp {
|
|||
// @return - Status of the node visit.
|
||||
Status Accept(NodePass *p, bool *const modified) override;
|
||||
|
||||
// Record the pipeline profiling info
|
||||
void ProfilingRecorder(bool isProfilingEnable, std::shared_ptr<DeviceQueueTracing> profiling_node, int64_t send_batch,
|
||||
int32_t tdt_cost, uint64_t *batch_start_time, uint64_t *end_time, int32_t connector_capacity,
|
||||
int32_t connector_size);
|
||||
|
||||
// Op name getter
|
||||
// @return Name of the current Op
|
||||
std::string Name() const override { return kDeviceQueueOp; }
|
||||
|
|
|
@ -71,7 +71,7 @@ Status FilterOp::operator()() {
|
|||
filter_queues_.Init(num_workers_, oc_queue_size_);
|
||||
RETURN_IF_NOT_OK(filter_queues_.Register(tree_->AllTasks()));
|
||||
Status rc =
|
||||
tree_->LaunchWorkers(num_workers_, std::bind(&FilterOp::WorkerEntry, this, std::placeholders::_1), Name());
|
||||
tree_->LaunchWorkers(num_workers_, std::bind(&FilterOp::WorkerEntry, this, std::placeholders::_1), Name(), id());
|
||||
// Synchronize with TaskManager.
|
||||
TaskManager::FindMe()->Post();
|
||||
RETURN_IF_NOT_OK(rc);
|
||||
|
|
|
@ -464,65 +464,7 @@ Status AlbumOp::LoadTensorRow(row_id_type row_id, const std::string &file, Tenso
|
|||
|
||||
// loop over each column descriptor, this can optimized by switch cases
|
||||
for (int32_t i = 0; i < columns; i++) {
|
||||
// special case to handle
|
||||
if (data_schema_->column(i).name() == "id") {
|
||||
// id is internal, special case to load from file
|
||||
RETURN_IF_NOT_OK(LoadIDTensor(file, i, row));
|
||||
continue;
|
||||
}
|
||||
// find if key does not exist, insert placeholder nullptr if not found
|
||||
if (js.find(data_schema_->column(i).name()) == js.end()) {
|
||||
// iterator not found, push nullptr as placeholder
|
||||
MS_LOG(INFO) << "Pushing empty tensor for column: " << data_schema_->column(i).name() << ".";
|
||||
RETURN_IF_NOT_OK(LoadEmptyTensor(i, row));
|
||||
continue;
|
||||
}
|
||||
nlohmann::json column_value = js.at(data_schema_->column(i).name());
|
||||
MS_LOG(INFO) << "This column is: " << data_schema_->column(i).name() << ".";
|
||||
bool is_array = column_value.is_array();
|
||||
// load single string
|
||||
if (column_value.is_string() && data_schema_->column(i).type() == DataType::DE_STRING) {
|
||||
RETURN_IF_NOT_OK(LoadStringTensor(column_value, i, row));
|
||||
continue;
|
||||
}
|
||||
// load string array
|
||||
if (is_array && data_schema_->column(i).type() == DataType::DE_STRING) {
|
||||
RETURN_IF_NOT_OK(LoadStringArrayTensor(column_value, i, row));
|
||||
continue;
|
||||
}
|
||||
// load image file
|
||||
if (column_value.is_string() && data_schema_->column(i).type() != DataType::DE_STRING) {
|
||||
std::string image_file_path = column_value;
|
||||
RETURN_IF_NOT_OK(LoadImageTensor(image_file_path, i, row));
|
||||
continue;
|
||||
}
|
||||
// load float value
|
||||
if (!is_array && (data_schema_->column(i).type() == DataType::DE_FLOAT32 ||
|
||||
data_schema_->column(i).type() == DataType::DE_FLOAT64)) {
|
||||
RETURN_IF_NOT_OK(LoadFloatTensor(column_value, i, row));
|
||||
continue;
|
||||
}
|
||||
// load float array
|
||||
if (is_array && (data_schema_->column(i).type() == DataType::DE_FLOAT32 ||
|
||||
data_schema_->column(i).type() == DataType::DE_FLOAT64)) {
|
||||
RETURN_IF_NOT_OK(LoadFloatArrayTensor(column_value, i, row));
|
||||
continue;
|
||||
}
|
||||
// int value
|
||||
if (!is_array && (data_schema_->column(i).type() == DataType::DE_INT64 ||
|
||||
data_schema_->column(i).type() == DataType::DE_INT32)) {
|
||||
RETURN_IF_NOT_OK(LoadIntTensor(column_value, i, row));
|
||||
continue;
|
||||
}
|
||||
// int array
|
||||
if (is_array && (data_schema_->column(i).type() == DataType::DE_INT64 ||
|
||||
data_schema_->column(i).type() == DataType::DE_INT32)) {
|
||||
RETURN_IF_NOT_OK(LoadIntArrayTensor(column_value, i, row));
|
||||
continue;
|
||||
} else {
|
||||
MS_LOG(WARNING) << "Value type for column: " << data_schema_->column(i).name() << " is not supported.";
|
||||
continue;
|
||||
}
|
||||
RETURN_IF_NOT_OK(loadColumnData(file, i, js, row));
|
||||
}
|
||||
} catch (const std::exception &err) {
|
||||
file_handle.close();
|
||||
|
@ -535,6 +477,60 @@ Status AlbumOp::LoadTensorRow(row_id_type row_id, const std::string &file, Tenso
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
Status AlbumOp::loadColumnData(const std::string &file, int32_t index, nlohmann::json js, TensorRow *row) {
|
||||
int32_t i = index;
|
||||
// special case to handle
|
||||
if (data_schema_->column(i).name() == "id") {
|
||||
// id is internal, special case to load from file
|
||||
return LoadIDTensor(file, i, row);
|
||||
}
|
||||
// find if key does not exist, insert placeholder nullptr if not found
|
||||
if (js.find(data_schema_->column(i).name()) == js.end()) {
|
||||
// iterator not found, push nullptr as placeholder
|
||||
MS_LOG(INFO) << "Pushing empty tensor for column: " << data_schema_->column(i).name() << ".";
|
||||
return LoadEmptyTensor(i, row);
|
||||
}
|
||||
nlohmann::json column_value = js.at(data_schema_->column(i).name());
|
||||
MS_LOG(INFO) << "This column is: " << data_schema_->column(i).name() << ".";
|
||||
bool is_array = column_value.is_array();
|
||||
// load single string
|
||||
if (column_value.is_string() && data_schema_->column(i).type() == DataType::DE_STRING) {
|
||||
return LoadStringTensor(column_value, i, row);
|
||||
}
|
||||
// load string array
|
||||
if (is_array && data_schema_->column(i).type() == DataType::DE_STRING) {
|
||||
return LoadStringArrayTensor(column_value, i, row);
|
||||
}
|
||||
// load image file
|
||||
if (column_value.is_string() && data_schema_->column(i).type() != DataType::DE_STRING) {
|
||||
std::string image_file_path = column_value;
|
||||
return LoadImageTensor(image_file_path, i, row);
|
||||
}
|
||||
// load float value
|
||||
bool judge_float = (data_schema_->column(i).type() == DataType::DE_FLOAT32) ||
|
||||
(data_schema_->column(i).type() == DataType::DE_FLOAT64);
|
||||
if (!is_array && judge_float) {
|
||||
return LoadFloatTensor(column_value, i, row);
|
||||
}
|
||||
// load float array
|
||||
if (is_array && judge_float) {
|
||||
return LoadFloatArrayTensor(column_value, i, row);
|
||||
}
|
||||
// int value
|
||||
if (!is_array &&
|
||||
(data_schema_->column(i).type() == DataType::DE_INT64 || data_schema_->column(i).type() == DataType::DE_INT32)) {
|
||||
return LoadIntTensor(column_value, i, row);
|
||||
}
|
||||
// int array
|
||||
if (is_array &&
|
||||
(data_schema_->column(i).type() == DataType::DE_INT64 || data_schema_->column(i).type() == DataType::DE_INT32)) {
|
||||
return LoadIntArrayTensor(column_value, i, row);
|
||||
} else {
|
||||
MS_LOG(WARNING) << "Value type for column: " << data_schema_->column(i).name() << " is not supported.";
|
||||
return Status::OK();
|
||||
}
|
||||
}
|
||||
|
||||
// Looping over LoadTensorRow to make 1 DataBuffer. 1 function call produces 1 buffer
|
||||
Status AlbumOp::LoadBuffer(const std::vector<int64_t> &keys, std::unique_ptr<DataBuffer> *db) {
|
||||
std::unique_ptr<TensorQTable> deq = std::make_unique<TensorQTable>();
|
||||
|
@ -587,7 +583,8 @@ Status AlbumOp::LaunchThreadsAndInitOp() {
|
|||
RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks()));
|
||||
RETURN_IF_NOT_OK(wait_for_workers_post_.Register(tree_->AllTasks()));
|
||||
// launch main workers that load DataBuffers by reading all images
|
||||
RETURN_IF_NOT_OK(tree_->LaunchWorkers(num_workers_, std::bind(&AlbumOp::WorkerEntry, this, std::placeholders::_1)));
|
||||
RETURN_IF_NOT_OK(
|
||||
tree_->LaunchWorkers(num_workers_, std::bind(&AlbumOp::WorkerEntry, this, std::placeholders::_1), "", id()));
|
||||
TaskManager::FindMe()->Post();
|
||||
RETURN_IF_NOT_OK(this->InitSampler()); // pass numRows to Sampler
|
||||
return Status::OK();
|
||||
|
|
|
@ -134,7 +134,7 @@ class AlbumOp : public ParallelOp, public RandomAccessOp {
|
|||
Status SanityCheck();
|
||||
|
||||
/// \brief The builder "build" method creates the final object.
|
||||
/// \param[inout] std::shared_ptr<AlbumOp> *op - DatasetOp
|
||||
/// \param[in, out] std::shared_ptr<AlbumOp> *op - DatasetOp
|
||||
/// \return Status The status code returned
|
||||
Status Build(std::shared_ptr<AlbumOp> *op);
|
||||
|
||||
|
@ -210,74 +210,82 @@ class AlbumOp : public ParallelOp, public RandomAccessOp {
|
|||
/// \brief Load image to tensor row
|
||||
/// \param[in] image_file Image name of file
|
||||
/// \param[in] col_num Column num in schema
|
||||
/// \param[inout] row Tensor row to push to
|
||||
/// \param[in, out] row Tensor row to push to
|
||||
/// \return Status The status code returned
|
||||
Status LoadImageTensor(const std::string &image_file, uint32_t col_num, TensorRow *row);
|
||||
|
||||
/// \brief Load vector of ints to tensor, append tensor to tensor row
|
||||
/// \param[in] json_obj Json object containing multi-dimensional label
|
||||
/// \param[in] col_num Column num in schema
|
||||
/// \param[inout] row Tensor row to push to
|
||||
/// \param[in, out] row Tensor row to push to
|
||||
/// \return Status The status code returned
|
||||
Status LoadIntArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row);
|
||||
|
||||
/// \brief Load vector of floatss to tensor, append tensor to tensor row
|
||||
/// \param[in] json_obj Json object containing array data
|
||||
/// \param[in] col_num Column num in schema
|
||||
/// \param[inout] row Tensor row to push to
|
||||
/// \param[in, out] row Tensor row to push to
|
||||
/// \return Status The status code returned
|
||||
Status LoadFloatArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row);
|
||||
|
||||
/// \brief Load string array into a tensor, append tensor to tensor row
|
||||
/// \param[in] json_obj Json object containing string tensor
|
||||
/// \param[in] col_num Column num in schema
|
||||
/// \param[inout] row Tensor row to push to
|
||||
/// \param[in, out] row Tensor row to push to
|
||||
/// \return Status The status code returned
|
||||
Status LoadStringArrayTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row);
|
||||
|
||||
/// \brief Load string into a tensor, append tensor to tensor row
|
||||
/// \param[in] json_obj Json object containing string tensor
|
||||
/// \param[in] col_num Column num in schema
|
||||
/// \param[inout] row Tensor row to push to
|
||||
/// \param[in, out] row Tensor row to push to
|
||||
/// \return Status The status code returned
|
||||
Status LoadStringTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row);
|
||||
|
||||
/// \brief Load float value to tensor row
|
||||
/// \param[in] json_obj Json object containing float
|
||||
/// \param[in] col_num Column num in schema
|
||||
/// \param[inout] row Tensor row to push to
|
||||
/// \param[in, out] row Tensor row to push to
|
||||
/// \return Status The status code returned
|
||||
Status LoadFloatTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row);
|
||||
|
||||
/// \brief Load int value to tensor row
|
||||
/// \param[in] json_obj Json object containing int
|
||||
/// \param[in] col_num Column num in schema
|
||||
/// \param[inout] row Tensor row to push to
|
||||
/// \param[in, out] row Tensor row to push to
|
||||
/// \return Status The status code returned
|
||||
Status LoadIntTensor(const nlohmann::json &json_obj, uint32_t col_num, TensorRow *row);
|
||||
|
||||
/// \brief Load emtpy tensor to tensor row
|
||||
/// \brief Load empty tensor to tensor row
|
||||
/// \param[in] col_num Column num in schema
|
||||
/// \param[inout] row Tensor row to push to
|
||||
/// \param[in, out] row Tensor row to push to
|
||||
/// \return Status The status code returned
|
||||
Status LoadEmptyTensor(uint32_t col_num, TensorRow *row);
|
||||
|
||||
/// \brief Load id from file name to tensor row
|
||||
/// \param[in] file The file name to get ID from
|
||||
/// \param[in] col_num Column num in schema
|
||||
/// \param[inout] row Tensor row to push to
|
||||
/// \param[in, out] row Tensor row to push to
|
||||
/// \return Status The status code returned
|
||||
Status LoadIDTensor(const std::string &file, uint32_t col_num, TensorRow *row);
|
||||
|
||||
/// \brief Load a tensor row according to a json file
|
||||
/// \param[in] row_id_type row_id - id for this tensor row
|
||||
/// \param[in] ImageColumns file Json file location
|
||||
/// \param[inout] TensorRow row Json content stored into a tensor row
|
||||
/// \param[in, out] TensorRow row Json content stored into a tensor row
|
||||
/// \return Status The status code returned
|
||||
Status LoadTensorRow(row_id_type row_id, const std::string &file, TensorRow *row);
|
||||
|
||||
/// \brief Load a tensor column according to a json file
|
||||
/// \param[in] ImageColumns file Json file location
|
||||
/// \param[in] index - certain column index
|
||||
/// \param[in] js - json object
|
||||
/// \param[in, out] TensorRow row Json content stored into a tensor row
|
||||
/// \return Status The status code returned
|
||||
Status loadColumnData(const std::string &file, int32_t index, nlohmann::json js, TensorRow *row);
|
||||
|
||||
/// \param[in] const std::vector<int64_t> &keys Keys in ioblock
|
||||
/// \param[inout] std::unique_ptr<DataBuffer> db Databuffer to push to
|
||||
/// \param[in, out] std::unique_ptr<DataBuffer> db Databuffer to push to
|
||||
/// \return Status The status code returned
|
||||
Status LoadBuffer(const std::vector<int64_t> &keys, std::unique_ptr<DataBuffer> *db);
|
||||
|
||||
|
|
|
@ -102,8 +102,10 @@ Status CelebAOp::LaunchThreadsAndInitOp() {
|
|||
RETURN_IF_NOT_OK(attr_info_queue_->Register(tree_->AllTasks()));
|
||||
RETURN_IF_NOT_OK(wait_for_workers_post_.Register(tree_->AllTasks()));
|
||||
|
||||
RETURN_IF_NOT_OK(tree_->AllTasks()->CreateAsyncTask("Walking attr file", std::bind(&CelebAOp::ParseAttrFile, this)));
|
||||
RETURN_IF_NOT_OK(tree_->LaunchWorkers(num_workers_, std::bind(&CelebAOp::WorkerEntry, this, std::placeholders::_1)));
|
||||
RETURN_IF_NOT_OK(
|
||||
tree_->AllTasks()->CreateAsyncTask("Walking attr file", std::bind(&CelebAOp::ParseAttrFile, this), nullptr, id()));
|
||||
RETURN_IF_NOT_OK(
|
||||
tree_->LaunchWorkers(num_workers_, std::bind(&CelebAOp::WorkerEntry, this, std::placeholders::_1), Name(), id()));
|
||||
TaskManager::FindMe()->Post();
|
||||
RETURN_IF_NOT_OK(ParseImageAttrInfo());
|
||||
RETURN_IF_NOT_OK(sampler_->HandshakeRandomAccessOp(this));
|
||||
|
|
|
@ -165,9 +165,10 @@ Status CifarOp::LaunchThreadsAndInitOp() {
|
|||
}
|
||||
RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks()));
|
||||
RETURN_IF_NOT_OK(wait_for_workers_post_.Register(tree_->AllTasks()));
|
||||
RETURN_IF_NOT_OK(tree_->AllTasks()->CreateAsyncTask(
|
||||
"Get cifar data block", std::bind(&CifarOp::ReadCifarBlockDataAsync, this), nullptr, id()));
|
||||
RETURN_IF_NOT_OK(
|
||||
tree_->AllTasks()->CreateAsyncTask("Get cifar data block", std::bind(&CifarOp::ReadCifarBlockDataAsync, this)));
|
||||
RETURN_IF_NOT_OK(tree_->LaunchWorkers(num_workers_, std::bind(&CifarOp::WorkerEntry, this, std::placeholders::_1)));
|
||||
tree_->LaunchWorkers(num_workers_, std::bind(&CifarOp::WorkerEntry, this, std::placeholders::_1), "", id()));
|
||||
TaskManager::FindMe()->Post();
|
||||
// The order of the following 2 functions must not be changed!
|
||||
RETURN_IF_NOT_OK(ParseCifarData()); // Parse cifar data and get num rows, blocking
|
||||
|
|
|
@ -237,9 +237,10 @@ Status ClueOp::operator()() {
|
|||
RETURN_IF_NOT_OK(io_block_queue_wait_post_.Register(tree_->AllTasks()));
|
||||
|
||||
// launch one thread, responsible for filling IoBlockQueue
|
||||
RETURN_IF_NOT_OK(tree_->LaunchWorkers(1, std::bind(&ClueOp::WaitToFillIOBlockQueue, this)));
|
||||
RETURN_IF_NOT_OK(tree_->LaunchWorkers(1, std::bind(&ClueOp::WaitToFillIOBlockQueue, this), "", id()));
|
||||
|
||||
RETURN_IF_NOT_OK(tree_->LaunchWorkers(num_workers_, std::bind(&ClueOp::WorkerEntry, this, std::placeholders::_1)));
|
||||
RETURN_IF_NOT_OK(
|
||||
tree_->LaunchWorkers(num_workers_, std::bind(&ClueOp::WorkerEntry, this, std::placeholders::_1), "", id()));
|
||||
|
||||
// must be called after launching workers.
|
||||
TaskManager::FindMe()->Post();
|
||||
|
|
|
@ -638,7 +638,8 @@ Status CocoOp::LaunchThreadsAndInitOp() {
|
|||
}
|
||||
RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks()));
|
||||
RETURN_IF_NOT_OK(wait_for_workers_post_.Register(tree_->AllTasks()));
|
||||
RETURN_IF_NOT_OK(tree_->LaunchWorkers(num_workers_, std::bind(&CocoOp::WorkerEntry, this, std::placeholders::_1)));
|
||||
RETURN_IF_NOT_OK(
|
||||
tree_->LaunchWorkers(num_workers_, std::bind(&CocoOp::WorkerEntry, this, std::placeholders::_1), "", id()));
|
||||
TaskManager::FindMe()->Post();
|
||||
RETURN_IF_NOT_OK(this->ParseAnnotationIds());
|
||||
RETURN_IF_NOT_OK(this->InitSampler());
|
||||
|
|
|
@ -555,9 +555,10 @@ Status CsvOp::operator()() {
|
|||
RETURN_IF_NOT_OK(io_block_queue_wait_post_.Register(tree_->AllTasks()));
|
||||
|
||||
// launch one thread, responsible for filling IoBlockQueue
|
||||
RETURN_IF_NOT_OK(tree_->LaunchWorkers(1, std::bind(&CsvOp::WaitToFillIOBlockQueue, this)));
|
||||
RETURN_IF_NOT_OK(tree_->LaunchWorkers(1, std::bind(&CsvOp::WaitToFillIOBlockQueue, this), "", id()));
|
||||
|
||||
RETURN_IF_NOT_OK(tree_->LaunchWorkers(num_workers_, std::bind(&CsvOp::WorkerEntry, this, std::placeholders::_1)));
|
||||
RETURN_IF_NOT_OK(
|
||||
tree_->LaunchWorkers(num_workers_, std::bind(&CsvOp::WorkerEntry, this, std::placeholders::_1), "", id()));
|
||||
|
||||
// must be called after launching workers.
|
||||
TaskManager::FindMe()->Post();
|
||||
|
|
|
@ -385,12 +385,13 @@ Status ImageFolderOp::LaunchThreadsAndInitOp() {
|
|||
// 1) A thread that walks all folders and push the folder names to a util:Queue folder_name_queue_.
|
||||
// 2) Workers that pull foldername from folder_name_queue_, walk it and return the sorted images to image_name_queue
|
||||
// 3) Launch main workers that load DataBuffers by reading all images
|
||||
RETURN_IF_NOT_OK(tree_->AllTasks()->CreateAsyncTask("walk dir", std::bind(&ImageFolderOp::StartAsyncWalk, this)));
|
||||
RETURN_IF_NOT_OK(
|
||||
tree_->AllTasks()->CreateAsyncTask("walk dir", std::bind(&ImageFolderOp::StartAsyncWalk, this), nullptr, id()));
|
||||
RETURN_IF_NOT_OK(tree_->LaunchWorkers(num_workers_,
|
||||
std::bind(&ImageFolderOp::PrescanWorkerEntry, this, std::placeholders::_1),
|
||||
Name() + "::PrescanWorkerEntry"));
|
||||
Name() + "::PrescanWorkerEntry", id()));
|
||||
RETURN_IF_NOT_OK(tree_->LaunchWorkers(
|
||||
num_workers_, std::bind(&ImageFolderOp::WorkerEntry, this, std::placeholders::_1), Name() + "::WorkerEntry"));
|
||||
num_workers_, std::bind(&ImageFolderOp::WorkerEntry, this, std::placeholders::_1), Name() + "::WorkerEntry", id()));
|
||||
TaskManager::FindMe()->Post();
|
||||
// The order of the following 2 functions must not be changed!
|
||||
RETURN_IF_NOT_OK(this->PrescanMasterEntry(folder_path_)); // Master thread of pre-scan workers, blocking
|
||||
|
|
|
@ -152,7 +152,7 @@ Status ManifestOp::LaunchThreadsAndInitOp() {
|
|||
RETURN_IF_NOT_OK(wait_for_workers_post_.Register(tree_->AllTasks()));
|
||||
|
||||
RETURN_IF_NOT_OK(
|
||||
tree_->LaunchWorkers(num_workers_, std::bind(&ManifestOp::WorkerEntry, this, std::placeholders::_1)));
|
||||
tree_->LaunchWorkers(num_workers_, std::bind(&ManifestOp::WorkerEntry, this, std::placeholders::_1), "", id()));
|
||||
TaskManager::FindMe()->Post();
|
||||
RETURN_IF_NOT_OK(ParseManifestFile());
|
||||
RETURN_IF_NOT_OK(CountDatasetInfo());
|
||||
|
|
|
@ -446,7 +446,7 @@ Status MindRecordOp::LaunchThreadAndInitOp() {
|
|||
}
|
||||
// Launch main workers that load DataBuffers by reading all images
|
||||
RETURN_IF_NOT_OK(
|
||||
tree_->LaunchWorkers(num_workers_, std::bind(&MindRecordOp::WorkerEntry, this, std::placeholders::_1)));
|
||||
tree_->LaunchWorkers(num_workers_, std::bind(&MindRecordOp::WorkerEntry, this, std::placeholders::_1), "", id()));
|
||||
TaskManager::FindMe()->Post();
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
|
@ -420,7 +420,8 @@ Status MnistOp::LaunchThreadsAndInitOp() {
|
|||
}
|
||||
RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks()));
|
||||
RETURN_IF_NOT_OK(wait_for_workers_post_.Register(tree_->AllTasks()));
|
||||
RETURN_IF_NOT_OK(tree_->LaunchWorkers(num_workers_, std::bind(&MnistOp::WorkerEntry, this, std::placeholders::_1)));
|
||||
RETURN_IF_NOT_OK(
|
||||
tree_->LaunchWorkers(num_workers_, std::bind(&MnistOp::WorkerEntry, this, std::placeholders::_1), "", id()));
|
||||
TaskManager::FindMe()->Post();
|
||||
RETURN_IF_NOT_OK(this->WalkAllFiles());
|
||||
RETURN_IF_NOT_OK(this->ParseMnistData());
|
||||
|
|
|
@ -197,7 +197,7 @@ Status RandomDataOp::operator()() {
|
|||
|
||||
// RandomDataOp doesn't need the master thread to stay around. Kick off the workers and then master exits.
|
||||
RETURN_IF_NOT_OK(
|
||||
tree_->LaunchWorkers(num_workers_, std::bind(&RandomDataOp::WorkerEntry, this, std::placeholders::_1)));
|
||||
tree_->LaunchWorkers(num_workers_, std::bind(&RandomDataOp::WorkerEntry, this, std::placeholders::_1), "", id()));
|
||||
|
||||
// required task group setup after launching workers
|
||||
TaskManager::FindMe()->Post();
|
||||
|
|
|
@ -385,11 +385,11 @@ Status TextFileOp::operator()() {
|
|||
RETURN_IF_NOT_OK(io_block_queue_wait_post_.Register(tree_->AllTasks()));
|
||||
|
||||
// launch one thread, responsible for filling IoBlockQueue
|
||||
RETURN_IF_NOT_OK(tree_->LaunchWorkers(1, std::bind(&TextFileOp::WaitToFillIOBlockQueue, this)));
|
||||
RETURN_IF_NOT_OK(tree_->LaunchWorkers(1, std::bind(&TextFileOp::WaitToFillIOBlockQueue, this), Name(), id()));
|
||||
|
||||
// Read data from disk into buffers
|
||||
RETURN_IF_NOT_OK(
|
||||
tree_->LaunchWorkers(num_workers_, std::bind(&TextFileOp::WorkerEntry, this, std::placeholders::_1)));
|
||||
tree_->LaunchWorkers(num_workers_, std::bind(&TextFileOp::WorkerEntry, this, std::placeholders::_1), Name(), id()));
|
||||
|
||||
// must be called after launching workers.
|
||||
TaskManager::FindMe()->Post();
|
||||
|
|
|
@ -234,12 +234,12 @@ Status TFReaderOp::operator()() {
|
|||
RETURN_IF_NOT_OK(io_block_queue_wait_post_.Register(tree_->AllTasks()));
|
||||
|
||||
// launch one thread, responsible for filling mIOBlockQueue
|
||||
RETURN_IF_NOT_OK(tree_->LaunchWorkers(1, std::bind(&TFReaderOp::WaitToFillIOBlockQueue, this)));
|
||||
RETURN_IF_NOT_OK(tree_->LaunchWorkers(1, std::bind(&TFReaderOp::WaitToFillIOBlockQueue, this), "", id()));
|
||||
|
||||
// launch num_workers_ worker threads, responsible for pulling from the IOBlockQueue and reading
|
||||
// data from disk into buffers
|
||||
RETURN_IF_NOT_OK(
|
||||
tree_->LaunchWorkers(num_workers_, std::bind(&TFReaderOp::WorkerEntry, this, std::placeholders::_1)));
|
||||
tree_->LaunchWorkers(num_workers_, std::bind(&TFReaderOp::WorkerEntry, this, std::placeholders::_1), "", id()));
|
||||
|
||||
// must be called after launching workers. workers can't be spawned after this post,
|
||||
// so workers have to be kept alive until the end of the program
|
||||
|
|
|
@ -366,6 +366,7 @@ Status VOCOp::ParseAnnotationBbox(const std::string &path) {
|
|||
} else {
|
||||
RETURN_STATUS_UNEXPECTED("Invalid data, bndbox dismatch in " + path);
|
||||
}
|
||||
|
||||
if (label_name != "" && (class_index_.empty() || class_index_.find(label_name) != class_index_.end()) && xmin > 0 &&
|
||||
ymin > 0 && xmax > xmin && ymax > ymin) {
|
||||
std::vector<float> bbox_list = {xmin, ymin, xmax - xmin, ymax - ymin, difficult, truncated};
|
||||
|
@ -389,7 +390,8 @@ Status VOCOp::LaunchThreadsAndInitOp() {
|
|||
}
|
||||
RETURN_IF_NOT_OK(io_block_queues_.Register(tree_->AllTasks()));
|
||||
RETURN_IF_NOT_OK(wait_for_workers_post_.Register(tree_->AllTasks()));
|
||||
RETURN_IF_NOT_OK(tree_->LaunchWorkers(num_workers_, std::bind(&VOCOp::WorkerEntry, this, std::placeholders::_1)));
|
||||
RETURN_IF_NOT_OK(
|
||||
tree_->LaunchWorkers(num_workers_, std::bind(&VOCOp::WorkerEntry, this, std::placeholders::_1), "", id()));
|
||||
TaskManager::FindMe()->Post();
|
||||
RETURN_IF_NOT_OK(this->ParseImageIds());
|
||||
if (task_type_ == TaskType::Detection) {
|
||||
|
|
|
@ -208,7 +208,7 @@ Status ExecutionTree::Launch() {
|
|||
// the launching tree/user thread. Do not exec any thread for an inlined op.
|
||||
itr->state_ = DatasetOp::OpState::kDeOpRunning;
|
||||
if (!itr->inlined()) {
|
||||
RETURN_IF_NOT_OK(tg_->CreateAsyncTask(itr->NameWithID(), std::ref(*itr)));
|
||||
RETURN_IF_NOT_OK(tg_->CreateAsyncTask(itr->NameWithID(), std::ref(*itr), nullptr, itr->id()));
|
||||
// Set the state of the Operator as running. This only matters in Leaf ops, CacheOp and TakeOp
|
||||
}
|
||||
}
|
||||
|
@ -237,7 +237,8 @@ ExecutionTree::Iterator::Iterator(const std::shared_ptr<DatasetOp> &root) : ind_
|
|||
|
||||
// Given the number of workers, launches the worker entry function for each. Essentially a
|
||||
// wrapper for the TaskGroup handling that is stored inside the execution tree.
|
||||
Status ExecutionTree::LaunchWorkers(int32_t num_workers, std::function<Status(uint32_t)> func, std::string name) {
|
||||
Status ExecutionTree::LaunchWorkers(int32_t num_workers, std::function<Status(uint32_t)> func, std::string name,
|
||||
int32_t operator_id) {
|
||||
int32_t num_cpu_threads = GlobalContext::Instance()->config_manager()->num_cpu_threads();
|
||||
// this performs check that num_workers is positive and not unreasonably large which could happen
|
||||
// for example, un-initialized variable. uint16 max is 65536 which is large enough to cover everything
|
||||
|
@ -249,7 +250,7 @@ Status ExecutionTree::LaunchWorkers(int32_t num_workers, std::function<Status(ui
|
|||
<< std::to_string(num_cpu_threads) << ", the maximum number of threads on this CPU.";
|
||||
}
|
||||
for (int32_t i = 0; i < num_workers; ++i) {
|
||||
RETURN_IF_NOT_OK(tg_->CreateAsyncTask(name, std::bind(func, i)));
|
||||
RETURN_IF_NOT_OK(tg_->CreateAsyncTask(name, std::bind(func, i), nullptr, operator_id));
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
|
@ -155,8 +155,11 @@ class ExecutionTree {
|
|||
// wrapper for the TaskGroup handling that is stored inside the execution tree.
|
||||
// @param num_workers - The number of workers to launch
|
||||
// @param func - The function entry point that workers will execute
|
||||
// @param name - The description of worker to launch
|
||||
// @param op_id - The id of corresponding operator, if not inherit from dataset op then it is -1.
|
||||
// @return Status The status code returned
|
||||
Status LaunchWorkers(int32_t num_workers, std::function<Status(uint32_t)> func, std::string name = "");
|
||||
Status LaunchWorkers(int32_t num_workers, std::function<Status(uint32_t)> func, std::string name = "",
|
||||
int32_t operator_id = -1);
|
||||
|
||||
// Getter method
|
||||
// @return shared_ptr to the root operator
|
||||
|
|
|
@ -5,4 +5,5 @@ add_library(engine-perf OBJECT
|
|||
connector_size.cc
|
||||
dataset_iterator_tracing.cc
|
||||
connector_throughput.cc
|
||||
cpu_sampling.cc
|
||||
)
|
||||
|
|
|
@ -0,0 +1,567 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#include "minddata/dataset/engine/perf/cpu_sampling.h"
|
||||
#if !defined(_WIN32) && !defined(_WIN64) && !defined(__ANDROID__) && !defined(ANDROID) && !defined(__APPLE__)
|
||||
#include <sys/syscall.h>
|
||||
#endif
|
||||
#include <math.h>
|
||||
#include <algorithm>
|
||||
#include <cstdio>
|
||||
#include <fstream>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include "minddata/dataset/api/python/pybind_conversion.h"
|
||||
#include "minddata/dataset/core/config_manager.h"
|
||||
#include "minddata/dataset/engine/execution_tree.h"
|
||||
#include "minddata/dataset/util/path.h"
|
||||
|
||||
using json = nlohmann::json;
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
bool BaseCpu::fetched_all_process_shared = false;
|
||||
std::unordered_map<int32_t, std::vector<pid_t>> BaseCpu::op_process_shared = {};
|
||||
|
||||
#if !defined(_WIN32) && !defined(_WIN64) && !defined(__ANDROID__) && !defined(ANDROID) && !defined(__APPLE__)
|
||||
#define USING_LINUX
|
||||
#endif
|
||||
|
||||
BaseCpu::BaseCpu() {
|
||||
pre_cpu_stat_.user_stat_ = 0;
|
||||
pre_cpu_stat_.sys_stat_ = 0;
|
||||
pre_cpu_stat_.io_stat_ = 0;
|
||||
pre_cpu_stat_.idle_stat_ = 0;
|
||||
pre_cpu_stat_.total_stat_ = 0;
|
||||
}
|
||||
|
||||
Status DeviceCpu::ParseCpuInfo(const std::string &str) {
|
||||
CpuStat cpu_stat;
|
||||
uint64_t nice = 0;
|
||||
uint64_t irq = 0;
|
||||
uint64_t softirq = 0;
|
||||
if (std::sscanf(str.c_str(), "%*s %lu %lu %lu %lu %lu %lu %lu", &cpu_stat.user_stat_, &nice, &cpu_stat.sys_stat_,
|
||||
&cpu_stat.idle_stat_, &cpu_stat.io_stat_, &irq, &softirq) == EOF) {
|
||||
return Status(StatusCode::kUnexpectedError, "Get device CPU failed.");
|
||||
}
|
||||
|
||||
cpu_stat.total_stat_ =
|
||||
cpu_stat.user_stat_ + nice + cpu_stat.sys_stat_ + cpu_stat.idle_stat_ + cpu_stat.io_stat_ + irq + softirq;
|
||||
// Calculate the utilization from the second sampling
|
||||
if (!first_collect_) {
|
||||
CpuUtil info;
|
||||
info.user_utilization_ = floor((cpu_stat.user_stat_ - pre_cpu_stat_.user_stat_) * 1.0 /
|
||||
(cpu_stat.total_stat_ - pre_cpu_stat_.total_stat_) * 100 +
|
||||
0.5);
|
||||
info.sys_utilization_ = floor((cpu_stat.sys_stat_ - pre_cpu_stat_.sys_stat_) * 1.0 /
|
||||
(cpu_stat.total_stat_ - pre_cpu_stat_.total_stat_) * 100 +
|
||||
0.5);
|
||||
info.io_utilization_ = floor((cpu_stat.io_stat_ - pre_cpu_stat_.io_stat_) * 1.0 /
|
||||
(cpu_stat.total_stat_ - pre_cpu_stat_.total_stat_) * 100 +
|
||||
0.5);
|
||||
info.idle_utilization_ = floor((cpu_stat.idle_stat_ - pre_cpu_stat_.idle_stat_) * 1.0 /
|
||||
(cpu_stat.total_stat_ - pre_cpu_stat_.total_stat_) * 100 +
|
||||
0.5);
|
||||
cpu_util_.emplace_back(info);
|
||||
}
|
||||
pre_cpu_stat_.user_stat_ = cpu_stat.user_stat_;
|
||||
pre_cpu_stat_.sys_stat_ = cpu_stat.sys_stat_;
|
||||
pre_cpu_stat_.io_stat_ = cpu_stat.io_stat_;
|
||||
pre_cpu_stat_.idle_stat_ = cpu_stat.idle_stat_;
|
||||
pre_cpu_stat_.total_stat_ = cpu_stat.total_stat_;
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status DeviceCpu::ParseCtxt(const std::string &str) {
|
||||
uint64_t ctxt;
|
||||
if (std::sscanf(str.c_str(), "%*s %lu", &ctxt) == EOF) {
|
||||
return Status(StatusCode::kUnexpectedError, "Get context switch count failed.");
|
||||
}
|
||||
// Calculate the utilization from the second sampling
|
||||
if (!first_collect_) {
|
||||
context_switch_count_.push_back(ctxt - pre_context_switch_count_);
|
||||
}
|
||||
pre_context_switch_count_ = ctxt;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status DeviceCpu::ParseRunningProcess(const std::string &str) {
|
||||
uint32_t running_process;
|
||||
if (std::sscanf(str.c_str(), "%*s %ud", &running_process) == EOF) {
|
||||
return Status(StatusCode::kUnexpectedError, "Get context switch count failed.");
|
||||
}
|
||||
// Drop the first value in order to collect same amount of CPU utilization
|
||||
if (!first_collect_) {
|
||||
running_process_.push_back(running_process);
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status DeviceCpu::Collect(ExecutionTree *tree) {
|
||||
std::ifstream file("/proc/stat");
|
||||
if (!file.is_open()) {
|
||||
MS_LOG(WARNING) << "Open CPU file failed when collect CPU information";
|
||||
return Status::OK();
|
||||
}
|
||||
bool first_line = true;
|
||||
std::string line;
|
||||
while (getline(file, line)) {
|
||||
if (first_line) {
|
||||
first_line = false;
|
||||
RETURN_IF_NOT_OK(ParseCpuInfo(line));
|
||||
}
|
||||
if (line.find("ctxt") != std::string::npos) {
|
||||
RETURN_IF_NOT_OK(ParseCtxt(line));
|
||||
}
|
||||
if (line.find("procs_running") != std::string::npos) {
|
||||
RETURN_IF_NOT_OK(ParseRunningProcess(line));
|
||||
}
|
||||
}
|
||||
file.close();
|
||||
|
||||
first_collect_ = false;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status DeviceCpu::SaveToFile(const std::string &file_path) {
|
||||
Path path = Path(file_path);
|
||||
json output;
|
||||
if (path.Exists()) {
|
||||
MS_LOG(DEBUG) << file_path << " exists already";
|
||||
std::ifstream file(file_path);
|
||||
file >> output;
|
||||
} else {
|
||||
output["sampling_interval"] = GlobalContext::config_manager()->monitor_sampling_interval();
|
||||
}
|
||||
|
||||
std::vector<int8_t> user_util;
|
||||
std::transform(cpu_util_.begin(), cpu_util_.end(), std::back_inserter(user_util),
|
||||
[&](const CpuUtil &info) { return info.user_utilization_; });
|
||||
std::vector<int8_t> sys_util;
|
||||
std::transform(cpu_util_.begin(), cpu_util_.end(), std::back_inserter(sys_util),
|
||||
[&](const CpuUtil &info) { return info.sys_utilization_; });
|
||||
std::vector<int8_t> io_util;
|
||||
std::transform(cpu_util_.begin(), cpu_util_.end(), std::back_inserter(io_util),
|
||||
[&](const CpuUtil &info) { return info.io_utilization_; });
|
||||
std::vector<int8_t> idle_util;
|
||||
std::transform(cpu_util_.begin(), cpu_util_.end(), std::back_inserter(idle_util),
|
||||
[&](const CpuUtil &info) { return info.idle_utilization_; });
|
||||
|
||||
output["device_info"] = {{"user_utilization", user_util},
|
||||
{"sys_utilization", sys_util},
|
||||
{"io_utilization", io_util},
|
||||
{"idle_utilization", idle_util},
|
||||
{"runable_processes", running_process_},
|
||||
{"context_switch_count", context_switch_count_}};
|
||||
|
||||
// Discard the content of the file when opening.
|
||||
std::ofstream os(file_path, std::ios::trunc);
|
||||
os << output;
|
||||
|
||||
MS_LOG(INFO) << "Save device CPU success.";
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status OperatorCpu::ParseCpuInfo(int32_t op_id, int64_t thread_id,
|
||||
std::unordered_map<int32_t, std::unordered_map<int64_t, CpuOpStat>> *op_stat) {
|
||||
pid_t pid = 0;
|
||||
#if defined(USING_LINUX)
|
||||
pid = syscall(SYS_getpid);
|
||||
#endif
|
||||
std::string stat_path = "/proc/" + std::to_string(pid) + "/task/" + std::to_string(thread_id) + "/stat";
|
||||
|
||||
// Judge whether file exist first
|
||||
Path temp_path(stat_path);
|
||||
if (!temp_path.Exists()) {
|
||||
(*op_stat)[op_id][thread_id].user_stat_ = 0;
|
||||
(*op_stat)[op_id][thread_id].sys_stat_ = 0;
|
||||
return Status(StatusCode::kFileNotExist);
|
||||
}
|
||||
|
||||
std::ifstream file(stat_path);
|
||||
if (!file.is_open()) {
|
||||
MS_LOG(WARNING) << "Open CPU file failed when collect CPU information";
|
||||
return Status::OK();
|
||||
}
|
||||
std::string str;
|
||||
getline(file, str);
|
||||
uint64_t utime;
|
||||
uint64_t stime;
|
||||
if (std::sscanf(str.c_str(), "%*d %*s %*s %*lu %*lu %*lu %*lu %*lu %*lu %*lu %*lu %*lu %*lu %lu %lu", &utime,
|
||||
&stime) == EOF) {
|
||||
file.close();
|
||||
return Status(StatusCode::kUnexpectedError, "Get device CPU failed.");
|
||||
}
|
||||
file.close();
|
||||
(*op_stat)[op_id][thread_id].user_stat_ = utime;
|
||||
(*op_stat)[op_id][thread_id].sys_stat_ = stime;
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status OperatorCpu::GetTotalCpuTime(uint64_t *total_stat) {
|
||||
std::ifstream file("/proc/stat");
|
||||
if (!file.is_open()) {
|
||||
MS_LOG(WARNING) << "Open CPU file failed when collect CPU information";
|
||||
return Status::OK();
|
||||
}
|
||||
std::string str;
|
||||
getline(file, str);
|
||||
uint64_t user = 0, sys = 0, idle = 0, iowait = 0, nice = 0, irq = 0, softirq = 0;
|
||||
if (std::sscanf(str.c_str(), "%*s %lu %lu %lu %lu %lu %lu %lu", &user, &nice, &sys, &idle, &iowait, &irq, &softirq) ==
|
||||
EOF) {
|
||||
file.close();
|
||||
return Status(StatusCode::kUnexpectedError, "Get device CPU failed.");
|
||||
}
|
||||
file.close();
|
||||
*total_stat = user + nice + sys + idle + iowait + irq + softirq;
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status OperatorCpu::Collect(ExecutionTree *tree) {
|
||||
if (first_collect_) {
|
||||
for (auto iter = tree->begin(); iter != tree->end(); ++iter) {
|
||||
id_count++;
|
||||
}
|
||||
#if defined(USING_LINUX)
|
||||
cpu_processor_num = get_nprocs_conf();
|
||||
#endif
|
||||
}
|
||||
|
||||
// Obtain the op and thread mapping
|
||||
op_thread.clear();
|
||||
List<Task> allTasks = tree->AllTasks()->GetTask();
|
||||
for (auto &task1 : allTasks) {
|
||||
int32_t op_id = task1.get_operator_id();
|
||||
op_thread[op_id].emplace_back(task1.get_linux_id());
|
||||
}
|
||||
|
||||
// add process id into op_thread
|
||||
if (!fetched_all_process) {
|
||||
{
|
||||
py::gil_scoped_acquire gil_acquire;
|
||||
py::module ds = py::module::import("mindspore.dataset.engine.datasets");
|
||||
py::tuple process_info = ds.attr("_get_operator_process")();
|
||||
py::dict sub_process = py::reinterpret_borrow<py::dict>(process_info[0]);
|
||||
fetched_all_process = py::reinterpret_borrow<py::bool_>(process_info[1]);
|
||||
// parse dict value
|
||||
op_process = toIntMap(sub_process);
|
||||
BaseCpu::op_process_shared = op_process;
|
||||
BaseCpu::fetched_all_process_shared = fetched_all_process;
|
||||
}
|
||||
|
||||
// judge whether there is device_que operator, if so operator id may need increase by one, temp use directly
|
||||
for (auto item : op_process) {
|
||||
if (!item.second.empty()) {
|
||||
if (op_thread.find(item.first) != op_thread.end()) {
|
||||
op_thread[item.first].insert(op_thread[item.first].end(), item.second.begin(), item.second.end());
|
||||
} else {
|
||||
op_thread[item.first] = item.second;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t total_stat_;
|
||||
RETURN_IF_NOT_OK(GetTotalCpuTime(&total_stat_));
|
||||
std::vector<CpuOpUtil> cpu_step_util_;
|
||||
std::unordered_map<int32_t, std::unordered_map<int64_t, CpuOpStat>> op_stat_;
|
||||
|
||||
if (!first_collect_) {
|
||||
// obtain all the op id in current tasks
|
||||
std::vector<int32_t> total_op_id;
|
||||
for (auto iter = op_thread.begin(); iter != op_thread.end(); iter++) {
|
||||
total_op_id.emplace_back(iter->first);
|
||||
}
|
||||
|
||||
// iter all the op, and obtain the CPU utilization of each operator
|
||||
for (auto op_id = -1; op_id < id_count; op_id++) {
|
||||
float user_util = 0, sys_util = 0;
|
||||
auto iter = std::find(total_op_id.begin(), total_op_id.end(), op_id);
|
||||
if (iter != total_op_id.end()) {
|
||||
for (auto thread_id : op_thread[op_id]) {
|
||||
if (ParseCpuInfo(op_id, thread_id, &op_stat_) == Status::OK()) {
|
||||
user_util += (op_stat_[op_id][thread_id].user_stat_ - pre_op_stat_[op_id][thread_id].user_stat_) * 1.0 /
|
||||
(total_stat_ - pre_total_stat_) * 100;
|
||||
sys_util += (op_stat_[op_id][thread_id].sys_stat_ - pre_op_stat_[op_id][thread_id].sys_stat_) * 1.0 /
|
||||
(total_stat_ - pre_total_stat_) * 100;
|
||||
}
|
||||
}
|
||||
}
|
||||
CpuOpUtil info;
|
||||
info.op_id = op_id;
|
||||
info.sys_utilization_ = sys_util;
|
||||
info.user_utilization_ = user_util;
|
||||
cpu_step_util_.emplace_back(info);
|
||||
}
|
||||
cpu_op_util_.emplace_back(cpu_step_util_);
|
||||
} else {
|
||||
// mainly obtain the init CPU execute time in first collect
|
||||
for (auto iter = op_thread.begin(); iter != op_thread.end(); iter++) {
|
||||
int32_t op_id = iter->first;
|
||||
for (auto thread_id : iter->second) {
|
||||
ParseCpuInfo(op_id, thread_id, &op_stat_);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// copy current op_stat into pre_op_stat
|
||||
pre_op_stat_ = op_stat_;
|
||||
pre_total_stat_ = total_stat_;
|
||||
|
||||
first_collect_ = false;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status OperatorCpu::SaveToFile(const std::string &file_path) {
|
||||
Path path = Path(file_path);
|
||||
json output;
|
||||
if (path.Exists()) {
|
||||
MS_LOG(DEBUG) << file_path << "already exist.";
|
||||
std::ifstream file(file_path);
|
||||
file >> output;
|
||||
}
|
||||
|
||||
uint8_t index = 0;
|
||||
json OpWriter;
|
||||
for (auto op_id = -1; op_id < id_count; op_id++) {
|
||||
std::vector<uint16_t> user_util;
|
||||
std::vector<uint16_t> sys_util;
|
||||
std::transform(
|
||||
cpu_op_util_.begin(), cpu_op_util_.end(), std::back_inserter(user_util),
|
||||
[&](const std::vector<CpuOpUtil> &info) { return int16_t(info[index].user_utilization_ * cpu_processor_num); });
|
||||
std::transform(
|
||||
cpu_op_util_.begin(), cpu_op_util_.end(), std::back_inserter(sys_util),
|
||||
[&](const std::vector<CpuOpUtil> &info) { return int16_t(info[index].sys_utilization_ * cpu_processor_num); });
|
||||
|
||||
json per_op_info = {{"metrics", {{"user_utilization", user_util}, {"sys_utilization", sys_util}}},
|
||||
{"op_id", op_id}};
|
||||
OpWriter.emplace_back(per_op_info);
|
||||
index++;
|
||||
}
|
||||
output["op_info"] = OpWriter;
|
||||
|
||||
// Discard the content of the file when opening.
|
||||
std::ofstream os(file_path, std::ios::trunc);
|
||||
os << output;
|
||||
|
||||
MS_LOG(INFO) << "Save device CPU success.";
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status ProcessCpu::ParseCpuInfo() {
|
||||
uint64_t total_stat_;
|
||||
RETURN_IF_NOT_OK(GetTotalCpuTime(&total_stat_));
|
||||
|
||||
if (!pre_fetched_state) {
|
||||
process_id.clear();
|
||||
pid_t main_pid = 0;
|
||||
#if defined(USING_LINUX)
|
||||
main_pid = syscall(SYS_getpid);
|
||||
#endif
|
||||
process_id.emplace_back(main_pid);
|
||||
op_process = BaseCpu::op_process_shared;
|
||||
fetched_all_process = BaseCpu::fetched_all_process_shared;
|
||||
for (auto item : op_process) {
|
||||
for (auto id : item.second) {
|
||||
process_id.emplace_back(id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
float user_util = 0, sys_util = 0;
|
||||
for (auto pid : process_id) {
|
||||
std::string stat_path = "/proc/" + std::to_string(pid) + "/stat";
|
||||
|
||||
std::ifstream file(stat_path);
|
||||
if (!file.is_open()) {
|
||||
MS_LOG(WARNING) << "Open CPU file failed when collect CPU information";
|
||||
continue;
|
||||
}
|
||||
std::string str;
|
||||
getline(file, str);
|
||||
uint64_t user = 0, sys = 0;
|
||||
if (std::sscanf(str.c_str(), "%*d %*s %*s %*lu %*lu %*lu %*lu %*lu %*lu %*lu %*lu %*lu %*lu %lu %lu", &user,
|
||||
&sys) == EOF) {
|
||||
file.close();
|
||||
return Status(StatusCode::kUnexpectedError, "Get device CPU failed.");
|
||||
}
|
||||
file.close();
|
||||
|
||||
// Calculate the utilization from the second sampling
|
||||
if (!first_collect_ && (pre_process_stat_.find(pid) != pre_process_stat_.end())) {
|
||||
user_util += (user - pre_process_stat_[pid].user_stat_) * 1.0 / (total_stat_ - pre_total_stat_) * 100;
|
||||
sys_util += (sys - pre_process_stat_[pid].sys_stat_) * 1.0 / (total_stat_ - pre_total_stat_) * 100;
|
||||
}
|
||||
pre_process_stat_[pid].user_stat_ = user;
|
||||
pre_process_stat_[pid].sys_stat_ = sys;
|
||||
}
|
||||
if (!first_collect_) {
|
||||
CpuProcessUtil info;
|
||||
info.user_utilization_ = user_util;
|
||||
info.sys_utilization_ = sys_util;
|
||||
process_util_.emplace_back(info);
|
||||
}
|
||||
pre_total_stat_ = total_stat_;
|
||||
first_collect_ = false;
|
||||
pre_fetched_state = fetched_all_process;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status ProcessCpu::GetTotalCpuTime(uint64_t *total_stat) {
|
||||
std::ifstream file("/proc/stat");
|
||||
if (!file.is_open()) {
|
||||
MS_LOG(WARNING) << "Open CPU file failed when collect CPU information";
|
||||
return Status::OK();
|
||||
}
|
||||
std::string str;
|
||||
getline(file, str);
|
||||
uint64_t user = 0, sys = 0, idle = 0, iowait = 0, nice = 0, irq = 0, softirq = 0;
|
||||
if (std::sscanf(str.c_str(), "%*s %lu %lu %lu %lu %lu %lu %lu", &user, &nice, &sys, &idle, &iowait, &irq, &softirq) ==
|
||||
EOF) {
|
||||
file.close();
|
||||
return Status(StatusCode::kUnexpectedError, "Get device CPU failed.");
|
||||
}
|
||||
file.close();
|
||||
*total_stat = user + nice + sys + idle + iowait + irq + softirq;
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status ProcessCpu::Collect(ExecutionTree *tree) {
|
||||
if (first_collect_) {
|
||||
#if defined(USING_LINUX)
|
||||
cpu_processor_num = get_nprocs_conf();
|
||||
#endif
|
||||
}
|
||||
RETURN_IF_NOT_OK(ParseCpuInfo());
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status ProcessCpu::SaveToFile(const std::string &file_path) {
|
||||
Path path = Path(file_path);
|
||||
json output;
|
||||
if (path.Exists()) {
|
||||
MS_LOG(DEBUG) << file_path << "already exist.";
|
||||
std::ifstream file(file_path);
|
||||
file >> output;
|
||||
} else {
|
||||
output["sampling_interval"] = GlobalContext::config_manager()->monitor_sampling_interval();
|
||||
}
|
||||
|
||||
std::vector<int16_t> user_util;
|
||||
std::transform(process_util_.begin(), process_util_.end(), std::back_inserter(user_util),
|
||||
[&](const CpuProcessUtil &info) { return uint16_t(info.user_utilization_ * cpu_processor_num); });
|
||||
std::vector<int16_t> sys_util;
|
||||
std::transform(process_util_.begin(), process_util_.end(), std::back_inserter(sys_util),
|
||||
[&](const CpuProcessUtil &info) { return uint16_t(info.sys_utilization_ * cpu_processor_num); });
|
||||
|
||||
output["process_info"] = {{"user_utilization", user_util}, {"sys_utilization", sys_util}};
|
||||
output["cpu_processor_num"] = cpu_processor_num;
|
||||
// Discard the content of the file when opening.
|
||||
std::ofstream os(file_path, std::ios::trunc);
|
||||
os << output;
|
||||
|
||||
MS_LOG(INFO) << "Save process CPU success.";
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status CpuSampling::CollectTimeStamp() {
|
||||
time_stamp_.emplace_back(ProfilingTime::GetCurMilliSecond());
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// Sample action
|
||||
Status CpuSampling::Sample() {
|
||||
// Collect cpu information
|
||||
for (auto cpu : cpu_) {
|
||||
RETURN_IF_NOT_OK(cpu->Collect(this->tree_));
|
||||
}
|
||||
|
||||
// Collect time stamp
|
||||
RETURN_IF_NOT_OK(CollectTimeStamp());
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status CpuSampling::SaveTimeStampToFile() {
|
||||
// Save time stamp to json file
|
||||
// If the file is already exist, simply add the data to corresponding field.
|
||||
Path path = Path(file_path_);
|
||||
json output;
|
||||
if (path.Exists()) {
|
||||
std::ifstream file(file_path_);
|
||||
file >> output;
|
||||
}
|
||||
output["time_stamp"] = time_stamp_;
|
||||
std::ofstream os(file_path_, std::ios::trunc);
|
||||
os << output;
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status CpuSampling::SaveSamplingItervalToFile() {
|
||||
// If the file is already exist, simply add the data to corresponding field.
|
||||
Path path = Path(file_path_);
|
||||
json output;
|
||||
if (path.Exists()) {
|
||||
std::ifstream file(file_path_);
|
||||
file >> output;
|
||||
}
|
||||
output["sampling_interval"] = GlobalContext::config_manager()->monitor_sampling_interval();
|
||||
std::ofstream os(file_path_, std::ios::trunc);
|
||||
os << output;
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// Save profiling data to file
|
||||
Status CpuSampling::SaveToFile() {
|
||||
// Save time stamp to json file
|
||||
RETURN_IF_NOT_OK(SaveTimeStampToFile());
|
||||
|
||||
// Save time stamp to json file
|
||||
RETURN_IF_NOT_OK(SaveSamplingItervalToFile());
|
||||
|
||||
// Save cpu information to json file
|
||||
for (auto cpu : cpu_) {
|
||||
RETURN_IF_NOT_OK(cpu->SaveToFile(file_path_));
|
||||
}
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status CpuSampling::Init(const std::string &dir_path, const std::string &device_id) {
|
||||
file_path_ = (Path(dir_path) / Path("minddata_cpu_utilization_" + device_id + ".json")).toString();
|
||||
std::shared_ptr<DeviceCpu> device_cpu = std::make_shared<DeviceCpu>();
|
||||
std::shared_ptr<OperatorCpu> operator_cpu = std::make_shared<OperatorCpu>();
|
||||
std::shared_ptr<ProcessCpu> process_cpu = std::make_shared<ProcessCpu>();
|
||||
cpu_.push_back(device_cpu);
|
||||
cpu_.push_back(operator_cpu);
|
||||
cpu_.push_back(process_cpu);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status CpuSampling::ChangeFileMode() {
|
||||
if (chmod(common::SafeCStr(file_path_), S_IRUSR | S_IWUSR) == -1) {
|
||||
std::string err_str = "Change file mode failed," + file_path_;
|
||||
return Status(StatusCode::kUnexpectedError, err_str);
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
|
@ -0,0 +1,201 @@
|
|||
/**
|
||||
* Copyright 2021 Huawei Technologies Co., Ltd
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_CPU_SAMPLING_H
|
||||
#define MINDSPORE_CCSRC_MINDDATA_DATASET_CPU_SAMPLING_H
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <nlohmann/json.hpp>
|
||||
#include "minddata/dataset/engine/perf/profiling.h"
|
||||
#include "minddata/dataset/engine/datasetops/dataset_op.h"
|
||||
|
||||
namespace mindspore {
|
||||
namespace dataset {
|
||||
class ExecutionTree;
|
||||
|
||||
// CPU information from /proc/stat or /proc/pid/stat file
|
||||
typedef struct CpuStat_s {
|
||||
uint64_t user_stat_;
|
||||
uint64_t sys_stat_;
|
||||
uint64_t io_stat_;
|
||||
uint64_t idle_stat_;
|
||||
uint64_t total_stat_;
|
||||
} CpuStat;
|
||||
|
||||
// Cpu utilization
|
||||
typedef struct CpuInfo_s {
|
||||
uint8_t user_utilization_;
|
||||
uint8_t sys_utilization_;
|
||||
uint8_t io_utilization_;
|
||||
uint8_t idle_utilization_;
|
||||
} CpuUtil;
|
||||
|
||||
// CPU utilization of operator
|
||||
typedef struct CpuOpInfo_s {
|
||||
float user_utilization_;
|
||||
float sys_utilization_;
|
||||
int32_t op_id;
|
||||
} CpuOpUtil;
|
||||
|
||||
// CPU utilization of process
|
||||
typedef struct CpuProcessInfo_s {
|
||||
float user_utilization_;
|
||||
float sys_utilization_;
|
||||
} CpuProcessUtil;
|
||||
|
||||
// CPU stat of operator
|
||||
typedef struct CpuOpStat_s {
|
||||
uint64_t user_stat_;
|
||||
uint64_t sys_stat_;
|
||||
} CpuOpStat;
|
||||
|
||||
class BaseCpu {
|
||||
public:
|
||||
BaseCpu();
|
||||
~BaseCpu() = default;
|
||||
// Collect CPU information
|
||||
virtual Status Collect(ExecutionTree *tree) = 0;
|
||||
virtual Status SaveToFile(const std::string &file_path) = 0;
|
||||
|
||||
protected:
|
||||
std::vector<CpuUtil> cpu_util_;
|
||||
CpuStat pre_cpu_stat_;
|
||||
static bool fetched_all_process_shared;
|
||||
static std::unordered_map<int32_t, std::vector<pid_t>> op_process_shared;
|
||||
bool fetched_all_process = false;
|
||||
bool pre_fetched_state = false;
|
||||
std::unordered_map<int32_t, std::vector<pid_t>> op_process;
|
||||
int32_t cpu_processor_num;
|
||||
};
|
||||
|
||||
// Collect device CPU information
|
||||
class DeviceCpu : public BaseCpu {
|
||||
public:
|
||||
DeviceCpu() : pre_running_process_(0), pre_context_switch_count_(0), first_collect_(true) {}
|
||||
~DeviceCpu() = default;
|
||||
Status Collect(ExecutionTree *tree) override;
|
||||
Status SaveToFile(const std::string &file_path) override;
|
||||
|
||||
private:
|
||||
// Get CPU information, include use/sys/idle/io utilization
|
||||
Status ParseCpuInfo(const std::string &str);
|
||||
|
||||
// Get context switch count
|
||||
Status ParseCtxt(const std::string &str);
|
||||
|
||||
// Get running process count
|
||||
Status ParseRunningProcess(const std::string &str);
|
||||
|
||||
std::vector<uint32_t> running_process_;
|
||||
std::vector<uint64_t> context_switch_count_;
|
||||
uint32_t pre_running_process_;
|
||||
uint64_t pre_context_switch_count_;
|
||||
bool first_collect_;
|
||||
};
|
||||
|
||||
// Collect operator CPU information
|
||||
class OperatorCpu : public BaseCpu {
|
||||
public:
|
||||
OperatorCpu() : first_collect_(true) {}
|
||||
~OperatorCpu() = default;
|
||||
Status Collect(ExecutionTree *tree) override;
|
||||
Status SaveToFile(const std::string &file_path) override;
|
||||
|
||||
private:
|
||||
// Get cpu information, include use/sys/idle/io utilization
|
||||
Status ParseCpuInfo(int32_t op_id, int64_t thread_id,
|
||||
std::unordered_map<int32_t, std::unordered_map<int64_t, CpuOpStat>> *op_stat);
|
||||
|
||||
// Get the total CPU time of device
|
||||
Status GetTotalCpuTime(uint64_t *total_stat);
|
||||
|
||||
// Store the CPU utilization of each operator
|
||||
std::vector<std::vector<CpuOpUtil>> cpu_op_util_;
|
||||
|
||||
bool first_collect_;
|
||||
|
||||
// Store the id and its corresponding threads.
|
||||
std::unordered_map<int32_t, std::vector<pid_t>> op_thread;
|
||||
std::unordered_map<int32_t, std::unordered_map<int64_t, CpuOpStat>> pre_op_stat_;
|
||||
uint64_t pre_total_stat_;
|
||||
int32_t id_count = 0;
|
||||
};
|
||||
|
||||
// Collect operator CPU information
|
||||
class ProcessCpu : public BaseCpu {
|
||||
public:
|
||||
ProcessCpu() : first_collect_(true) {}
|
||||
~ProcessCpu() = default;
|
||||
Status Collect(ExecutionTree *tree) override;
|
||||
Status SaveToFile(const std::string &file_path) override;
|
||||
|
||||
private:
|
||||
// Get CPU information, include use/sys/idle/io utilization
|
||||
Status ParseCpuInfo();
|
||||
|
||||
// Get the total CPU time of device
|
||||
Status GetTotalCpuTime(uint64_t *total_stat);
|
||||
|
||||
bool first_collect_;
|
||||
std::vector<CpuProcessUtil> process_util_;
|
||||
uint64_t pre_total_stat_;
|
||||
std::unordered_map<int64_t, CpuOpStat> pre_process_stat_;
|
||||
std::vector<pid_t> process_id;
|
||||
};
|
||||
|
||||
// Sampling CPU information
|
||||
// It support JSON serialization for external usage.
|
||||
class CpuSampling : public Sampling {
|
||||
using TimeStamp = std::vector<uint32_t>;
|
||||
|
||||
public:
|
||||
explicit CpuSampling(ExecutionTree *tree) : tree_(tree) {}
|
||||
|
||||
~CpuSampling() = default;
|
||||
|
||||
// Driver function for CPU sampling.
|
||||
// This function samples the CPU information of device/process/op
|
||||
Status Sample() override;
|
||||
|
||||
std::string Name() const override { return kCpuSamplingName; }
|
||||
|
||||
// Save sampling data to file
|
||||
// @return Status - The error code return
|
||||
Status SaveToFile() override;
|
||||
|
||||
Status Init(const std::string &dir_path, const std::string &device_id) override;
|
||||
|
||||
// Change file mode after save CPU data
|
||||
Status ChangeFileMode() override;
|
||||
|
||||
private:
|
||||
Status CollectTimeStamp();
|
||||
|
||||
Status SaveTimeStampToFile();
|
||||
|
||||
Status SaveSamplingItervalToFile();
|
||||
|
||||
ExecutionTree *tree_ = nullptr; // ExecutionTree pointer
|
||||
std::vector<std::shared_ptr<BaseCpu>> cpu_; // CPU information of device/process/op
|
||||
TimeStamp time_stamp_; // Time stamp
|
||||
};
|
||||
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
|
||||
#endif // MINDSPORE_CCSRC_MINDDATA_DATASET_CPU_SAMPLING_H
|
|
@ -24,7 +24,7 @@ namespace mindspore {
|
|||
namespace dataset {
|
||||
|
||||
Status DatasetIteratorTracing::Record(const int32_t type, const int32_t extra_info, const int32_t batch_num,
|
||||
const int32_t value) {
|
||||
const int32_t value, const uint64_t time_stamp) {
|
||||
// Format: "type extra-info batch-num value"
|
||||
// type: 0: time, 1: connector size
|
||||
// extra-info: if type is 0 - 0: pipeline time, 1: push tdt time, 2: batch time
|
||||
|
@ -36,7 +36,7 @@ Status DatasetIteratorTracing::Record(const int32_t type, const int32_t extra_in
|
|||
// 0 0 20 10 - The 20th batch took 10ms to get data from pipeline.
|
||||
// 1 64 20 5 - Connector size is 5 when get the 20th batch.Connector capacity is 64.
|
||||
std::string data = std::to_string(type) + " " + std::to_string(extra_info) + " " + std::to_string(batch_num) + " " +
|
||||
std::to_string(value);
|
||||
std::to_string(value) + " " + std::to_string(time_stamp);
|
||||
value_.emplace_back(data);
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
|
@ -33,7 +33,8 @@ class DatasetIteratorTracing : public Tracing {
|
|||
|
||||
// Record tracing data
|
||||
// @return Status The status code returned
|
||||
Status Record(const int32_t type, const int32_t extra_info, const int32_t batch_num, const int32_t value);
|
||||
Status Record(const int32_t type, const int32_t extra_info, const int32_t batch_num, const int32_t value,
|
||||
const uint64_t time_stamp);
|
||||
|
||||
std::string Name() const override { return kDatasetIteratorTracingName; };
|
||||
|
||||
|
|
|
@ -24,7 +24,7 @@ namespace mindspore {
|
|||
namespace dataset {
|
||||
|
||||
Status DeviceQueueTracing::Record(const int32_t type, const int32_t extra_info, const int32_t batch_num,
|
||||
const int32_t value) {
|
||||
const int32_t value, const uint64_t time_stamp) {
|
||||
// Format: "type extra-info batch-num value"
|
||||
// type: 0: time, 1: connector size
|
||||
// extra-info: if type is 0 - 0: pipeline time, 1: push tdt time, 2: batch time
|
||||
|
@ -32,11 +32,12 @@ Status DeviceQueueTracing::Record(const int32_t type, const int32_t extra_info,
|
|||
// batch-num: batch number
|
||||
// value: if type is 0 - value is time(ms)
|
||||
// if type is 1 - value is connector size
|
||||
// time-stamp: time stamp
|
||||
// Examples:
|
||||
// 0 0 20 10 - The 20th batch took 10ms to get data from pipeline.
|
||||
// 1 64 20 5 - Connector size is 5 when get the 20th batch.Connector capacity is 64.
|
||||
// 0 0 20 10 xxx- The 20th batch took 10ms to get data from pipeline.
|
||||
// 1 64 20 5 xxx- Connector size is 5 when get the 20th batch.Connector capacity is 64.
|
||||
std::string data = std::to_string(type) + " " + std::to_string(extra_info) + " " + std::to_string(batch_num) + " " +
|
||||
std::to_string(value);
|
||||
std::to_string(value) + " " + std::to_string(time_stamp);
|
||||
value_.emplace_back(data);
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
|
@ -33,7 +33,8 @@ class DeviceQueueTracing : public Tracing {
|
|||
|
||||
// Record tracing data
|
||||
// @return Status The status code returned
|
||||
Status Record(const int32_t type, const int32_t extra_info, const int32_t batch_num, const int32_t value);
|
||||
Status Record(const int32_t type, const int32_t extra_info, const int32_t batch_num, const int32_t value,
|
||||
const uint64_t time_stamp);
|
||||
|
||||
std::string Name() const override { return kDeviceQueueTracingName; };
|
||||
|
||||
|
|
|
@ -24,6 +24,7 @@
|
|||
#include "minddata/dataset/engine/perf/device_queue_tracing.h"
|
||||
#include "minddata/dataset/engine/perf/connector_size.h"
|
||||
#include "minddata/dataset/engine/perf/connector_throughput.h"
|
||||
#include "minddata/dataset/engine/perf/cpu_sampling.h"
|
||||
#include "minddata/dataset/engine/perf/dataset_iterator_tracing.h"
|
||||
#include "minddata/dataset/util/log_adapter.h"
|
||||
|
||||
|
@ -79,6 +80,9 @@ Status ProfilingManager::Initialize() {
|
|||
std::shared_ptr<Sampling> connector_thr_sampling = std::make_shared<ConnectorThroughput>(tree_);
|
||||
RETURN_IF_NOT_OK(RegisterSamplingNode(connector_thr_sampling));
|
||||
|
||||
std::shared_ptr<Sampling> cpu_sampling = std::make_shared<CpuSampling>(tree_);
|
||||
RETURN_IF_NOT_OK(RegisterSamplingNode(cpu_sampling));
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
@ -168,7 +172,7 @@ Status ProfilingManager::ChangeFileMode() {
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
int64_t ProfilingTime::GetCurMilliSecond() {
|
||||
uint64_t ProfilingTime::GetCurMilliSecond() {
|
||||
// because cpplint does not allow using namespace
|
||||
using std::chrono::duration_cast;
|
||||
using std::chrono::milliseconds;
|
||||
|
|
|
@ -33,6 +33,7 @@ const char kDeviceQueueTracingName[] = "Device_Queue_Tracing";
|
|||
const char kDatasetIteratorTracingName[] = "Dataset_Iterator_Tracing";
|
||||
const char kConnectorSizeSamplingName[] = "Connector_Size_Sampling";
|
||||
const char kConnectorThroughputSamplingName[] = "Connector_Throughput_Sampling";
|
||||
const char kCpuSamplingName[] = "Cpu_Sampling";
|
||||
|
||||
// Profiling is a class of basic unit of profiling action
|
||||
// This base class encapsulate the serialization output logic
|
||||
|
@ -150,7 +151,7 @@ enum ProfilingTimeSubType {
|
|||
|
||||
class ProfilingTime {
|
||||
public:
|
||||
static int64_t GetCurMilliSecond();
|
||||
static uint64_t GetCurMilliSecond();
|
||||
};
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -246,8 +246,10 @@ Status TreeAdapter::GetNext(TensorRow *row) {
|
|||
RETURN_IF_NOT_OK(cur_db_->PopRow(row));
|
||||
// Record profiling info
|
||||
if (tracing_ != nullptr) {
|
||||
uint64_t end_time = ProfilingTime::GetCurMilliSecond();
|
||||
cur_batch_num_++;
|
||||
RETURN_IF_NOT_OK(tracing_->Record(CONNECTOR_DEPTH, cur_connector_capacity_, cur_batch_num_, cur_connector_size_));
|
||||
RETURN_IF_NOT_OK(
|
||||
tracing_->Record(CONNECTOR_DEPTH, cur_connector_capacity_, cur_batch_num_, cur_connector_size_, end_time));
|
||||
}
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
|
@ -44,8 +44,8 @@ void Task::operator()() {
|
|||
|
||||
#if !defined(_WIN32) && !defined(_WIN64) && !defined(__ANDROID__) && !defined(ANDROID) && !defined(__APPLE__)
|
||||
native_handle_ = pthread_self();
|
||||
thread_id_ = syscall(SYS_gettid);
|
||||
#endif
|
||||
|
||||
try {
|
||||
// Previously there is a timing hole where the thread is spawn but hit error immediately before we can set
|
||||
// the TaskGroup pointer and register. We move the registration logic to here (after we spawn) so we can
|
||||
|
@ -105,8 +105,9 @@ Status Task::GetTaskErrorIfAny() const {
|
|||
}
|
||||
}
|
||||
|
||||
Task::Task(const std::string &myName, const std::function<Status()> &f)
|
||||
Task::Task(const std::string &myName, const std::function<Status()> &f, int32_t operator_id)
|
||||
: my_name_(myName),
|
||||
operator_id_(operator_id),
|
||||
rc_(),
|
||||
fnc_obj_(f),
|
||||
task_group_(nullptr),
|
||||
|
|
|
@ -18,6 +18,7 @@
|
|||
|
||||
#if !defined(_WIN32) && !defined(_WIN64) && !defined(__ANDROID__) && !defined(ANDROID) && !defined(__APPLE__)
|
||||
#include <pthread.h>
|
||||
#include <sys/syscall.h>
|
||||
#endif
|
||||
#include <chrono>
|
||||
#include <exception>
|
||||
|
@ -48,7 +49,7 @@ class Task : public IntrpResource {
|
|||
|
||||
enum class WaitFlag : int { kBlocking, kNonBlocking };
|
||||
|
||||
Task(const std::string &myName, const std::function<Status()> &f);
|
||||
Task(const std::string &myName, const std::function<Status()> &f, int32_t operator_id = -1);
|
||||
|
||||
// Future objects are not copyable.
|
||||
Task(const Task &) = delete;
|
||||
|
@ -87,8 +88,12 @@ class Task : public IntrpResource {
|
|||
|
||||
std::thread::id get_id() { return id_; }
|
||||
|
||||
pid_t get_linux_id() { return thread_id_; }
|
||||
|
||||
std::string MyName() const { return my_name_; }
|
||||
|
||||
int32_t get_operator_id() { return operator_id_; }
|
||||
|
||||
// An operator used by std::find
|
||||
bool operator==(const Task &other) const { return (this == &other); }
|
||||
|
||||
|
@ -107,6 +112,8 @@ class Task : public IntrpResource {
|
|||
private:
|
||||
mutable std::mutex mux_;
|
||||
std::string my_name_;
|
||||
int32_t operator_id_;
|
||||
pid_t thread_id_;
|
||||
Status rc_;
|
||||
WaitPost wp_;
|
||||
// Task need to provide definition for this function. It
|
||||
|
|
|
@ -25,7 +25,7 @@ TaskManager *TaskManager::instance_ = nullptr;
|
|||
std::once_flag TaskManager::init_instance_flag_;
|
||||
// This takes the same parameter as Task constructor.
|
||||
Status TaskManager::CreateAsyncTask(const std::string &my_name, const std::function<Status()> &f, TaskGroup *vg,
|
||||
Task **task) {
|
||||
Task **task, int32_t operator_id) {
|
||||
// We need to block destructor coming otherwise we will deadlock. We will grab the
|
||||
// stateLock in shared allowing CreateAsyncTask to run concurrently.
|
||||
SharedLock stateLck(&state_lock_);
|
||||
|
@ -33,7 +33,7 @@ Status TaskManager::CreateAsyncTask(const std::string &my_name, const std::funct
|
|||
if (ServiceState() == STATE::kStopInProg || ServiceState() == STATE::kStopped) {
|
||||
return Status(StatusCode::kInterrupted, __LINE__, __FILE__, "TaskManager is shutting down");
|
||||
}
|
||||
RETURN_IF_NOT_OK(GetFreeTask(my_name, f, task));
|
||||
RETURN_IF_NOT_OK(GetFreeTask(my_name, f, task, operator_id));
|
||||
if (vg == nullptr) {
|
||||
RETURN_STATUS_UNEXPECTED("TaskGroup is null");
|
||||
}
|
||||
|
@ -248,7 +248,8 @@ void TaskManager::ReturnFreeTask(Task *p) noexcept {
|
|||
}
|
||||
}
|
||||
|
||||
Status TaskManager::GetFreeTask(const std::string &my_name, const std::function<Status()> &f, Task **p) {
|
||||
Status TaskManager::GetFreeTask(const std::string &my_name, const std::function<Status()> &f, Task **p,
|
||||
int32_t operator_id) {
|
||||
if (p == nullptr) {
|
||||
RETURN_STATUS_UNEXPECTED("p is null");
|
||||
}
|
||||
|
@ -262,18 +263,19 @@ Status TaskManager::GetFreeTask(const std::string &my_name, const std::function<
|
|||
}
|
||||
}
|
||||
if (q) {
|
||||
new (q) Task(my_name, f);
|
||||
new (q) Task(my_name, f, operator_id);
|
||||
} else {
|
||||
std::shared_ptr<MemoryPool> mp = Services::GetInstance().GetServiceMemPool();
|
||||
Status rc;
|
||||
q = new (&rc, mp) Task(my_name, f);
|
||||
q = new (&rc, mp) Task(my_name, f, operator_id);
|
||||
RETURN_IF_NOT_OK(rc);
|
||||
}
|
||||
*p = q;
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
Status TaskGroup::CreateAsyncTask(const std::string &my_name, const std::function<Status()> &f, Task **ppTask) {
|
||||
Status TaskGroup::CreateAsyncTask(const std::string &my_name, const std::function<Status()> &f, Task **ppTask,
|
||||
int32_t operator_id) {
|
||||
auto pMytask = TaskManager::FindMe();
|
||||
// We need to block ~TaskGroup coming otherwise we will deadlock. We will grab the
|
||||
// stateLock in shared allowing CreateAsyncTask to run concurrently.
|
||||
|
@ -293,7 +295,7 @@ Status TaskGroup::CreateAsyncTask(const std::string &my_name, const std::functio
|
|||
return pMytask->IsMasterThread() ? rc_ : Status(StatusCode::kInterrupted);
|
||||
}
|
||||
}
|
||||
RETURN_IF_NOT_OK(dm.CreateAsyncTask(my_name, f, this, &pTask));
|
||||
RETURN_IF_NOT_OK(dm.CreateAsyncTask(my_name, f, this, &pTask, operator_id));
|
||||
if (ppTask) {
|
||||
*ppTask = pTask;
|
||||
}
|
||||
|
|
|
@ -75,7 +75,8 @@ class TaskManager : public Service {
|
|||
// API
|
||||
// This takes the same parameter as Task constructor. Take a look
|
||||
// of the test-thread.cc for usage.
|
||||
Status CreateAsyncTask(const std::string &my_name, const std::function<Status()> &f, TaskGroup *vg, Task **);
|
||||
Status CreateAsyncTask(const std::string &my_name, const std::function<Status()> &f, TaskGroup *vg, Task **,
|
||||
int32_t operator_id = -1);
|
||||
|
||||
// Same usage as boot thread group
|
||||
Status join_all();
|
||||
|
@ -100,7 +101,7 @@ class TaskManager : public Service {
|
|||
|
||||
void ReturnFreeTask(Task *p) noexcept;
|
||||
|
||||
Status GetFreeTask(const std::string &my_name, const std::function<Status()> &f, Task **p);
|
||||
Status GetFreeTask(const std::string &my_name, const std::function<Status()> &f, Task **p, int32_t operator_id = -1);
|
||||
|
||||
Status WatchDog();
|
||||
|
||||
|
@ -129,7 +130,8 @@ class TaskGroup : public Service {
|
|||
friend class Task;
|
||||
friend class TaskManager;
|
||||
|
||||
Status CreateAsyncTask(const std::string &my_name, const std::function<Status()> &f, Task **pTask = nullptr);
|
||||
Status CreateAsyncTask(const std::string &my_name, const std::function<Status()> &f, Task **pTask = nullptr,
|
||||
int32_t operator_id = -1);
|
||||
|
||||
void interrupt_all() noexcept;
|
||||
|
||||
|
@ -137,6 +139,8 @@ class TaskGroup : public Service {
|
|||
|
||||
int size() const noexcept { return grp_list_.count; }
|
||||
|
||||
List<Task> GetTask() const noexcept { return grp_list_; }
|
||||
|
||||
Status DoServiceStart() override { return Status::OK(); }
|
||||
|
||||
Status DoServiceStop() override;
|
||||
|
|
|
@ -107,6 +107,25 @@ def zip(datasets):
|
|||
return ZipDataset(datasets)
|
||||
|
||||
|
||||
def _get_operator_process():
|
||||
"""
|
||||
Inner implemented method, mainly for passing sub-process id in C layer
|
||||
|
||||
Returns:
|
||||
dict, mapping dict of operator id and corresponding process id.
|
||||
"""
|
||||
global _OP_PROCESS
|
||||
process_info = _OP_PROCESS
|
||||
op_process = dict()
|
||||
keys = process_info.keys()
|
||||
fetched_all = True
|
||||
for key in keys:
|
||||
op_process[key] = list(process_info[key][1])
|
||||
item_full = (len(process_info[key][1]) == process_info[key][0])
|
||||
fetched_all = fetched_all and item_full
|
||||
return op_process, fetched_all
|
||||
|
||||
|
||||
class Dataset:
|
||||
"""
|
||||
Abstract class to represent a dataset in DataEngine's data pipeline.
|
||||
|
@ -156,11 +175,47 @@ class Dataset:
|
|||
parent = self.parent
|
||||
self.parent = []
|
||||
dataset = copy.deepcopy(self)
|
||||
global _OP_NAME
|
||||
_OP_NAME = Dataset._get_operator_id(dataset)
|
||||
ir_tree = dataset.parse_tree()
|
||||
self.parent = parent
|
||||
_init_device_info()
|
||||
return ir_tree, dataset
|
||||
|
||||
@staticmethod
|
||||
def _get_operator_id(dataset):
|
||||
"""
|
||||
Internal method to iterate the tree and obtain op_id of each operator.
|
||||
|
||||
Returns:
|
||||
Dataset, the root dataset of the tree.
|
||||
"""
|
||||
op_name = dict()
|
||||
generator_process = dict()
|
||||
op_name[str(dataset)] = 0
|
||||
op_id = 1
|
||||
|
||||
def process_name(datasets, operator_id):
|
||||
if not datasets:
|
||||
return 0
|
||||
temp = []
|
||||
for item in datasets:
|
||||
for d in item.children:
|
||||
temp.append(d)
|
||||
op_name[str(d)] = operator_id
|
||||
if isinstance(d, GeneratorDataset) and d.sample_fn:
|
||||
if d.sample_fn.pid:
|
||||
generator_process[operator_id] = [d.num_parallel_workers, set(d.sample_fn.pid)]
|
||||
|
||||
operator_id = operator_id + 1
|
||||
return process_name(temp, operator_id)
|
||||
|
||||
process_name([dataset], op_id)
|
||||
if generator_process:
|
||||
global _OP_PROCESS
|
||||
_OP_PROCESS.update(generator_process)
|
||||
return op_name
|
||||
|
||||
def parse_tree(self):
|
||||
"""
|
||||
Internal method to parse the API tree into an IR tree.
|
||||
|
@ -2202,7 +2257,8 @@ class ShuffleDataset(Dataset):
|
|||
# Pyfunc collection for multiprocess pyfunc
|
||||
# This global variable will only be used within subprocesses
|
||||
_GLOBAL_PYFUNC_LIST = []
|
||||
|
||||
_OP_NAME = dict()
|
||||
_OP_PROCESS = dict()
|
||||
|
||||
# Pyfunc worker init function
|
||||
# Python multiprocessing library forbid sending lambda function through pipe.
|
||||
|
@ -2215,6 +2271,9 @@ def _pyfunc_worker_init(pyfunc_list):
|
|||
# Pyfunc worker execution function
|
||||
# All exceptions will be raised to main processes
|
||||
def _pyfunc_worker_exec(index, *args):
|
||||
"""
|
||||
Internal function for call certain pyfunc in python process.
|
||||
"""
|
||||
try:
|
||||
return _GLOBAL_PYFUNC_LIST[index](*args)
|
||||
except KeyboardInterrupt:
|
||||
|
@ -3509,6 +3568,7 @@ class SamplerFn:
|
|||
self.multi_process = multi_process
|
||||
self.joined = False
|
||||
self.ppid = os.getpid()
|
||||
self.pid = []
|
||||
# Event for end of epoch
|
||||
if multi_process is True:
|
||||
self.eof = multiprocessing.Event()
|
||||
|
@ -3523,6 +3583,7 @@ class SamplerFn:
|
|||
# which may cause deadlock. Therefore, the subprocess startup is performed in che initialization phase.
|
||||
# In this phase, the main process is not locked.
|
||||
worker.start()
|
||||
self.pid.append(worker.pid)
|
||||
else:
|
||||
worker = _GeneratorWorkerMt(dataset, self.eof)
|
||||
worker.daemon = True
|
||||
|
@ -3847,6 +3908,7 @@ class GeneratorDataset(MappableDataset):
|
|||
new_op.source_len = self.source_len
|
||||
new_op.saved_output_types = self.saved_output_types
|
||||
new_op.saved_output_shapes = self.saved_output_shapes
|
||||
sample_fn = None
|
||||
if hasattr(self, "__total_batch__"):
|
||||
new_op.__total_batch__ = self.__total_batch__
|
||||
if new_op.sampler is not None and hasattr(self.source, "__getitem__"):
|
||||
|
@ -3870,9 +3932,11 @@ class GeneratorDataset(MappableDataset):
|
|||
new_op.source = (lambda: _py_sampler_fn_mp(new_op.sample_ids, new_op.num_samples, sample_fn))
|
||||
else:
|
||||
new_op.source = (lambda: _py_sampler_fn(new_op.sample_ids, new_op.num_samples, self.source))
|
||||
new_op.sample_fn = sample_fn
|
||||
else:
|
||||
try:
|
||||
new_op.sampler = None
|
||||
new_op.sample_fn = sample_fn
|
||||
iter(self.source)
|
||||
except TypeError:
|
||||
# Use generator function if input callable
|
||||
|
|
Loading…
Reference in New Issue