forked from OSSInnovation/mindspore
Remove rows_per_buffer
This commit is contained in:
parent
c99fe1e412
commit
0d86cb1423
|
@ -42,7 +42,6 @@ PYBIND_REGISTER(ConfigManager, 0, ([](const py::module *m) {
|
|||
.def("get_numa_enable", &ConfigManager::numa_enable)
|
||||
.def("set_numa_enable", &ConfigManager::set_numa_enable)
|
||||
.def("get_op_connector_size", &ConfigManager::op_connector_size)
|
||||
.def("get_rows_per_buffer", &ConfigManager::rows_per_buffer)
|
||||
.def("get_seed", &ConfigManager::seed)
|
||||
.def("set_rank_id", &ConfigManager::set_rank_id)
|
||||
.def("get_worker_connector_size", &ConfigManager::worker_connector_size)
|
||||
|
@ -54,7 +53,6 @@ PYBIND_REGISTER(ConfigManager, 0, ([](const py::module *m) {
|
|||
.def("get_profiler_file_status", &ConfigManager::get_profiler_file_status)
|
||||
.def("set_num_parallel_workers", &ConfigManager::set_num_parallel_workers)
|
||||
.def("set_op_connector_size", &ConfigManager::set_op_connector_size)
|
||||
.def("set_rows_per_buffer", &ConfigManager::set_rows_per_buffer)
|
||||
.def("set_seed", &ConfigManager::set_seed)
|
||||
.def("set_worker_connector_size", &ConfigManager::set_worker_connector_size)
|
||||
.def("load", [](ConfigManager &c, std::string s) { THROW_IF_ERROR(c.LoadFile(s)); });
|
||||
|
|
|
@ -31,8 +31,7 @@
|
|||
namespace mindspore {
|
||||
namespace dataset {
|
||||
ConfigManager::ConfigManager()
|
||||
: rows_per_buffer_(kCfgRowsPerBuffer),
|
||||
num_parallel_workers_(kCfgParallelWorkers),
|
||||
: num_parallel_workers_(kCfgParallelWorkers),
|
||||
worker_connector_size_(kCfgWorkerConnectorSize),
|
||||
op_connector_size_(kCfgOpConnectorSize),
|
||||
rank_id_(kCfgDefaultRankId),
|
||||
|
@ -70,7 +69,6 @@ void ConfigManager::Print(std::ostream &out) const {
|
|||
// Don't show the test/internal ones. Only display the main ones here.
|
||||
// fyi, boolalpha tells the output stream to write "true" and "false" for bools
|
||||
out << "\nClient config settings :"
|
||||
<< "\nDataCache Rows per buffer : " << rows_per_buffer_
|
||||
<< "\nParallelOp workers : " << num_parallel_workers_
|
||||
<< "\nParallelOp worker connector size : " << worker_connector_size_
|
||||
<< "\nSize of each Connector : " << op_connector_size_ << std::endl;
|
||||
|
@ -78,7 +76,6 @@ void ConfigManager::Print(std::ostream &out) const {
|
|||
|
||||
// Private helper function that takes a nlohmann json format and populates the settings
|
||||
Status ConfigManager::FromJson(const nlohmann::json &j) {
|
||||
set_rows_per_buffer(j.value("rowsPerBuffer", rows_per_buffer_));
|
||||
set_num_parallel_workers(j.value("numParallelWorkers", num_parallel_workers_));
|
||||
set_worker_connector_size(j.value("workerConnectorSize", worker_connector_size_));
|
||||
set_op_connector_size(j.value("opConnectorSize", op_connector_size_));
|
||||
|
@ -115,9 +112,6 @@ Status ConfigManager::LoadFile(const std::string &settingsFile) {
|
|||
return rc;
|
||||
}
|
||||
|
||||
// Setter function
|
||||
void ConfigManager::set_rows_per_buffer(int32_t rows_per_buffer) { rows_per_buffer_ = rows_per_buffer; }
|
||||
|
||||
// Setter function
|
||||
void ConfigManager::set_num_parallel_workers(int32_t num_parallel_workers) {
|
||||
num_parallel_workers_ = num_parallel_workers;
|
||||
|
|
|
@ -74,10 +74,6 @@ class ConfigManager {
|
|||
// @return Status error code
|
||||
Status LoadFile(const std::string &settingsFile);
|
||||
|
||||
// getter function
|
||||
// @return The rows per buffer setting
|
||||
int32_t rows_per_buffer() const { return rows_per_buffer_; }
|
||||
|
||||
// getter function
|
||||
// @return The number of workers setting
|
||||
int32_t num_parallel_workers() const { return num_parallel_workers_; }
|
||||
|
@ -112,10 +108,6 @@ class ConfigManager {
|
|||
/// \return auto_num_workers_
|
||||
bool auto_num_workers() const { return auto_num_workers_; }
|
||||
|
||||
// setter function
|
||||
// @param rows_per_buffer - The setting to apply to the config
|
||||
void set_rows_per_buffer(int32_t rows_per_buffer);
|
||||
|
||||
// setter function
|
||||
// @param num_parallel_workers - The setting to apply to the config
|
||||
void set_num_parallel_workers(int32_t num_parallel_workers);
|
||||
|
@ -230,7 +222,6 @@ class ConfigManager {
|
|||
void set_auto_worker_config_(uint8_t cfg) { auto_worker_config_ = cfg; }
|
||||
|
||||
private:
|
||||
int32_t rows_per_buffer_;
|
||||
int32_t num_parallel_workers_;
|
||||
int32_t worker_connector_size_;
|
||||
int32_t op_connector_size_;
|
||||
|
|
|
@ -35,7 +35,7 @@ TensorRow::TensorRow(row_id_type id, const std::initializer_list<value_type> &ls
|
|||
TensorRow::TensorRow(const TensorRow &tr)
|
||||
: id_(tr.id_), path_(tr.path_), row_(tr.row_), tensor_row_flag_(tr.tensor_row_flag_) {}
|
||||
|
||||
TensorRow::TensorRow(TensorRow::TensorRowFlags flag) : tensor_row_flag_(flag) {}
|
||||
TensorRow::TensorRow(TensorRow::TensorRowFlags flag) : id_(kDefaultRowId), path_({}), tensor_row_flag_(flag) {}
|
||||
|
||||
TensorRow &TensorRow::operator=(const TensorRow &tr) {
|
||||
if (this == &tr) {
|
||||
|
|
|
@ -540,8 +540,7 @@ Status CachePerfRun::Run() {
|
|||
int64_t elapse_time = std::chrono::duration_cast<std::chrono::seconds>(end_tick - start_tick).count();
|
||||
std::cout << "Epoch one (build phase) elapsed time " << elapse_time << " seconds" << std::endl;
|
||||
|
||||
std::cout << "Epoch one (build phase) per pipeline per worker summary. Buffer size = " << cfg_.rows_per_buffer()
|
||||
<< std::endl;
|
||||
std::cout << "Epoch one (build phase) per pipeline per worker summary." << std::endl;
|
||||
PrintEpochSummary();
|
||||
|
||||
// Get some stats, but we need to connect first. The server will think it is the (n+1)th pipeline
|
||||
|
|
|
@ -228,17 +228,14 @@ Status CachePipelineRun::RunFirstEpoch() {
|
|||
}
|
||||
|
||||
std::vector<row_id_type> keys;
|
||||
auto rows_per_buffer = cfg_.rows_per_buffer();
|
||||
keys.reserve(rows_per_buffer);
|
||||
keys.reserve(1);
|
||||
int32_t worker_id = 0;
|
||||
for (auto i = start_row_; i <= end_row_; ++i) {
|
||||
keys.push_back(i);
|
||||
if (keys.size() == rows_per_buffer) {
|
||||
auto blk = std::make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone));
|
||||
RETURN_IF_NOT_OK(io_block_queues_[worker_id++ % num_workers]->Add(std::move(blk)));
|
||||
keys.clear();
|
||||
}
|
||||
}
|
||||
if (!keys.empty()) {
|
||||
auto blk = std::make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone));
|
||||
RETURN_IF_NOT_OK(io_block_queues_[worker_id++ % num_workers]->Add(std::move(blk)));
|
||||
|
@ -355,9 +352,8 @@ Status CachePipelineRun::WriterWorkerEntry(int32_t worker_id) {
|
|||
|
||||
Status CachePipelineRun::RunReadEpoch() {
|
||||
std::vector<row_id_type> keys;
|
||||
auto rows_per_buffer = cc_->GetPrefetchSize(); // We will use prefetch size to read.
|
||||
auto num_workers = cfg_.num_parallel_workers();
|
||||
keys.reserve(rows_per_buffer);
|
||||
keys.reserve(1);
|
||||
// Spawn workers
|
||||
auto f = std::bind(&CachePipelineRun::ReaderWorkerEntry, this, std::placeholders::_1);
|
||||
std::vector<Task *> worker_threads;
|
||||
|
@ -381,12 +377,10 @@ Status CachePipelineRun::RunReadEpoch() {
|
|||
int32_t worker_id = 0;
|
||||
for (auto id : all_keys) {
|
||||
keys.push_back(id);
|
||||
if (keys.size() == rows_per_buffer) {
|
||||
auto blk = std::make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone));
|
||||
RETURN_IF_NOT_OK(io_block_queues_[worker_id++ % num_workers]->Add(std::move(blk)));
|
||||
keys.clear();
|
||||
}
|
||||
}
|
||||
if (!keys.empty()) {
|
||||
auto blk = std::make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone));
|
||||
RETURN_IF_NOT_OK(io_block_queues_[worker_id++ % num_workers]->Add(std::move(blk)));
|
||||
|
|
|
@ -31,7 +31,6 @@ BarrierOp::Builder::Builder() {
|
|||
// using the various builder set methods.
|
||||
|
||||
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
|
||||
builder_rows_per_buffer_ = cfg->rows_per_buffer();
|
||||
builder_op_connector_size_ = cfg->op_connector_size();
|
||||
}
|
||||
|
||||
|
@ -39,17 +38,13 @@ Status BarrierOp::Builder::SanityCheck() const { return Status::OK(); }
|
|||
|
||||
Status BarrierOp::Builder::Build(std::shared_ptr<BarrierOp> *ptr) {
|
||||
RETURN_IF_NOT_OK(SanityCheck());
|
||||
*ptr = std::make_shared<BarrierOp>(builder_rows_per_buffer_, builder_op_connector_size_, builder_condition_name_,
|
||||
builder_condition_func_);
|
||||
*ptr = std::make_shared<BarrierOp>(builder_op_connector_size_, builder_condition_name_, builder_condition_func_);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// Construct BarrierOp here, local variables initialized in operator due to tree construction restrictions
|
||||
BarrierOp::BarrierOp(int32_t rows_per_buffer, int32_t op_connector_size, const std::string &condition_name,
|
||||
py::function condition_func)
|
||||
BarrierOp::BarrierOp(int32_t op_connector_size, const std::string &condition_name, py::function condition_func)
|
||||
: PipelineOp(op_connector_size),
|
||||
rows_per_buffer_(rows_per_buffer),
|
||||
buffer_id_(0),
|
||||
clean_up_(false),
|
||||
eof_(false),
|
||||
condition_name_(condition_name),
|
||||
|
|
|
@ -98,16 +98,13 @@ class BarrierOp : public PipelineOp {
|
|||
};
|
||||
|
||||
// Constructor for BarrierOp
|
||||
// @param rows_per_buffer - number of rows in output buffer
|
||||
// @param op_connector_size - connector size
|
||||
// @param condition_name - the condition name associated with this operator
|
||||
// @param condition_func - the blocking condition check per row
|
||||
// @note - currently rows_per_buffer should = 1 for barrier.
|
||||
// The reason for this is having other values would complicate how the pipeline behaves with other operators
|
||||
// One example of such case is having batch after barrier. Batch would be waiting for data and having
|
||||
// rows per buffer in this case can result in hanging
|
||||
BarrierOp(int32_t rows_per_buffer, int32_t op_connector_size, const std::string &condition_name,
|
||||
py::function condition_func);
|
||||
BarrierOp(int32_t op_connector_size, const std::string &condition_name, py::function condition_func);
|
||||
|
||||
// Destructor
|
||||
~BarrierOp();
|
||||
|
@ -156,10 +153,6 @@ class BarrierOp : public PipelineOp {
|
|||
bool clean_up_;
|
||||
// end of file state, we stop reading data and shut down
|
||||
bool eof_;
|
||||
// rows per buffer
|
||||
int32_t rows_per_buffer_;
|
||||
// buffer_id
|
||||
int32_t buffer_id_;
|
||||
// iterator to pull new rows, we only have one child
|
||||
std::unique_ptr<ChildIterator> child_iterator_;
|
||||
// condition name, to support multiple barriers
|
||||
|
|
|
@ -248,7 +248,7 @@ Status BatchOp::WorkerEntry(int32_t workerId) {
|
|||
RETURN_IF_NOT_OK(out_connector_->SendEOF(workerId));
|
||||
} else if (table_pair.second.ctrl_ == batchCtrl::kNoCtrl) {
|
||||
TensorRow new_row;
|
||||
RETURN_IF_NOT_OK(MakeBatchedBuffer(std::move(table_pair), &new_row));
|
||||
RETURN_IF_NOT_OK(MakeBatchedRow(std::move(table_pair), &new_row));
|
||||
RETURN_IF_NOT_OK(out_connector_->Add(std::move(new_row), workerId));
|
||||
}
|
||||
RETURN_IF_NOT_OK(worker_queues_[workerId]->PopFront(&table_pair));
|
||||
|
@ -256,7 +256,7 @@ Status BatchOp::WorkerEntry(int32_t workerId) {
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
Status BatchOp::MakeBatchedBuffer(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> table_pair, TensorRow *new_row) {
|
||||
Status BatchOp::MakeBatchedRow(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> table_pair, TensorRow *new_row) {
|
||||
RETURN_UNEXPECTED_IF_NULL(table_pair.first);
|
||||
#ifdef ENABLE_PYTHON
|
||||
if (!in_col_names_.empty()) RETURN_IF_NOT_OK(MapColumns(&table_pair)); // pass it through pyfunc
|
||||
|
|
|
@ -225,7 +225,7 @@ class BatchOp : public ParallelOp {
|
|||
|
||||
// Generate buffer with batched tensors
|
||||
// @return Status The status code returned
|
||||
Status MakeBatchedBuffer(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> table_pair, TensorRow *new_row);
|
||||
Status MakeBatchedRow(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> table_pair, TensorRow *new_row);
|
||||
|
||||
#ifdef ENABLE_PYTHON
|
||||
// Function that calls pyfunc to perform map on batch
|
||||
|
|
|
@ -45,14 +45,13 @@ Status CacheBase::Reset() {
|
|||
MS_LOG(DEBUG) << Name() << " performing a self-reset.";
|
||||
return Status::OK();
|
||||
}
|
||||
CacheBase::CacheBase(int32_t num_workers, int32_t op_connector_size, int32_t rows_per_buf,
|
||||
std::shared_ptr<CacheClient> cache_client, std::shared_ptr<SamplerRT> sampler)
|
||||
CacheBase::CacheBase(int32_t num_workers, int32_t op_connector_size, std::shared_ptr<CacheClient> cache_client,
|
||||
std::shared_ptr<SamplerRT> sampler)
|
||||
: ParallelOp(num_workers, op_connector_size, std::move(sampler)),
|
||||
row_cnt_(0),
|
||||
num_cache_miss_(0),
|
||||
cache_client_(std::move(cache_client)),
|
||||
rows_per_buffer_(rows_per_buf),
|
||||
prefetch_size_(rows_per_buffer_),
|
||||
prefetch_size_(1),
|
||||
num_prefetchers_(num_workers_) {
|
||||
// Adjust the prefetch size based on the number of workers.
|
||||
auto prefetch_sz_per_thread = cache_client_->GetPrefetchSize() / num_prefetchers_;
|
||||
|
@ -92,7 +91,7 @@ Status CacheBase::FetchSamplesToWorkers() {
|
|||
row_cnt_ = 0;
|
||||
++wait_cnt;
|
||||
std::vector<row_id_type> keys;
|
||||
keys.reserve(rows_per_buffer_);
|
||||
keys.reserve(1);
|
||||
std::vector<row_id_type> prefetch_keys;
|
||||
prefetch_keys.reserve(prefetch_size_);
|
||||
std::unique_ptr<DataBuffer> sampler_buffer;
|
||||
|
@ -107,16 +106,12 @@ Status CacheBase::FetchSamplesToWorkers() {
|
|||
// Batch enough rows for performance reason.
|
||||
if (row_cnt_ % prefetch_size_ == 0) {
|
||||
RETURN_IF_NOT_OK(send_to_que(prefetch_queues_, prefetch_cnt++ % num_prefetchers_, prefetch_keys));
|
||||
// Now we tell the WorkerEntry to wait for them to come back. If prefetch_size_ is a multiple
|
||||
// of rows_per_buffer_, the keys vector will always be empty. But it can be partially filled.
|
||||
// The only requirement we set up is rows_per_buffer_ is less than or equal to prefetch_size_.
|
||||
// Now we tell the WorkerEntry to wait for them to come back.
|
||||
for (auto row_id : prefetch_keys) {
|
||||
keys.push_back(row_id);
|
||||
if (keys.size() == rows_per_buffer_) {
|
||||
RETURN_IF_NOT_OK(send_to_que(io_block_queues_, buf_cnt++ % num_workers_, keys));
|
||||
keys.clear();
|
||||
}
|
||||
}
|
||||
prefetch_keys.clear();
|
||||
}
|
||||
}
|
||||
|
@ -127,12 +122,10 @@ Status CacheBase::FetchSamplesToWorkers() {
|
|||
RETURN_IF_NOT_OK(send_to_que(prefetch_queues_, prefetch_cnt++ % num_prefetchers_, prefetch_keys));
|
||||
for (auto row_id : prefetch_keys) {
|
||||
keys.push_back(row_id);
|
||||
if (keys.size() == rows_per_buffer_) {
|
||||
RETURN_IF_NOT_OK(send_to_que(io_block_queues_, buf_cnt++ % num_workers_, keys));
|
||||
keys.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!keys.empty()) {
|
||||
RETURN_IF_NOT_OK(send_to_que(io_block_queues_, buf_cnt++ % num_workers_, keys));
|
||||
}
|
||||
|
|
|
@ -42,11 +42,10 @@ class CacheBase : public ParallelOp {
|
|||
/// \brief Base class constructor
|
||||
/// \param num_workers Number of parallel workers
|
||||
/// \param op_connector_size Connector size
|
||||
/// \param rows_per_buf Number of rows per buffer
|
||||
/// \param cache_client CacheClient for communication to the CacheServer
|
||||
/// \param sampler Sampler which is mandatory
|
||||
CacheBase(int32_t num_workers, int32_t op_connector_size, int32_t rows_per_buf,
|
||||
std::shared_ptr<CacheClient> cache_client, std::shared_ptr<SamplerRT> sampler);
|
||||
CacheBase(int32_t num_workers, int32_t op_connector_size, std::shared_ptr<CacheClient> cache_client,
|
||||
std::shared_ptr<SamplerRT> sampler);
|
||||
/// \brief Destructor
|
||||
~CacheBase();
|
||||
|
||||
|
@ -87,7 +86,6 @@ class CacheBase : public ParallelOp {
|
|||
int64_t row_cnt_;
|
||||
std::atomic<int64_t> num_cache_miss_;
|
||||
std::shared_ptr<CacheClient> cache_client_;
|
||||
int32_t rows_per_buffer_;
|
||||
std::unique_ptr<Connector<std::vector<row_id_type>>> keys_miss_;
|
||||
|
||||
/// \brief Common function to register resources for interrupt
|
||||
|
|
|
@ -31,7 +31,6 @@ namespace dataset {
|
|||
CacheLookupOp::Builder::Builder() : build_cache_client_(nullptr), build_sampler_(nullptr) {
|
||||
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
|
||||
build_num_workers_ = cfg->num_parallel_workers();
|
||||
rows_per_buffer_ = cfg->rows_per_buffer();
|
||||
build_op_connector_size_ = cfg->op_connector_size();
|
||||
}
|
||||
|
||||
|
@ -52,8 +51,8 @@ Status CacheLookupOp::Builder::SanityCheck() const {
|
|||
// The builder "build" method creates the final object and does some init on it
|
||||
Status CacheLookupOp::Builder::Build(std::shared_ptr<CacheLookupOp> *ptr) {
|
||||
RETURN_IF_NOT_OK(SanityCheck());
|
||||
*ptr = std::make_shared<CacheLookupOp>(build_num_workers_, build_op_connector_size_, rows_per_buffer_,
|
||||
build_cache_client_, build_sampler_);
|
||||
*ptr =
|
||||
std::make_shared<CacheLookupOp>(build_num_workers_, build_op_connector_size_, build_cache_client_, build_sampler_);
|
||||
return Status::OK();
|
||||
}
|
||||
Status CacheLookupOp::operator()() {
|
||||
|
|
|
@ -74,7 +74,6 @@ class CacheLookupOp : public CacheBase, public SamplerRT {
|
|||
|
||||
private:
|
||||
int32_t build_num_workers_;
|
||||
int32_t rows_per_buffer_;
|
||||
int32_t build_op_connector_size_;
|
||||
std::shared_ptr<CacheClient> build_cache_client_;
|
||||
std::shared_ptr<SamplerRT> build_sampler_;
|
||||
|
@ -86,9 +85,9 @@ class CacheLookupOp : public CacheBase, public SamplerRT {
|
|||
/// \brief Constructor
|
||||
/// \note It takes the same arguments as the base class.
|
||||
/// \see CacheBase
|
||||
CacheLookupOp(int32_t num_workers, int32_t op_connector_size, int32_t rows_per_buf,
|
||||
std::shared_ptr<CacheClient> cache_client, std::shared_ptr<SamplerRT> sampler)
|
||||
: CacheBase(num_workers, op_connector_size, rows_per_buf, cache_client, sampler), SamplerRT(*(sampler.get())) {}
|
||||
CacheLookupOp(int32_t num_workers, int32_t op_connector_size, std::shared_ptr<CacheClient> cache_client,
|
||||
std::shared_ptr<SamplerRT> sampler)
|
||||
: CacheBase(num_workers, op_connector_size, cache_client, sampler), SamplerRT(*(sampler.get())) {}
|
||||
~CacheLookupOp() = default;
|
||||
// As a parallel op, we override these two functions
|
||||
Status operator()() override;
|
||||
|
|
|
@ -33,7 +33,6 @@ namespace dataset {
|
|||
CacheOp::Builder::Builder() : build_cache_client_(nullptr), build_sampler_(nullptr) {
|
||||
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
|
||||
build_num_workers_ = cfg->num_parallel_workers();
|
||||
rows_per_buffer_ = cfg->rows_per_buffer();
|
||||
build_op_connector_size_ = cfg->op_connector_size();
|
||||
}
|
||||
|
||||
|
@ -54,17 +53,16 @@ Status CacheOp::Builder::SanityCheck() const {
|
|||
// The builder "build" method creates the final object and does some init on it
|
||||
Status CacheOp::Builder::Build(std::shared_ptr<CacheOp> *ptr) {
|
||||
RETURN_IF_NOT_OK(SanityCheck());
|
||||
*ptr = std::make_shared<CacheOp>(build_num_workers_, build_op_connector_size_, rows_per_buffer_, build_cache_client_,
|
||||
build_sampler_);
|
||||
*ptr = std::make_shared<CacheOp>(build_num_workers_, build_op_connector_size_, build_cache_client_, build_sampler_);
|
||||
RETURN_IF_NOT_OK((*ptr)->InitCache());
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// Constructor of CacheOp
|
||||
CacheOp::CacheOp(int32_t num_workers, int32_t op_connector_size, int32_t rows_per_buf,
|
||||
std::shared_ptr<CacheClient> cache_client, std::shared_ptr<SamplerRT> sampler)
|
||||
: CacheBase(num_workers, op_connector_size, rows_per_buf, std::move(cache_client), std::move(sampler)),
|
||||
CacheOp::CacheOp(int32_t num_workers, int32_t op_connector_size, std::shared_ptr<CacheClient> cache_client,
|
||||
std::shared_ptr<SamplerRT> sampler)
|
||||
: CacheBase(num_workers, op_connector_size, std::move(cache_client), std::move(sampler)),
|
||||
num_guys_in_(0),
|
||||
phase_(Phase::kBuildPhase) {}
|
||||
|
||||
|
|
|
@ -70,14 +70,6 @@ class CacheOp : public CacheBase, public RandomAccessOp {
|
|||
return *this;
|
||||
}
|
||||
|
||||
/// \brief Setter method
|
||||
/// \param rows_per_buffer
|
||||
/// \return Builder setter method returns reference to the builder.
|
||||
Builder &SetRowsPerBuffer(int32_t rows_per_buffer) {
|
||||
rows_per_buffer_ = rows_per_buffer;
|
||||
return *this;
|
||||
}
|
||||
|
||||
/// \brief Setter method
|
||||
/// \param sampler
|
||||
/// \return Builder setter method returns reference to the builder.
|
||||
|
@ -93,7 +85,6 @@ class CacheOp : public CacheBase, public RandomAccessOp {
|
|||
|
||||
private:
|
||||
int32_t build_num_workers_;
|
||||
int32_t rows_per_buffer_;
|
||||
int32_t build_op_connector_size_;
|
||||
std::shared_ptr<CacheClient> build_cache_client_;
|
||||
std::shared_ptr<SamplerRT> build_sampler_;
|
||||
|
@ -107,8 +98,8 @@ class CacheOp : public CacheBase, public RandomAccessOp {
|
|||
/// \note The builder class should be used to call it.
|
||||
/// \param num_workers The number of worker threads.
|
||||
/// \param op_connector_size The size of each queue in the connector.
|
||||
CacheOp(int32_t num_workers, int32_t op_connector_size, int32_t rows_per_buf,
|
||||
std::shared_ptr<CacheClient> cache_client, std::shared_ptr<SamplerRT> sampler);
|
||||
CacheOp(int32_t num_workers, int32_t op_connector_size, std::shared_ptr<CacheClient> cache_client,
|
||||
std::shared_ptr<SamplerRT> sampler);
|
||||
|
||||
// Destructor
|
||||
~CacheOp();
|
||||
|
|
|
@ -41,7 +41,6 @@ constexpr int32_t ShuffleOp::kShuffleStateDrain;
|
|||
ShuffleOp::Builder::Builder() : build_shuffle_size_(0), build_reshuffle_each_epoch_(true) {
|
||||
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
|
||||
build_op_connector_size_ = cfg->op_connector_size();
|
||||
build_rows_per_buffer_ = cfg->rows_per_buffer();
|
||||
build_shuffle_seed_ = GetSeed();
|
||||
}
|
||||
|
||||
|
@ -56,20 +55,17 @@ Status ShuffleOp::Builder::SanityCheck() const {
|
|||
Status ShuffleOp::Builder::Build(std::shared_ptr<ShuffleOp> *ptr) {
|
||||
RETURN_IF_NOT_OK(SanityCheck());
|
||||
*ptr = std::make_shared<ShuffleOp>(build_shuffle_size_, build_shuffle_seed_, build_op_connector_size_,
|
||||
build_reshuffle_each_epoch_, build_rows_per_buffer_);
|
||||
build_reshuffle_each_epoch_);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// Constructor of the ShuffleOp
|
||||
ShuffleOp::ShuffleOp(int32_t shuffle_size, uint32_t shuffle_seed, int32_t op_connector_size, bool reset_every_epoch,
|
||||
int32_t rows_per_buffer)
|
||||
ShuffleOp::ShuffleOp(int32_t shuffle_size, uint32_t shuffle_seed, int32_t op_connector_size, bool reset_every_epoch)
|
||||
: PipelineOp(op_connector_size),
|
||||
shuffle_size_(shuffle_size),
|
||||
shuffle_seed_(shuffle_seed),
|
||||
reshuffle_each_epoch_(reset_every_epoch),
|
||||
rng_(shuffle_seed),
|
||||
buffer_counter_(0),
|
||||
rows_per_buffer_(rows_per_buffer),
|
||||
shuffle_buffer_(std::make_unique<TensorTable>()),
|
||||
shuffle_last_row_idx_(0),
|
||||
shuffle_buffer_state_(kShuffleStateInit) {}
|
||||
|
@ -87,7 +83,6 @@ Status ShuffleOp::SelfReset() {
|
|||
}
|
||||
|
||||
shuffle_buffer_ = std::make_unique<TensorTable>();
|
||||
buffer_counter_ = 0;
|
||||
shuffle_last_row_idx_ = 0;
|
||||
shuffle_buffer_state_ = kShuffleStateInit;
|
||||
return Status::OK();
|
||||
|
@ -104,8 +99,8 @@ void ShuffleOp::Print(std::ostream &out, bool show_all) const {
|
|||
// Call the super class for displaying any common detailed info
|
||||
PipelineOp::Print(out, show_all);
|
||||
// Then show any custom derived-internal stuff
|
||||
out << "\nShuffle size: " << shuffle_size_ << "\nRows per buffer: " << rows_per_buffer_
|
||||
<< "\nShuffle buffer state: " << shuffle_buffer_state_ << "\nShuffle seed: " << shuffle_seed_ << "\n\n";
|
||||
out << "\nShuffle size: " << shuffle_size_ << "\nShuffle buffer state: " << shuffle_buffer_state_
|
||||
<< "\nShuffle seed: " << shuffle_seed_ << "\n\n";
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -121,9 +121,7 @@ class ShuffleOp : public PipelineOp {
|
|||
// @param shuffle_size - The size for the shuffle buffer
|
||||
// @param shuffle_seed - The seed to use for random number generation
|
||||
// @param op_connector_size - The output connector queue size
|
||||
// @param rows_per_buffer - The requested number of rows per buffer
|
||||
ShuffleOp(int32_t shuffle_size, uint32_t shuffle_seed, int32_t op_connector_size, bool reset_every_epoch,
|
||||
int32_t rows_per_buffer);
|
||||
ShuffleOp(int32_t shuffle_size, uint32_t shuffle_seed, int32_t op_connector_size, bool reset_every_epoch);
|
||||
|
||||
// Destructor
|
||||
~ShuffleOp() = default;
|
||||
|
@ -183,8 +181,6 @@ class ShuffleOp : public PipelineOp {
|
|||
// (ie uniform_int_distribution) because we will need to create up to |dataset| instances
|
||||
// of the distribution object in the common case of a perfect shuffle
|
||||
std::mt19937_64 rng_;
|
||||
int32_t buffer_counter_; // For creating new buffer id's
|
||||
int32_t rows_per_buffer_; // Number of rows to pack into output buffer
|
||||
// A single (potentially large) buffer of tensor rows for performing shuffling.
|
||||
std::unique_ptr<TensorTable> shuffle_buffer_;
|
||||
int32_t shuffle_last_row_idx_; // Internal tracking of the last slot of our shuffle buffer
|
||||
|
|
|
@ -32,7 +32,6 @@ namespace dataset {
|
|||
AlbumOp::Builder::Builder() : builder_decode_(false), builder_sampler_(nullptr) {
|
||||
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
|
||||
builder_num_workers_ = cfg->num_parallel_workers();
|
||||
builder_rows_per_buffer_ = cfg->rows_per_buffer();
|
||||
builder_op_connector_size_ = cfg->op_connector_size();
|
||||
}
|
||||
|
||||
|
@ -52,9 +51,8 @@ Status AlbumOp::Builder::Build(std::shared_ptr<AlbumOp> *ptr) {
|
|||
MS_LOG(INFO) << "Schema file provided: " << builder_schema_file_ << ".";
|
||||
builder_schema_->LoadSchemaFile(builder_schema_file_, builder_columns_to_load_);
|
||||
}
|
||||
*ptr = std::make_shared<AlbumOp>(builder_num_workers_, builder_rows_per_buffer_, builder_dir_,
|
||||
builder_op_connector_size_, builder_decode_, builder_extensions_,
|
||||
std::move(builder_schema_), std::move(builder_sampler_));
|
||||
*ptr = std::make_shared<AlbumOp>(builder_num_workers_, builder_dir_, builder_op_connector_size_, builder_decode_,
|
||||
builder_extensions_, std::move(builder_schema_), std::move(builder_sampler_));
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
@ -69,10 +67,10 @@ Status AlbumOp::Builder::SanityCheck() {
|
|||
return err_msg.empty() ? Status::OK() : Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, err_msg);
|
||||
}
|
||||
|
||||
AlbumOp::AlbumOp(int32_t num_wkrs, int32_t rows_per_buffer, std::string file_dir, int32_t queue_size, bool do_decode,
|
||||
AlbumOp::AlbumOp(int32_t num_wkrs, std::string file_dir, int32_t queue_size, bool do_decode,
|
||||
const std::set<std::string> &exts, std::unique_ptr<DataSchema> data_schema,
|
||||
std::shared_ptr<SamplerRT> sampler)
|
||||
: MappableLeafOp(num_wkrs, queue_size, std::move(sampler), rows_per_buffer),
|
||||
: MappableLeafOp(num_wkrs, queue_size, std::move(sampler)),
|
||||
folder_path_(file_dir),
|
||||
decode_(do_decode),
|
||||
extensions_(exts),
|
||||
|
|
|
@ -58,14 +58,6 @@ class AlbumOp : public MappableLeafOp {
|
|||
/// \brief Destructor.
|
||||
~Builder() = default;
|
||||
|
||||
/// \brief Setter method
|
||||
/// \param[in] rows_per_buffer
|
||||
/// \return Builder setter method returns reference to the builder
|
||||
Builder &SetRowsPerBuffer(int32_t rows_per_buffer) {
|
||||
builder_rows_per_buffer_ = rows_per_buffer;
|
||||
return *this;
|
||||
}
|
||||
|
||||
/// \brief Setter method
|
||||
/// \param[in] size
|
||||
/// \return Builder setter method returns reference to the builder
|
||||
|
@ -154,16 +146,14 @@ class AlbumOp : public MappableLeafOp {
|
|||
|
||||
/// \brief Constructor
|
||||
/// \param[in] num_wkrs - Num of workers reading images in parallel
|
||||
/// \param[in] rows_per_buffer Number of images (rows) in each buffer
|
||||
/// \param[in] file_dir - directory of Album
|
||||
/// \param[in] queue_size - connector size
|
||||
/// \param[in] do_decode - decode image files
|
||||
/// \param[in] exts - set of file extensions to read, if empty, read everything under the dir
|
||||
/// \param[in] data_schema - schema of dataset
|
||||
/// \param[in] sampler - sampler tells AlbumOp what to read
|
||||
AlbumOp(int32_t num_wkrs, int32_t rows_per_buffer, std::string file_dir, int32_t queue_size, bool do_decode,
|
||||
const std::set<std::string> &exts, std::unique_ptr<DataSchema> data_schema,
|
||||
std::shared_ptr<SamplerRT> sampler);
|
||||
AlbumOp(int32_t num_wkrs, std::string file_dir, int32_t queue_size, bool do_decode, const std::set<std::string> &exts,
|
||||
std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler);
|
||||
|
||||
/// \brief Destructor.
|
||||
~AlbumOp() = default;
|
||||
|
@ -273,7 +263,6 @@ class AlbumOp : public MappableLeafOp {
|
|||
/// \return Status The status code returned
|
||||
Status ComputeColMap() override;
|
||||
|
||||
int32_t rows_per_buffer_;
|
||||
std::string folder_path_; // directory of image folder
|
||||
bool decode_;
|
||||
std::set<std::string> extensions_; // extensions allowed
|
||||
|
|
|
@ -34,7 +34,6 @@ namespace dataset {
|
|||
CelebAOp::Builder::Builder() : builder_decode_(false), builder_sampler_(nullptr) {
|
||||
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
|
||||
builder_num_workers_ = cfg->num_parallel_workers();
|
||||
builder_rows_per_buffer_ = cfg->rows_per_buffer();
|
||||
builder_op_connector_size_ = cfg->op_connector_size();
|
||||
}
|
||||
|
||||
|
@ -54,9 +53,9 @@ Status CelebAOp::Builder::Build(std::shared_ptr<CelebAOp> *op) {
|
|||
// label is like this:0 1 0 0 1......
|
||||
RETURN_IF_NOT_OK(
|
||||
builder_schema_->AddColumn(ColDescriptor("attr", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1)));
|
||||
*op = std::make_shared<CelebAOp>(builder_num_workers_, builder_rows_per_buffer_, builder_dir_,
|
||||
builder_op_connector_size_, builder_decode_, builder_usage_, builder_extensions_,
|
||||
std::move(builder_schema_), std::move(builder_sampler_));
|
||||
*op = std::make_shared<CelebAOp>(builder_num_workers_, builder_dir_, builder_op_connector_size_, builder_decode_,
|
||||
builder_usage_, builder_extensions_, std::move(builder_schema_),
|
||||
std::move(builder_sampler_));
|
||||
if (*op == nullptr) {
|
||||
return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "CelebAOp init failed.");
|
||||
}
|
||||
|
@ -76,10 +75,10 @@ Status CelebAOp::Builder::SanityCheck() {
|
|||
return err_msg.empty() ? Status::OK() : Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, err_msg);
|
||||
}
|
||||
|
||||
CelebAOp::CelebAOp(int32_t num_workers, int32_t rows_per_buffer, const std::string &dir, int32_t queue_size,
|
||||
bool decode, const std::string &usage, const std::set<std::string> &exts,
|
||||
std::unique_ptr<DataSchema> schema, std::shared_ptr<SamplerRT> sampler)
|
||||
: MappableLeafOp(num_workers, queue_size, std::move(sampler), rows_per_buffer),
|
||||
CelebAOp::CelebAOp(int32_t num_workers, const std::string &dir, int32_t queue_size, bool decode,
|
||||
const std::string &usage, const std::set<std::string> &exts, std::unique_ptr<DataSchema> schema,
|
||||
std::shared_ptr<SamplerRT> sampler)
|
||||
: MappableLeafOp(num_workers, queue_size, std::move(sampler)),
|
||||
folder_path_(dir),
|
||||
decode_(decode),
|
||||
extensions_(exts),
|
||||
|
|
|
@ -53,14 +53,6 @@ class CelebAOp : public MappableLeafOp {
|
|||
// Destructor.
|
||||
~Builder() = default;
|
||||
|
||||
// Setter method
|
||||
// @param int32_t rows_per_buffer
|
||||
// @return Builder setter method returns reference to the builder.
|
||||
Builder &SetRowsPerBuffer(int32_t rows_per_buffer) {
|
||||
builder_rows_per_buffer_ = rows_per_buffer;
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Setter method
|
||||
// @param int32_t size
|
||||
// @return Builder setter method returns reference to the builder.
|
||||
|
@ -139,13 +131,11 @@ class CelebAOp : public MappableLeafOp {
|
|||
|
||||
// Constructor
|
||||
// @param int32_t - num_workers - Num of workers reading images in parallel
|
||||
// @param int32_t - rows_per_buffer Number of images (rows) in each buffer
|
||||
// @param std::string - dir directory of celeba dataset
|
||||
// @param int32_t queueSize - connector queue size
|
||||
// @param std::shared_ptr<SamplerRT> sampler - sampler tells CelebAOp what to read
|
||||
CelebAOp(int32_t num_workers, int32_t rows_per_buffer, const std::string &dir, int32_t queue_size, bool decode,
|
||||
const std::string &usage, const std::set<std::string> &exts, std::unique_ptr<DataSchema> schema,
|
||||
std::shared_ptr<SamplerRT> sampler);
|
||||
CelebAOp(int32_t num_workers, const std::string &dir, int32_t queue_size, bool decode, const std::string &usage,
|
||||
const std::set<std::string> &exts, std::unique_ptr<DataSchema> schema, std::shared_ptr<SamplerRT> sampler);
|
||||
|
||||
~CelebAOp() override = default;
|
||||
|
||||
|
|
|
@ -39,7 +39,6 @@ constexpr uint32_t kCifarImageSize = kCifarImageHeight * kCifarImageWidth * kCif
|
|||
CifarOp::Builder::Builder() : sampler_(nullptr), usage_("") {
|
||||
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
|
||||
num_workers_ = cfg->num_parallel_workers();
|
||||
rows_per_buffer_ = cfg->rows_per_buffer();
|
||||
op_connect_size_ = cfg->op_connector_size();
|
||||
cifar_type_ = kCifar10;
|
||||
}
|
||||
|
@ -65,8 +64,8 @@ Status CifarOp::Builder::Build(std::shared_ptr<CifarOp> *ptr) {
|
|||
ColDescriptor("fine_label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &another_scalar)));
|
||||
}
|
||||
|
||||
*ptr = std::make_shared<CifarOp>(cifar_type_, usage_, num_workers_, rows_per_buffer_, dir_, op_connect_size_,
|
||||
std::move(schema_), std::move(sampler_));
|
||||
*ptr = std::make_shared<CifarOp>(cifar_type_, usage_, num_workers_, dir_, op_connect_size_, std::move(schema_),
|
||||
std::move(sampler_));
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
@ -85,10 +84,9 @@ Status CifarOp::Builder::SanityCheck() {
|
|||
return err_msg.empty() ? Status::OK() : Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, err_msg);
|
||||
}
|
||||
|
||||
CifarOp::CifarOp(CifarType type, const std::string &usage, int32_t num_works, int32_t rows_per_buf,
|
||||
const std::string &file_dir, int32_t queue_size, std::unique_ptr<DataSchema> data_schema,
|
||||
std::shared_ptr<SamplerRT> sampler)
|
||||
: MappableLeafOp(num_works, queue_size, std::move(sampler), rows_per_buf),
|
||||
CifarOp::CifarOp(CifarType type, const std::string &usage, int32_t num_works, const std::string &file_dir,
|
||||
int32_t queue_size, std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler)
|
||||
: MappableLeafOp(num_works, queue_size, std::move(sampler)),
|
||||
cifar_type_(type),
|
||||
usage_(usage),
|
||||
folder_path_(file_dir),
|
||||
|
|
|
@ -49,14 +49,6 @@ class CifarOp : public MappableLeafOp {
|
|||
// Destructor.
|
||||
~Builder() = default;
|
||||
|
||||
// Setter method
|
||||
// @param uint32_t rows_per_buffer
|
||||
// @return Builder setter method returns reference to the builder.
|
||||
Builder &SetRowsPerBuffer(int32_t rows_per_buffer) {
|
||||
rows_per_buffer_ = rows_per_buffer;
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Setter method
|
||||
// @param uint32_t size
|
||||
// @return Builder setter method returns reference to the builder.
|
||||
|
@ -122,7 +114,6 @@ class CifarOp : public MappableLeafOp {
|
|||
std::string dir_;
|
||||
std::string usage_;
|
||||
int32_t num_workers_;
|
||||
int32_t rows_per_buffer_;
|
||||
int32_t op_connect_size_;
|
||||
std::shared_ptr<SamplerRT> sampler_;
|
||||
std::unique_ptr<DataSchema> schema_;
|
||||
|
@ -133,13 +124,11 @@ class CifarOp : public MappableLeafOp {
|
|||
// @param CifarType type - Cifar10 or Cifar100
|
||||
// @param const std::string &usage - Usage of this dataset, can be 'train', 'test' or 'all'
|
||||
// @param uint32_t numWorks - Num of workers reading images in parallel
|
||||
// @param uint32_t - rowsPerBuffer Number of images (rows) in each buffer
|
||||
// @param std::string - dir directory of cifar dataset
|
||||
// @param uint32_t - queueSize - connector queue size
|
||||
// @param std::shared_ptr<SamplerRT> sampler - sampler tells CifarOp what to read
|
||||
CifarOp(CifarType type, const std::string &usage, int32_t num_works, int32_t rows_per_buf,
|
||||
const std::string &file_dir, int32_t queue_size, std::unique_ptr<DataSchema> data_schema,
|
||||
std::shared_ptr<SamplerRT> sampler);
|
||||
CifarOp(CifarType type, const std::string &usage, int32_t num_works, const std::string &file_dir, int32_t queue_size,
|
||||
std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler);
|
||||
// Destructor.
|
||||
~CifarOp() = default;
|
||||
|
||||
|
|
|
@ -36,7 +36,6 @@ ClueOp::Builder::Builder()
|
|||
std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
|
||||
builder_num_workers_ = config_manager->num_parallel_workers();
|
||||
builder_op_connector_size_ = config_manager->op_connector_size();
|
||||
builder_rows_per_buffer_ = config_manager->rows_per_buffer();
|
||||
builder_worker_connector_size_ = config_manager->worker_connector_size();
|
||||
}
|
||||
|
||||
|
@ -67,9 +66,8 @@ Status ClueOp::Builder::Build(std::shared_ptr<ClueOp> *op) {
|
|||
}
|
||||
|
||||
std::shared_ptr<ClueOp> clue_op = std::make_shared<ClueOp>(
|
||||
builder_num_workers_, builder_rows_per_buffer_, builder_num_samples_, builder_worker_connector_size_, ck_map,
|
||||
builder_clue_files_list_, builder_op_connector_size_, builder_shuffle_files_, builder_num_devices_,
|
||||
builder_device_id_);
|
||||
builder_num_workers_, builder_num_samples_, builder_worker_connector_size_, ck_map, builder_clue_files_list_,
|
||||
builder_op_connector_size_, builder_shuffle_files_, builder_num_devices_, builder_device_id_);
|
||||
RETURN_IF_NOT_OK(clue_op->Init());
|
||||
*op = std::move(clue_op);
|
||||
|
||||
|
@ -87,11 +85,11 @@ std::vector<std::string> ClueOp::Builder::split(const std::string &s, char delim
|
|||
return res;
|
||||
}
|
||||
|
||||
ClueOp::ClueOp(int32_t num_workers, int64_t rows_per_buffer, int64_t num_samples, int32_t worker_connector_size,
|
||||
ColKeyMap cols_to_keyword, std::vector<std::string> clue_files_list, int32_t op_connector_size,
|
||||
bool shuffle_files, int32_t num_devices, int32_t device_id)
|
||||
: NonMappableLeafOp(num_workers, worker_connector_size, rows_per_buffer, num_samples, op_connector_size,
|
||||
shuffle_files, num_devices, device_id),
|
||||
ClueOp::ClueOp(int32_t num_workers, int64_t num_samples, int32_t worker_connector_size, ColKeyMap cols_to_keyword,
|
||||
std::vector<std::string> clue_files_list, int32_t op_connector_size, bool shuffle_files,
|
||||
int32_t num_devices, int32_t device_id)
|
||||
: NonMappableLeafOp(num_workers, worker_connector_size, num_samples, op_connector_size, shuffle_files, num_devices,
|
||||
device_id),
|
||||
clue_files_list_(std::move(clue_files_list)),
|
||||
cols_to_keyword_(cols_to_keyword) {}
|
||||
|
||||
|
@ -200,8 +198,7 @@ void ClueOp::Print(std::ostream &out, bool show_all) const {
|
|||
// Call the super class for displaying any common detailed info
|
||||
ParallelOp::Print(out, show_all);
|
||||
// Then show any custom derived-internal stuff
|
||||
out << "\nRows per buffer: " << rows_per_buffer_ << "\nSample count: " << total_rows_
|
||||
<< "\nDevice id: " << device_id_ << "\nNumber of devices: " << num_devices_
|
||||
out << "\nSample count: " << total_rows_ << "\nDevice id: " << device_id_ << "\nNumber of devices: " << num_devices_
|
||||
<< "\nShuffle files: " << ((shuffle_files_) ? "yes" : "no") << "\nClue files list:\n";
|
||||
for (int i = 0; i < clue_files_list_.size(); ++i) {
|
||||
out << " " << clue_files_list_[i];
|
||||
|
|
|
@ -138,9 +138,9 @@ class ClueOp : public NonMappableLeafOp {
|
|||
};
|
||||
|
||||
// Constructor of ClueOp
|
||||
ClueOp(int32_t num_workers, int64_t rows_per_buffer, int64_t num_samples, int32_t worker_connector_size,
|
||||
ColKeyMap cols_to_keyword, std::vector<std::string> clue_files_list, int32_t op_connector_size,
|
||||
bool shuffle_files, int32_t num_devices, int32_t device_id);
|
||||
ClueOp(int32_t num_workers, int64_t num_samples, int32_t worker_connector_size, ColKeyMap cols_to_keyword,
|
||||
std::vector<std::string> clue_files_list, int32_t op_connector_size, bool shuffle_files, int32_t num_devices,
|
||||
int32_t device_id);
|
||||
|
||||
// Default destructor
|
||||
~ClueOp() = default;
|
||||
|
|
|
@ -50,7 +50,6 @@ const unsigned int kPadValueZero = 0;
|
|||
CocoOp::Builder::Builder() : builder_decode_(false), builder_sampler_(nullptr) {
|
||||
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
|
||||
builder_num_workers_ = cfg->num_parallel_workers();
|
||||
builder_rows_per_buffer_ = cfg->rows_per_buffer();
|
||||
builder_op_connector_size_ = cfg->op_connector_size();
|
||||
builder_task_type_ = TaskType::Detection;
|
||||
}
|
||||
|
@ -100,8 +99,8 @@ Status CocoOp::Builder::Build(std::shared_ptr<CocoOp> *ptr) {
|
|||
RETURN_STATUS_UNEXPECTED("Invalid parameter, task type should be Detection, Stuff, Keypoint or Panoptic.");
|
||||
}
|
||||
*ptr = std::make_shared<CocoOp>(builder_task_type_, builder_dir_, builder_file_, builder_num_workers_,
|
||||
builder_rows_per_buffer_, builder_op_connector_size_, builder_decode_,
|
||||
std::move(builder_schema_), std::move(builder_sampler_));
|
||||
builder_op_connector_size_, builder_decode_, std::move(builder_schema_),
|
||||
std::move(builder_sampler_));
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
@ -122,9 +121,9 @@ Status CocoOp::Builder::SanityCheck() {
|
|||
}
|
||||
|
||||
CocoOp::CocoOp(const TaskType &task_type, const std::string &image_folder_path, const std::string &annotation_path,
|
||||
int32_t num_workers, int32_t rows_per_buffer, int32_t queue_size, bool decode,
|
||||
std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler)
|
||||
: MappableLeafOp(num_workers, queue_size, std::move(sampler), rows_per_buffer),
|
||||
int32_t num_workers, int32_t queue_size, bool decode, std::unique_ptr<DataSchema> data_schema,
|
||||
std::shared_ptr<SamplerRT> sampler)
|
||||
: MappableLeafOp(num_workers, queue_size, std::move(sampler)),
|
||||
decode_(decode),
|
||||
task_type_(task_type),
|
||||
image_folder_path_(image_folder_path),
|
||||
|
|
|
@ -109,14 +109,6 @@ class CocoOp : public MappableLeafOp {
|
|||
return *this;
|
||||
}
|
||||
|
||||
// Setter method.
|
||||
// @param int32_t rows_per_buffer
|
||||
// @return Builder setter method returns reference to the builder.
|
||||
Builder &SetRowsPerBuffer(int32_t rows_per_buffer) {
|
||||
builder_rows_per_buffer_ = rows_per_buffer;
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Setter method.
|
||||
// @param std::shared_ptr<Sampler> sampler
|
||||
// @return Builder setter method returns reference to the builder.
|
||||
|
@ -159,15 +151,14 @@ class CocoOp : public MappableLeafOp {
|
|||
// @param std::string image_folder_path - image folder path of Coco
|
||||
// @param std::string annotation_path - annotation json path of Coco
|
||||
// @param int32_t num_workers - number of workers reading images in parallel
|
||||
// @param int32_t rows_per_buffer - number of images (rows) in each buffer
|
||||
// @param int32_t queue_size - connector queue size
|
||||
// @param int64_t num_samples - number of samples to read
|
||||
// @param bool decode - whether to decode images
|
||||
// @param std::unique_ptr<DataSchema> data_schema - the schema of the Coco dataset
|
||||
// @param std::shared_ptr<Sampler> sampler - sampler tells CocoOp what to read
|
||||
CocoOp(const TaskType &task_type, const std::string &image_folder_path, const std::string &annotation_path,
|
||||
int32_t num_workers, int32_t rows_per_buffer, int32_t queue_size, bool decode,
|
||||
std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler);
|
||||
int32_t num_workers, int32_t queue_size, bool decode, std::unique_ptr<DataSchema> data_schema,
|
||||
std::shared_ptr<SamplerRT> sampler);
|
||||
|
||||
// Destructor
|
||||
~CocoOp() = default;
|
||||
|
|
|
@ -32,7 +32,6 @@ CsvOp::Builder::Builder()
|
|||
std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
|
||||
builder_num_workers_ = config_manager->num_parallel_workers();
|
||||
builder_op_connector_size_ = config_manager->op_connector_size();
|
||||
builder_rows_per_buffer_ = config_manager->rows_per_buffer();
|
||||
builder_worker_connector_size_ = config_manager->worker_connector_size();
|
||||
}
|
||||
|
||||
|
@ -59,8 +58,8 @@ Status CsvOp::Builder::Build(std::shared_ptr<CsvOp> *op) {
|
|||
|
||||
std::shared_ptr<CsvOp> csv_op = std::make_shared<CsvOp>(
|
||||
builder_csv_files_list_, builder_field_delim_, builder_column_default_list_, builder_column_name_list_,
|
||||
builder_num_workers_, builder_rows_per_buffer_, builder_num_samples_, builder_worker_connector_size_,
|
||||
builder_op_connector_size_, builder_shuffle_files_, builder_num_devices_, builder_device_id_);
|
||||
builder_num_workers_, builder_num_samples_, builder_worker_connector_size_, builder_op_connector_size_,
|
||||
builder_shuffle_files_, builder_num_devices_, builder_device_id_);
|
||||
RETURN_IF_NOT_OK(csv_op->Init());
|
||||
*op = std::move(csv_op);
|
||||
|
||||
|
@ -69,11 +68,11 @@ Status CsvOp::Builder::Build(std::shared_ptr<CsvOp> *op) {
|
|||
|
||||
CsvOp::CsvOp(const std::vector<std::string> &csv_files_list, char field_delim,
|
||||
const std::vector<std::shared_ptr<BaseRecord>> &column_default,
|
||||
const std::vector<std::string> &column_name, int32_t num_workers, int64_t rows_per_buffer,
|
||||
int64_t num_samples, int32_t worker_connector_size, int32_t op_connector_size, bool shuffle_files,
|
||||
int32_t num_devices, int32_t device_id)
|
||||
: NonMappableLeafOp(num_workers, worker_connector_size, rows_per_buffer, num_samples, op_connector_size,
|
||||
shuffle_files, num_devices, device_id),
|
||||
const std::vector<std::string> &column_name, int32_t num_workers, int64_t num_samples,
|
||||
int32_t worker_connector_size, int32_t op_connector_size, bool shuffle_files, int32_t num_devices,
|
||||
int32_t device_id)
|
||||
: NonMappableLeafOp(num_workers, worker_connector_size, num_samples, op_connector_size, shuffle_files, num_devices,
|
||||
device_id),
|
||||
csv_files_list_(std::move(csv_files_list)),
|
||||
field_delim_(field_delim),
|
||||
column_default_list_(column_default),
|
||||
|
@ -91,11 +90,10 @@ Status CsvOp::Init() {
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
CsvOp::CsvParser::CsvParser(int32_t worker_id, JaggedConnector *connector, int64_t rows_per_buffer, char field_delim,
|
||||
CsvOp::CsvParser::CsvParser(int32_t worker_id, JaggedConnector *connector, char field_delim,
|
||||
std::vector<std::shared_ptr<CsvOp::BaseRecord>> column_default, std::string file_path)
|
||||
: worker_id_(worker_id),
|
||||
buffer_connector_(connector),
|
||||
csv_rows_per_buffer_(rows_per_buffer),
|
||||
csv_field_delim_(field_delim),
|
||||
column_default_(column_default),
|
||||
file_path_(file_path),
|
||||
|
@ -469,8 +467,7 @@ Status CsvOp::CsvParser::InitCsvParser() {
|
|||
}
|
||||
|
||||
Status CsvOp::LoadFile(const std::string &file, int64_t start_offset, int64_t end_offset, int32_t worker_id) {
|
||||
CsvParser csv_parser(worker_id, jagged_buffer_connector_.get(), rows_per_buffer_, field_delim_, column_default_list_,
|
||||
file);
|
||||
CsvParser csv_parser(worker_id, jagged_buffer_connector_.get(), field_delim_, column_default_list_, file);
|
||||
csv_parser.SetStartOffset(start_offset);
|
||||
csv_parser.SetEndOffset(end_offset);
|
||||
std::ifstream ifs;
|
||||
|
@ -516,8 +513,7 @@ void CsvOp::Print(std::ostream &out, bool show_all) const {
|
|||
// Call the super class for displaying any common detailed info
|
||||
ParallelOp::Print(out, show_all);
|
||||
// Then show any custom derived-internal stuff
|
||||
out << "\nRows per buffer: " << rows_per_buffer_ << "\nSample count: " << total_rows_
|
||||
<< "\nDevice id: " << device_id_ << "\nNumber of devices: " << num_devices_
|
||||
out << "\nSample count: " << total_rows_ << "\nDevice id: " << device_id_ << "\nNumber of devices: " << num_devices_
|
||||
<< "\nShuffle files: " << ((shuffle_files_) ? "yes" : "no") << "\nCsv files list:\n";
|
||||
for (int i = 0; i < csv_files_list_.size(); ++i) {
|
||||
out << " " << csv_files_list_[i];
|
||||
|
@ -592,7 +588,7 @@ Status CsvOp::CalculateNumRowsPerShard() {
|
|||
}
|
||||
|
||||
int64_t CsvOp::CountTotalRows(const std::string &file) {
|
||||
CsvParser csv_parser(0, jagged_buffer_connector_.get(), rows_per_buffer_, field_delim_, column_default_list_, file);
|
||||
CsvParser csv_parser(0, jagged_buffer_connector_.get(), field_delim_, column_default_list_, file);
|
||||
std::ifstream ifs;
|
||||
ifs.open(file, std::ifstream::in);
|
||||
if (!ifs.is_open()) {
|
||||
|
|
|
@ -65,7 +65,7 @@ class CsvOp : public NonMappableLeafOp {
|
|||
public:
|
||||
CsvParser() = delete;
|
||||
|
||||
CsvParser(int32_t worker_id, JaggedConnector *connector, int64_t rows_per_buffer, char field_delim,
|
||||
CsvParser(int32_t worker_id, JaggedConnector *connector, char field_delim,
|
||||
std::vector<std::shared_ptr<CsvOp::BaseRecord>> column_default, std::string file_path);
|
||||
|
||||
~CsvParser() = default;
|
||||
|
@ -128,7 +128,6 @@ class CsvOp : public NonMappableLeafOp {
|
|||
|
||||
int32_t worker_id_;
|
||||
JaggedConnector *buffer_connector_;
|
||||
int64_t csv_rows_per_buffer_;
|
||||
const char csv_field_delim_;
|
||||
std::vector<std::shared_ptr<CsvOp::BaseRecord>> column_default_;
|
||||
State cur_state_;
|
||||
|
@ -261,8 +260,8 @@ class CsvOp : public NonMappableLeafOp {
|
|||
|
||||
CsvOp(const std::vector<std::string> &csv_files_list, char field_delim,
|
||||
const std::vector<std::shared_ptr<BaseRecord>> &column_default, const std::vector<std::string> &column_name,
|
||||
int32_t num_workers, int64_t rows_per_buffer, int64_t num_samples, int32_t worker_connector_size,
|
||||
int32_t op_connector_size, bool shuffle_files, int32_t num_devices, int32_t device_id);
|
||||
int32_t num_workers, int64_t num_samples, int32_t worker_connector_size, int32_t op_connector_size,
|
||||
bool shuffle_files, int32_t num_devices, int32_t device_id);
|
||||
|
||||
// Default destructor
|
||||
~CsvOp() = default;
|
||||
|
|
|
@ -28,7 +28,6 @@ namespace dataset {
|
|||
ImageFolderOp::Builder::Builder() : builder_decode_(false), builder_recursive_(false), builder_sampler_(nullptr) {
|
||||
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
|
||||
builder_num_workers_ = cfg->num_parallel_workers();
|
||||
builder_rows_per_buffer_ = cfg->rows_per_buffer();
|
||||
builder_op_connector_size_ = cfg->op_connector_size();
|
||||
}
|
||||
|
||||
|
@ -45,10 +44,9 @@ Status ImageFolderOp::Builder::Build(std::shared_ptr<ImageFolderOp> *ptr) {
|
|||
builder_schema_->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1)));
|
||||
RETURN_IF_NOT_OK(builder_schema_->AddColumn(
|
||||
ColDescriptor("label", DataType(DataType::DE_INT32), TensorImpl::kFlexible, 0, &scalar)));
|
||||
*ptr = std::make_shared<ImageFolderOp>(builder_num_workers_, builder_rows_per_buffer_, builder_dir_,
|
||||
builder_op_connector_size_, builder_recursive_, builder_decode_,
|
||||
builder_extensions_, builder_labels_to_read_, std::move(builder_schema_),
|
||||
std::move(builder_sampler_));
|
||||
*ptr = std::make_shared<ImageFolderOp>(
|
||||
builder_num_workers_, builder_dir_, builder_op_connector_size_, builder_recursive_, builder_decode_,
|
||||
builder_extensions_, builder_labels_to_read_, std::move(builder_schema_), std::move(builder_sampler_));
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
@ -64,11 +62,10 @@ Status ImageFolderOp::Builder::SanityCheck() {
|
|||
return err_msg.empty() ? Status::OK() : Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, err_msg);
|
||||
}
|
||||
|
||||
ImageFolderOp::ImageFolderOp(int32_t num_wkrs, int32_t rows_per_buffer, std::string file_dir, int32_t queue_size,
|
||||
bool recursive, bool do_decode, const std::set<std::string> &exts,
|
||||
const std::map<std::string, int32_t> &map, std::unique_ptr<DataSchema> data_schema,
|
||||
std::shared_ptr<SamplerRT> sampler)
|
||||
: MappableLeafOp(num_wkrs, queue_size, std::move(sampler), rows_per_buffer),
|
||||
ImageFolderOp::ImageFolderOp(int32_t num_wkrs, std::string file_dir, int32_t queue_size, bool recursive, bool do_decode,
|
||||
const std::set<std::string> &exts, const std::map<std::string, int32_t> &map,
|
||||
std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler)
|
||||
: MappableLeafOp(num_wkrs, queue_size, std::move(sampler)),
|
||||
folder_path_(file_dir),
|
||||
recursive_(recursive),
|
||||
decode_(do_decode),
|
||||
|
|
|
@ -63,14 +63,6 @@ class ImageFolderOp : public MappableLeafOp {
|
|||
// Destructor.
|
||||
~Builder() = default;
|
||||
|
||||
// Setter method
|
||||
// @param int32_t rows_per_buffer
|
||||
// @return Builder setter method returns reference to the builder.
|
||||
Builder &SetRowsPerBuffer(int32_t rows_per_buffer) {
|
||||
builder_rows_per_buffer_ = rows_per_buffer;
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Setter method
|
||||
// @param int32_t size
|
||||
// @return Builder setter method returns reference to the builder.
|
||||
|
@ -159,13 +151,12 @@ class ImageFolderOp : public MappableLeafOp {
|
|||
|
||||
// Constructor
|
||||
// @param int32_t num_wkrs - Num of workers reading images in parallel
|
||||
// @param int32_t - rows_per_buffer Number of images (rows) in each buffer
|
||||
// @param std::string - dir directory of ImageNetFolder
|
||||
// @param int32_t queue_size - connector queue size
|
||||
// @param std::set<std::string> exts - set of file extensions to read, if empty, read everything under the dir
|
||||
// @param std::shared_ptr<SamplerRT> sampler - sampler tells ImageFolderOp what to read
|
||||
ImageFolderOp(int32_t num_wkrs, int32_t rows_per_buffer, std::string file_dir, int32_t queue_size, bool recursive,
|
||||
bool do_decode, const std::set<std::string> &exts, const std::map<std::string, int32_t> &map,
|
||||
ImageFolderOp(int32_t num_wkrs, std::string file_dir, int32_t queue_size, bool recursive, bool do_decode,
|
||||
const std::set<std::string> &exts, const std::map<std::string, int32_t> &map,
|
||||
std::unique_ptr<DataSchema>, std::shared_ptr<SamplerRT> sampler);
|
||||
|
||||
// Destructor.
|
||||
|
|
|
@ -33,7 +33,6 @@ namespace dataset {
|
|||
ManifestOp::Builder::Builder() : builder_sampler_(nullptr), builder_decode_(false) {
|
||||
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
|
||||
builder_num_workers_ = cfg->num_parallel_workers();
|
||||
builder_rows_per_buffer_ = cfg->rows_per_buffer();
|
||||
builder_op_connector_size_ = cfg->op_connector_size();
|
||||
}
|
||||
|
||||
|
@ -49,9 +48,9 @@ Status ManifestOp::Builder::Build(std::shared_ptr<ManifestOp> *ptr) {
|
|||
builder_schema_->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1)));
|
||||
RETURN_IF_NOT_OK(
|
||||
builder_schema_->AddColumn(ColDescriptor("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1)));
|
||||
*ptr = std::make_shared<ManifestOp>(builder_num_workers_, builder_rows_per_buffer_, builder_file_,
|
||||
builder_op_connector_size_, builder_decode_, builder_labels_to_read_,
|
||||
std::move(builder_schema_), std::move(builder_sampler_), builder_usage_);
|
||||
*ptr = std::make_shared<ManifestOp>(builder_num_workers_, builder_file_, builder_op_connector_size_, builder_decode_,
|
||||
builder_labels_to_read_, std::move(builder_schema_), std::move(builder_sampler_),
|
||||
builder_usage_);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
@ -64,10 +63,10 @@ Status ManifestOp::Builder::SanityCheck() {
|
|||
return err_msg.empty() ? Status::OK() : Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, err_msg);
|
||||
}
|
||||
|
||||
ManifestOp::ManifestOp(int32_t num_works, int32_t rows_per_buffer, std::string file, int32_t queue_size, bool decode,
|
||||
ManifestOp::ManifestOp(int32_t num_works, std::string file, int32_t queue_size, bool decode,
|
||||
const std::map<std::string, int32_t> &class_index, std::unique_ptr<DataSchema> data_schema,
|
||||
std::shared_ptr<SamplerRT> sampler, std::string usage)
|
||||
: MappableLeafOp(num_works, queue_size, std::move(sampler), rows_per_buffer),
|
||||
: MappableLeafOp(num_works, queue_size, std::move(sampler)),
|
||||
io_block_pushed_(0),
|
||||
sampler_ind_(0),
|
||||
data_schema_(std::move(data_schema)),
|
||||
|
|
|
@ -46,14 +46,6 @@ class ManifestOp : public MappableLeafOp {
|
|||
// Destructor
|
||||
~Builder() = default;
|
||||
|
||||
// Setter method
|
||||
// @param int32_t rows_per_buffer
|
||||
// @return Builder setter method returns reference to the builder.
|
||||
Builder &SetRowsPerBuffer(int32_t rows_per_buffer) {
|
||||
builder_rows_per_buffer_ = rows_per_buffer;
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Setter method
|
||||
// @param int32_t size
|
||||
// @return Builder setter method returns reference to the builder.
|
||||
|
@ -134,11 +126,10 @@ class ManifestOp : public MappableLeafOp {
|
|||
|
||||
// Constructor
|
||||
// @param int32_t num_works - Num of workers reading images in parallel
|
||||
// @param int32_t - rows_per_buffer Number of images (rows) in each buffer
|
||||
// @param std::string - file list of Manifest
|
||||
// @param int32_t queue_size - connector queue size
|
||||
// @param std::shared_ptr<SamplerRT> sampler - sampler tells ManifestOp what to read
|
||||
ManifestOp(int32_t num_works, int32_t rows_per_buffer, std::string file, int32_t queue_size, bool decode,
|
||||
ManifestOp(int32_t num_works, std::string file, int32_t queue_size, bool decode,
|
||||
const std::map<std::string, int32_t> &class_index, std::unique_ptr<DataSchema> data_schema,
|
||||
std::shared_ptr<SamplerRT> sampler, std::string usage);
|
||||
// Destructor.
|
||||
|
|
|
@ -24,9 +24,8 @@
|
|||
namespace mindspore {
|
||||
namespace dataset {
|
||||
|
||||
MappableLeafOp::MappableLeafOp(int32_t num_wkrs, int32_t queue_size, std::shared_ptr<SamplerRT> sampler,
|
||||
int32_t rows_per_buffer)
|
||||
: ParallelOp(num_wkrs, queue_size, std::move(sampler)), rows_per_buffer_(rows_per_buffer) {}
|
||||
MappableLeafOp::MappableLeafOp(int32_t num_wkrs, int32_t queue_size, std::shared_ptr<SamplerRT> sampler)
|
||||
: ParallelOp(num_wkrs, queue_size, std::move(sampler)) {}
|
||||
|
||||
// Main logic, Register Queue with TaskGroup, launch all threads and do the functor's work
|
||||
Status MappableLeafOp::operator()() {
|
||||
|
|
|
@ -47,16 +47,13 @@ namespace dataset {
|
|||
template <typename T>
|
||||
class Queue;
|
||||
|
||||
using ImageLabelPair = std::shared_ptr<std::pair<std::string, int32_t>>;
|
||||
using FolderImagesPair = std::shared_ptr<std::pair<std::string, std::queue<ImageLabelPair>>>;
|
||||
|
||||
class MappableLeafOp : public ParallelOp, public RandomAccessOp {
|
||||
public:
|
||||
/// Constructor
|
||||
/// \param int32_t num_wkrs - Num of workers reading images in parallel
|
||||
/// \param int32_t queue_size - connector queue size
|
||||
/// \param std::shared_ptr<SamplerRT> sampler - sampler tells the source what to read
|
||||
MappableLeafOp(int32_t num_wkrs, int32_t queue_size, std::shared_ptr<SamplerRT> sampler, int32_t rows_per_buffer);
|
||||
MappableLeafOp(int32_t num_wkrs, int32_t queue_size, std::shared_ptr<SamplerRT> sampler);
|
||||
|
||||
/// Destructor.
|
||||
~MappableLeafOp() = default;
|
||||
|
@ -94,10 +91,6 @@ class MappableLeafOp : public ParallelOp, public RandomAccessOp {
|
|||
/// Reset function to be called after every epoch to reset the source op
|
||||
/// \return Status The status code returned
|
||||
Status Reset() override;
|
||||
|
||||
int32_t rows_per_buffer_;
|
||||
int64_t row_cnt_;
|
||||
int64_t buf_cnt_;
|
||||
};
|
||||
} // namespace dataset
|
||||
} // namespace mindspore
|
||||
|
|
|
@ -50,7 +50,6 @@ MindRecordOp::Builder::Builder() : build_dataset_file_({}) {
|
|||
|
||||
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
|
||||
build_num_mind_record_workers_ = kDefaultMindRecordWorkers;
|
||||
build_rows_per_buffer_ = cfg->rows_per_buffer();
|
||||
build_op_connector_queue_size_ = cfg->op_connector_size();
|
||||
builder_num_workers_ = 0;
|
||||
build_load_dataset_ = false;
|
||||
|
@ -114,7 +113,7 @@ MindRecordOp::MindRecordOp(int32_t num_mind_record_workers, std::vector<std::str
|
|||
int32_t op_connector_queue_size, const std::vector<std::string> &columns_to_load,
|
||||
const std::vector<std::shared_ptr<ShardOperator>> &operators, int64_t num_padded,
|
||||
const mindrecord::json &sample_json, const std::map<std::string, std::string> &sample_bytes)
|
||||
: MappableLeafOp(num_mind_record_workers, op_connector_queue_size, std::make_shared<SequentialSamplerRT>(0, 0), 1),
|
||||
: MappableLeafOp(num_mind_record_workers, op_connector_queue_size, std::make_shared<SequentialSamplerRT>(0, 0)),
|
||||
dataset_file_(dataset_file),
|
||||
load_dataset_(load_dataset),
|
||||
columns_to_load_(columns_to_load),
|
||||
|
|
|
@ -35,7 +35,6 @@ const int32_t kMnistImageCols = 28;
|
|||
MnistOp::Builder::Builder() : builder_sampler_(nullptr), builder_usage_("") {
|
||||
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
|
||||
builder_num_workers_ = cfg->num_parallel_workers();
|
||||
builder_rows_per_buffer_ = cfg->rows_per_buffer();
|
||||
builder_op_connector_size_ = cfg->op_connector_size();
|
||||
}
|
||||
|
||||
|
@ -52,8 +51,8 @@ Status MnistOp::Builder::Build(std::shared_ptr<MnistOp> *ptr) {
|
|||
TensorShape scalar = TensorShape::CreateScalar();
|
||||
RETURN_IF_NOT_OK(builder_schema_->AddColumn(
|
||||
ColDescriptor("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &scalar)));
|
||||
*ptr = std::make_shared<MnistOp>(builder_usage_, builder_num_workers_, builder_rows_per_buffer_, builder_dir_,
|
||||
builder_op_connector_size_, std::move(builder_schema_), std::move(builder_sampler_));
|
||||
*ptr = std::make_shared<MnistOp>(builder_usage_, builder_num_workers_, builder_dir_, builder_op_connector_size_,
|
||||
std::move(builder_schema_), std::move(builder_sampler_));
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
@ -73,9 +72,9 @@ Status MnistOp::Builder::SanityCheck() {
|
|||
return err_msg.empty() ? Status::OK() : Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, err_msg);
|
||||
}
|
||||
|
||||
MnistOp::MnistOp(const std::string &usage, int32_t num_workers, int32_t rows_per_buffer, std::string folder_path,
|
||||
int32_t queue_size, std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler)
|
||||
: MappableLeafOp(num_workers, queue_size, std::move(sampler), rows_per_buffer),
|
||||
MnistOp::MnistOp(const std::string &usage, int32_t num_workers, std::string folder_path, int32_t queue_size,
|
||||
std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler)
|
||||
: MappableLeafOp(num_workers, queue_size, std::move(sampler)),
|
||||
usage_(usage),
|
||||
folder_path_(folder_path),
|
||||
image_path_({}),
|
||||
|
|
|
@ -52,14 +52,6 @@ class MnistOp : public MappableLeafOp {
|
|||
// Destructor.
|
||||
~Builder() = default;
|
||||
|
||||
// Setter method
|
||||
// @param int32_t rows_per_buffer
|
||||
// @return Builder setter method returns reference to the builder.
|
||||
Builder &SetRowsPerBuffer(int32_t rows_per_buffer) {
|
||||
builder_rows_per_buffer_ = rows_per_buffer;
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Setter method
|
||||
// @param int32_t op_connector_size
|
||||
// @return Builder setter method returns reference to the builder.
|
||||
|
@ -121,13 +113,12 @@ class MnistOp : public MappableLeafOp {
|
|||
// Constructor
|
||||
// @param const std::string &usage - Usage of this dataset, can be 'train', 'test' or 'all'
|
||||
// @param int32_t num_workers - number of workers reading images in parallel
|
||||
// @param int32_t rows_per_buffer - number of images (rows) in each buffer
|
||||
// @param std::string folder_path - dir directory of mnist
|
||||
// @param int32_t queue_size - connector queue size
|
||||
// @param std::unique_ptr<DataSchema> data_schema - the schema of the mnist dataset
|
||||
// @param td::unique_ptr<Sampler> sampler - sampler tells MnistOp what to read
|
||||
MnistOp(const std::string &usage, int32_t num_workers, int32_t rows_per_buffer, std::string folder_path,
|
||||
int32_t queue_size, std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler);
|
||||
MnistOp(const std::string &usage, int32_t num_workers, std::string folder_path, int32_t queue_size,
|
||||
std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler);
|
||||
|
||||
// Destructor.
|
||||
~MnistOp() = default;
|
||||
|
|
|
@ -36,13 +36,12 @@
|
|||
namespace mindspore {
|
||||
namespace dataset {
|
||||
|
||||
NonMappableLeafOp::NonMappableLeafOp(int32_t num_workers, int32_t worker_connector_size, int64_t rows_per_buffer,
|
||||
int64_t total_num_rows, int32_t op_connector_size, bool shuffle_files,
|
||||
int32_t num_devices, int32_t device_id)
|
||||
NonMappableLeafOp::NonMappableLeafOp(int32_t num_workers, int32_t worker_connector_size, int64_t total_num_rows,
|
||||
int32_t op_connector_size, bool shuffle_files, int32_t num_devices,
|
||||
int32_t device_id)
|
||||
: ParallelOp(num_workers, op_connector_size),
|
||||
device_id_(device_id),
|
||||
num_devices_(num_devices),
|
||||
rows_per_buffer_(rows_per_buffer),
|
||||
filename_index_(std::make_unique<StringIndex>()),
|
||||
load_io_block_queue_(true),
|
||||
load_jagged_connector_(true),
|
||||
|
|
|
@ -49,14 +49,13 @@ class NonMappableLeafOp : public ParallelOp {
|
|||
// @note The builder class should be used to call this constructor.
|
||||
// @param num_workers - number of worker threads reading data from tf_file files.
|
||||
// @param worker_connector_size - size of each internal queue.
|
||||
// @param rows_per_buffer - number of rows that a full buffer will contain.
|
||||
// @param total_num_rows - Number of rows to read
|
||||
// @param dataset_files_list - list of filepaths for the dataset files.
|
||||
// @param op_connector_size - size of each queue in the connector that the child operator pulls from.
|
||||
// @param columns_to_load - the names of the columns to load data from.
|
||||
// @param shuffle_files - whether or not to shuffle the files before reading data.
|
||||
// @param equal_rows_per_shard - whether or not to get equal rows for each process.
|
||||
NonMappableLeafOp(int32_t num_workers, int32_t worker_connector_size, int64_t rows_per_buffer, int64_t total_num_rows,
|
||||
NonMappableLeafOp(int32_t num_workers, int32_t worker_connector_size, int64_t total_num_rows,
|
||||
int32_t op_connector_size, bool shuffle_files, int32_t num_devices, int32_t device_id);
|
||||
|
||||
// Default destructor
|
||||
|
@ -77,9 +76,6 @@ class NonMappableLeafOp : public ParallelOp {
|
|||
// @return Status - the error code returned.
|
||||
Status Reset() override;
|
||||
|
||||
// Getter method
|
||||
int64_t rows_per_buffer() const { return rows_per_buffer_; }
|
||||
|
||||
// Op name getter
|
||||
// @return Name of the current Op
|
||||
std::string Name() const override { return "NonMappableLeafOp"; }
|
||||
|
@ -157,7 +153,6 @@ class NonMappableLeafOp : public ParallelOp {
|
|||
bool finished_reading_dataset_;
|
||||
int64_t total_rows_;
|
||||
|
||||
int64_t rows_per_buffer_;
|
||||
WaitPost io_block_queue_wait_post_;
|
||||
bool load_io_block_queue_;
|
||||
std::mutex load_io_block_queue_mutex_;
|
||||
|
|
|
@ -37,7 +37,6 @@ RandomDataOp::Builder::Builder()
|
|||
// Some arguments to the RandomDataOp have a default argument that is taken from the config.
|
||||
// The user may override these defaults by using the builder set methods.
|
||||
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
|
||||
builder_rows_per_buffer_ = cfg->rows_per_buffer();
|
||||
builder_num_workers_ = cfg->num_parallel_workers();
|
||||
builder_op_connector_size_ = cfg->op_connector_size();
|
||||
}
|
||||
|
|
|
@ -97,16 +97,6 @@ class RandomDataOp : public ParallelOp {
|
|||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Builder set method
|
||||
* @param rows_per_buffer - The number of rows in each DataBuffer
|
||||
* @return Builder - The modified builder by reference
|
||||
*/
|
||||
Builder &SetRowsPerBuffer(int64_t rows_per_buffer) {
|
||||
builder_rows_per_buffer_ = rows_per_buffer;
|
||||
return *this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Builder set method
|
||||
* @param total_rows - The total number of rows in the dataset
|
||||
|
|
|
@ -36,7 +36,6 @@ TextFileOp::Builder::Builder()
|
|||
std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
|
||||
builder_num_workers_ = config_manager->num_parallel_workers();
|
||||
builder_op_connector_size_ = config_manager->op_connector_size();
|
||||
builder_rows_per_buffer_ = config_manager->rows_per_buffer();
|
||||
builder_worker_connector_size_ = config_manager->worker_connector_size();
|
||||
}
|
||||
|
||||
|
@ -65,21 +64,21 @@ Status TextFileOp::Builder::Build(std::shared_ptr<TextFileOp> *op) {
|
|||
RETURN_IF_NOT_OK(
|
||||
builder_schema_->AddColumn(ColDescriptor("text", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1)));
|
||||
|
||||
std::shared_ptr<TextFileOp> text_file_op = std::make_shared<TextFileOp>(
|
||||
builder_num_workers_, builder_rows_per_buffer_, builder_total_rows_, builder_worker_connector_size_,
|
||||
std::move(builder_schema_), builder_text_files_list_, builder_op_connector_size_, builder_shuffle_files_,
|
||||
builder_num_devices_, builder_device_id_);
|
||||
std::shared_ptr<TextFileOp> text_file_op =
|
||||
std::make_shared<TextFileOp>(builder_num_workers_, builder_total_rows_, builder_worker_connector_size_,
|
||||
std::move(builder_schema_), builder_text_files_list_, builder_op_connector_size_,
|
||||
builder_shuffle_files_, builder_num_devices_, builder_device_id_);
|
||||
RETURN_IF_NOT_OK(text_file_op->Init());
|
||||
*op = std::move(text_file_op);
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
TextFileOp::TextFileOp(int32_t num_workers, int64_t rows_per_buffer, int64_t total_rows, int32_t worker_connector_size,
|
||||
TextFileOp::TextFileOp(int32_t num_workers, int64_t total_rows, int32_t worker_connector_size,
|
||||
std::unique_ptr<DataSchema> schema, std::vector<std::string> text_files_list,
|
||||
int32_t op_connector_size, bool shuffle_files, int32_t num_devices, int32_t device_id)
|
||||
: NonMappableLeafOp(num_workers, worker_connector_size, rows_per_buffer, total_rows, op_connector_size,
|
||||
shuffle_files, num_devices, device_id),
|
||||
: NonMappableLeafOp(num_workers, worker_connector_size, total_rows, op_connector_size, shuffle_files, num_devices,
|
||||
device_id),
|
||||
text_files_list_(std::move(text_files_list)),
|
||||
data_schema_(std::move(schema)) {}
|
||||
|
||||
|
@ -94,9 +93,8 @@ void TextFileOp::Print(std::ostream &out, bool show_all) const {
|
|||
// Call the super class for displaying any common detailed info
|
||||
ParallelOp::Print(out, show_all);
|
||||
// Then show any custom derived-internal stuff
|
||||
out << "\nRows per buffer: " << rows_per_buffer_ << "\nRow count: " << total_rows_ << "\nDevice id: " << device_id_
|
||||
<< "\nNumber of devices: " << num_devices_ << "\nShuffle files: " << ((shuffle_files_) ? "yes" : "no")
|
||||
<< "\nText files list:\n";
|
||||
out << "\nRow count: " << total_rows_ << "\nDevice id: " << device_id_ << "\nNumber of devices: " << num_devices_
|
||||
<< "\nShuffle files: " << ((shuffle_files_) ? "yes" : "no") << "\nText files list:\n";
|
||||
for (int i = 0; i < text_files_list_.size(); ++i) {
|
||||
out << " " << text_files_list_[i];
|
||||
}
|
||||
|
|
|
@ -129,7 +129,6 @@ class TextFileOp : public NonMappableLeafOp {
|
|||
// Constructor of TextFileOp
|
||||
// @note The builder class should be used to call this constructor.
|
||||
// @param num_workers - number of worker threads reading data from tf_file files.
|
||||
// @param rows_per_buffer - number of rows that a full buffer will contain.
|
||||
// @param total_num_rows - number of rows to read
|
||||
// @param dataset_files_list - list of filepaths for the dataset files.
|
||||
// @param data_schema - the data schema object.
|
||||
|
@ -137,9 +136,9 @@ class TextFileOp : public NonMappableLeafOp {
|
|||
// @param columns_to_load - the names of the columns to load data from.
|
||||
// @param shuffle_files - whether or not to shuffle the files before reading data.
|
||||
// @param equal_rows_per_shard - whether or not to get equal rows for each process.
|
||||
TextFileOp(int32_t num_workers, int64_t rows_per_buffer, int64_t total_rows, int32_t worker_connector_size,
|
||||
std::unique_ptr<DataSchema>, std::vector<std::string> text_files_list, int32_t op_connector_size,
|
||||
bool shuffle_files, int32_t num_devices, int32_t device_id);
|
||||
TextFileOp(int32_t num_workers, int64_t total_rows, int32_t worker_connector_size, std::unique_ptr<DataSchema>,
|
||||
std::vector<std::string> text_files_list, int32_t op_connector_size, bool shuffle_files,
|
||||
int32_t num_devices, int32_t device_id);
|
||||
|
||||
// Default destructor
|
||||
~TextFileOp() = default;
|
||||
|
|
|
@ -47,7 +47,6 @@ TFReaderOp::Builder::Builder()
|
|||
builder_num_workers_ = config_manager->num_parallel_workers();
|
||||
builder_worker_connector_size_ = config_manager->worker_connector_size();
|
||||
builder_op_connector_size_ = config_manager->op_connector_size();
|
||||
builder_rows_per_buffer_ = config_manager->rows_per_buffer();
|
||||
builder_shuffle_files_ = false;
|
||||
builder_data_schema_ = std::make_unique<DataSchema>();
|
||||
}
|
||||
|
@ -114,22 +113,21 @@ Status TFReaderOp::Builder::Build(std::shared_ptr<TFReaderOp> *out_tf_reader_op)
|
|||
}
|
||||
|
||||
std::shared_ptr<TFReaderOp> new_tf_reader_op = std::make_shared<TFReaderOp>(
|
||||
builder_num_workers_, builder_worker_connector_size_, builder_rows_per_buffer_, builder_total_rows_,
|
||||
builder_dataset_files_list_, std::move(builder_data_schema_), builder_op_connector_size_, builder_columns_to_load_,
|
||||
builder_shuffle_files_, builder_num_devices_, builder_device_id_, builder_equal_rows_per_shard_);
|
||||
builder_num_workers_, builder_worker_connector_size_, builder_total_rows_, builder_dataset_files_list_,
|
||||
std::move(builder_data_schema_), builder_op_connector_size_, builder_columns_to_load_, builder_shuffle_files_,
|
||||
builder_num_devices_, builder_device_id_, builder_equal_rows_per_shard_);
|
||||
|
||||
RETURN_IF_NOT_OK(new_tf_reader_op->Init());
|
||||
*out_tf_reader_op = std::move(new_tf_reader_op);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
TFReaderOp::TFReaderOp(int32_t num_workers, int32_t worker_connector_size, int64_t rows_per_buffer,
|
||||
int64_t total_num_rows, std::vector<std::string> dataset_files_list,
|
||||
std::unique_ptr<DataSchema> data_schema, int32_t op_connector_size,
|
||||
std::vector<std::string> columns_to_load, bool shuffle_files, int32_t num_devices,
|
||||
int32_t device_id, bool equal_rows_per_shard)
|
||||
: NonMappableLeafOp(num_workers, worker_connector_size, rows_per_buffer, total_num_rows, op_connector_size,
|
||||
shuffle_files, num_devices, device_id),
|
||||
TFReaderOp::TFReaderOp(int32_t num_workers, int32_t worker_connector_size, int64_t total_num_rows,
|
||||
std::vector<std::string> dataset_files_list, std::unique_ptr<DataSchema> data_schema,
|
||||
int32_t op_connector_size, std::vector<std::string> columns_to_load, bool shuffle_files,
|
||||
int32_t num_devices, int32_t device_id, bool equal_rows_per_shard)
|
||||
: NonMappableLeafOp(num_workers, worker_connector_size, total_num_rows, op_connector_size, shuffle_files,
|
||||
num_devices, device_id),
|
||||
dataset_files_list_(std::move(dataset_files_list)),
|
||||
columns_to_load_(std::move(columns_to_load)),
|
||||
data_schema_(std::move(data_schema)),
|
||||
|
@ -146,8 +144,8 @@ void TFReaderOp::Print(std::ostream &out, bool show_all) const {
|
|||
// Call the super class for displaying any common detailed info
|
||||
ParallelOp::Print(out, show_all);
|
||||
// Then show any custom derived-internal stuff
|
||||
out << "\nRows per buffer: " << rows_per_buffer_ << "\nTotal rows: " << total_rows_ << "\nDevice id: " << device_id_
|
||||
<< "\nNumber of devices: " << num_devices_ << "\nShuffle files: " << ((shuffle_files_) ? "yes" : "no")
|
||||
out << "\nTotal rows: " << total_rows_ << "\nDevice id: " << device_id_ << "\nNumber of devices: " << num_devices_
|
||||
<< "\nShuffle files: " << ((shuffle_files_) ? "yes" : "no")
|
||||
<< "\nDataset files list: Size: " << dataset_files_list_.size() << "\n";
|
||||
for (int i = 0; i < dataset_files_list_.size(); ++i) {
|
||||
out << " " << dataset_files_list_[i];
|
||||
|
|
|
@ -173,7 +173,6 @@ class TFReaderOp : public NonMappableLeafOp {
|
|||
// @note The builder class should be used to call this constructor.
|
||||
// @param num_workers - number of worker threads reading data from tf_file files.
|
||||
// @param worker_connector_size - size of each internal queue.
|
||||
// @param rows_per_buffer - number of rows that a full buffer will contain.
|
||||
// @param total_num_rows - Number of rows to read
|
||||
// @param dataset_files_list - list of filepaths for the dataset files.
|
||||
// @param data_schema - the data schema object.
|
||||
|
@ -181,7 +180,7 @@ class TFReaderOp : public NonMappableLeafOp {
|
|||
// @param columns_to_load - the names of the columns to load data from.
|
||||
// @param shuffle_files - whether or not to shuffle the files before reading data.
|
||||
// @param equal_rows_per_shard - whether or not to get equal rows for each process.
|
||||
TFReaderOp(int32_t num_workers, int32_t worker_connector_size, int64_t rows_per_buffer, int64_t total_num_rows,
|
||||
TFReaderOp(int32_t num_workers, int32_t worker_connector_size, int64_t total_num_rows,
|
||||
std::vector<std::string> dataset_files_list, std::unique_ptr<DataSchema> data_schema,
|
||||
int32_t op_connector_size, std::vector<std::string> columns_to_load, bool shuffle_files,
|
||||
int32_t num_devices, int32_t device_id, bool equal_rows_per_shard);
|
||||
|
|
|
@ -47,7 +47,6 @@ const char kImageSetsExtension[] = ".txt";
|
|||
VOCOp::Builder::Builder() : builder_decode_(false), builder_sampler_(nullptr) {
|
||||
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
|
||||
builder_num_workers_ = cfg->num_parallel_workers();
|
||||
builder_rows_per_buffer_ = cfg->rows_per_buffer();
|
||||
builder_op_connector_size_ = cfg->op_connector_size();
|
||||
builder_task_type_ = TaskType::Segmentation;
|
||||
}
|
||||
|
@ -78,8 +77,8 @@ Status VOCOp::Builder::Build(std::shared_ptr<VOCOp> *ptr) {
|
|||
ColDescriptor(std::string(kColumnTruncate), DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1)));
|
||||
}
|
||||
*ptr = std::make_shared<VOCOp>(builder_task_type_, builder_usage_, builder_dir_, builder_labels_to_read_,
|
||||
builder_num_workers_, builder_rows_per_buffer_, builder_op_connector_size_,
|
||||
builder_decode_, std::move(builder_schema_), std::move(builder_sampler_));
|
||||
builder_num_workers_, builder_op_connector_size_, builder_decode_,
|
||||
std::move(builder_schema_), std::move(builder_sampler_));
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
@ -96,10 +95,9 @@ Status VOCOp::Builder::SanityCheck() {
|
|||
}
|
||||
|
||||
VOCOp::VOCOp(const TaskType &task_type, const std::string &task_mode, const std::string &folder_path,
|
||||
const std::map<std::string, int32_t> &class_index, int32_t num_workers, int32_t rows_per_buffer,
|
||||
int32_t queue_size, bool decode, std::unique_ptr<DataSchema> data_schema,
|
||||
std::shared_ptr<SamplerRT> sampler)
|
||||
: MappableLeafOp(num_workers, queue_size, std::move(sampler), rows_per_buffer),
|
||||
const std::map<std::string, int32_t> &class_index, int32_t num_workers, int32_t queue_size, bool decode,
|
||||
std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler)
|
||||
: MappableLeafOp(num_workers, queue_size, std::move(sampler)),
|
||||
decode_(decode),
|
||||
task_type_(task_type),
|
||||
usage_(task_mode),
|
||||
|
|
|
@ -112,14 +112,6 @@ class VOCOp : public MappableLeafOp {
|
|||
return *this;
|
||||
}
|
||||
|
||||
// Setter method.
|
||||
// @param int32_t rows_per_buffer
|
||||
// @return Builder setter method returns reference to the builder.
|
||||
Builder &SetRowsPerBuffer(int32_t rows_per_buffer) {
|
||||
builder_rows_per_buffer_ = rows_per_buffer;
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Setter method.
|
||||
// @param std::shared_ptr<Sampler> sampler
|
||||
// @return Builder setter method returns reference to the builder.
|
||||
|
@ -164,14 +156,13 @@ class VOCOp : public MappableLeafOp {
|
|||
// @param std::string folder_path - dir directory of VOC
|
||||
// @param std::map<std::string, int32_t> class_index - input class-to-index of annotation
|
||||
// @param int32_t num_workers - number of workers reading images in parallel
|
||||
// @param int32_t rows_per_buffer - number of images (rows) in each buffer
|
||||
// @param int32_t queue_size - connector queue size
|
||||
// @param bool decode - whether to decode images
|
||||
// @param std::unique_ptr<DataSchema> data_schema - the schema of the VOC dataset
|
||||
// @param std::shared_ptr<Sampler> sampler - sampler tells VOCOp what to read
|
||||
VOCOp(const TaskType &task_type, const std::string &task_mode, const std::string &folder_path,
|
||||
const std::map<std::string, int32_t> &class_index, int32_t num_workers, int32_t rows_per_buffer,
|
||||
int32_t queue_size, bool decode, std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler);
|
||||
const std::map<std::string, int32_t> &class_index, int32_t num_workers, int32_t queue_size, bool decode,
|
||||
std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler);
|
||||
|
||||
// Destructor
|
||||
~VOCOp() = default;
|
||||
|
@ -255,11 +246,9 @@ class VOCOp : public MappableLeafOp {
|
|||
|
||||
bool decode_;
|
||||
int64_t row_cnt_;
|
||||
int64_t buf_cnt_;
|
||||
std::string folder_path_;
|
||||
TaskType task_type_;
|
||||
std::string usage_;
|
||||
int32_t rows_per_buffer_;
|
||||
std::unique_ptr<DataSchema> data_schema_;
|
||||
|
||||
std::vector<std::string> image_ids_;
|
||||
|
|
|
@ -33,7 +33,6 @@ ZipOp::Builder::Builder() {
|
|||
// using the various builder set methods.
|
||||
|
||||
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
|
||||
builder_rows_per_buffer_ = cfg->rows_per_buffer();
|
||||
builder_op_connector_size_ = cfg->op_connector_size();
|
||||
}
|
||||
|
||||
|
@ -41,18 +40,13 @@ Status ZipOp::Builder::SanityCheck() const { return Status::OK(); }
|
|||
|
||||
Status ZipOp::Builder::Build(std::shared_ptr<ZipOp> *ptr) {
|
||||
RETURN_IF_NOT_OK(SanityCheck());
|
||||
*ptr = std::make_shared<ZipOp>(builder_rows_per_buffer_, builder_op_connector_size_);
|
||||
*ptr = std::make_shared<ZipOp>(builder_op_connector_size_);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
// Construct ZipOp here, local variables initialized in operator due to tree construction restrictions
|
||||
ZipOp::ZipOp(int32_t rows_per_buffer, int32_t op_connector_size)
|
||||
: PipelineOp(op_connector_size),
|
||||
children_num_(0),
|
||||
rows_per_buffer_(rows_per_buffer),
|
||||
buffer_id_(0),
|
||||
draining_(false),
|
||||
eof_(false) {}
|
||||
ZipOp::ZipOp(int32_t op_connector_size)
|
||||
: PipelineOp(op_connector_size), children_num_(0), draining_(false), eof_(false) {}
|
||||
|
||||
// destructor
|
||||
ZipOp::~ZipOp() {}
|
||||
|
|
|
@ -76,9 +76,8 @@ class ZipOp : public PipelineOp {
|
|||
};
|
||||
|
||||
// Constructor for ZipOp
|
||||
// @param rows_per_buffer - number of rows in output buffer
|
||||
// @param op_connector_size - connector size
|
||||
ZipOp(int32_t rows_per_buffer, int32_t op_connector_size);
|
||||
explicit ZipOp(int32_t op_connector_size);
|
||||
|
||||
// Destructor
|
||||
~ZipOp();
|
||||
|
@ -136,8 +135,6 @@ class ZipOp : public PipelineOp {
|
|||
Status ComputeColMap() override;
|
||||
|
||||
int32_t children_num_;
|
||||
int32_t rows_per_buffer_;
|
||||
int32_t buffer_id_;
|
||||
bool draining_;
|
||||
bool eof_;
|
||||
std::vector<std::unique_ptr<ChildIterator>> child_iterators_;
|
||||
|
|
|
@ -58,13 +58,13 @@ Status ComputeShuffleSize(int64_t num_files, int64_t num_devices, int64_t num_ro
|
|||
|
||||
// Helper function to inject a shuffle operator over top of current operator being built
|
||||
Status AddShuffleOp(int64_t num_files, int64_t num_devices, int64_t num_rows, int64_t total_rows,
|
||||
int32_t connector_que_size, int32_t rows_per_buffer, std::shared_ptr<DatasetOp> *shuffle_op) {
|
||||
int32_t connector_que_size, std::shared_ptr<DatasetOp> *shuffle_op) {
|
||||
std::shared_ptr<ShuffleOp> new_shuffle_op = nullptr;
|
||||
int64_t shuffle_size = 0;
|
||||
RETURN_IF_NOT_OK(ComputeShuffleSize(num_files, num_devices, num_rows, total_rows, &shuffle_size));
|
||||
MS_LOG(INFO) << "Dataset::AddShuffleOp - num_rows: " << num_rows << ", shuffle_size: " << shuffle_size;
|
||||
// Add the shuffle op
|
||||
*shuffle_op = std::make_shared<ShuffleOp>(shuffle_size, GetSeed(), connector_que_size, true, rows_per_buffer);
|
||||
*shuffle_op = std::make_shared<ShuffleOp>(shuffle_size, GetSeed(), connector_que_size, true);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
@ -231,7 +231,6 @@ DatasetNode::DatasetNode()
|
|||
// Fetch some default value from config manager
|
||||
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
|
||||
num_workers_ = cfg->num_parallel_workers();
|
||||
rows_per_buffer_ = cfg->rows_per_buffer();
|
||||
connector_que_size_ = cfg->op_connector_size();
|
||||
worker_connector_size_ = cfg->worker_connector_size();
|
||||
}
|
||||
|
|
|
@ -92,7 +92,7 @@ constexpr char kTFRecordNode[] = "TFRecordDataset";
|
|||
constexpr char kVOCNode[] = "VOCDataset";
|
||||
|
||||
Status AddShuffleOp(int64_t num_files, int64_t num_devices, int64_t num_rows, int64_t total_rows,
|
||||
int32_t connector_que_size, int32_t rows_per_buffer, std::shared_ptr<DatasetOp> *shuffle_op);
|
||||
int32_t connector_que_size, std::shared_ptr<DatasetOp> *shuffle_op);
|
||||
|
||||
// Helper function to validate dataset files parameter
|
||||
Status ValidateDatasetFilesParam(const std::string &dataset_name, const std::vector<std::string> &dataset_files);
|
||||
|
@ -323,7 +323,6 @@ class DatasetNode : public std::enable_shared_from_this<DatasetNode> {
|
|||
std::shared_ptr<DatasetCache> cache_;
|
||||
int64_t dataset_size_;
|
||||
int32_t num_workers_;
|
||||
int32_t rows_per_buffer_;
|
||||
int32_t connector_que_size_;
|
||||
int32_t worker_connector_size_;
|
||||
int32_t total_repeats_; // Number of times required to run this operator
|
||||
|
|
|
@ -44,8 +44,7 @@ void ShuffleNode::Print(std::ostream &out) const {
|
|||
|
||||
// Function to build the ShuffleOp
|
||||
Status ShuffleNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) {
|
||||
auto op = std::make_shared<ShuffleOp>(shuffle_size_, shuffle_seed_, connector_que_size_, reset_every_epoch_,
|
||||
rows_per_buffer_);
|
||||
auto op = std::make_shared<ShuffleOp>(shuffle_size_, shuffle_seed_, connector_que_size_, reset_every_epoch_);
|
||||
op->set_total_repeats(GetTotalRepeats());
|
||||
op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch());
|
||||
node_ops->push_back(op);
|
||||
|
|
|
@ -74,8 +74,8 @@ Status AlbumNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops)
|
|||
std::shared_ptr<SamplerRT> sampler_rt = nullptr;
|
||||
RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
|
||||
|
||||
auto album_op = std::make_shared<AlbumOp>(num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_, decode_,
|
||||
extensions, std::move(schema), std::move(sampler_rt));
|
||||
auto album_op = std::make_shared<AlbumOp>(num_workers_, dataset_dir_, connector_que_size_, decode_, extensions,
|
||||
std::move(schema), std::move(sampler_rt));
|
||||
album_op->set_total_repeats(GetTotalRepeats());
|
||||
album_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch());
|
||||
node_ops->push_back(album_op);
|
||||
|
|
|
@ -69,8 +69,8 @@ Status CelebANode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops
|
|||
std::shared_ptr<SamplerRT> sampler_rt = nullptr;
|
||||
RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
|
||||
|
||||
auto celeba_op = std::make_shared<CelebAOp>(num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_,
|
||||
decode_, usage_, extensions_, std::move(schema), std::move(sampler_rt));
|
||||
auto celeba_op = std::make_shared<CelebAOp>(num_workers_, dataset_dir_, connector_que_size_, decode_, usage_,
|
||||
extensions_, std::move(schema), std::move(sampler_rt));
|
||||
celeba_op->set_total_repeats(GetTotalRepeats());
|
||||
celeba_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch());
|
||||
node_ops->push_back(celeba_op);
|
||||
|
|
|
@ -66,8 +66,7 @@ Status Cifar100Node::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_o
|
|||
std::shared_ptr<SamplerRT> sampler_rt = nullptr;
|
||||
RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
|
||||
|
||||
auto cifar_op =
|
||||
std::make_shared<CifarOp>(CifarOp::CifarType::kCifar100, usage_, num_workers_, rows_per_buffer_, dataset_dir_,
|
||||
auto cifar_op = std::make_shared<CifarOp>(CifarOp::CifarType::kCifar100, usage_, num_workers_, dataset_dir_,
|
||||
connector_que_size_, std::move(schema), std::move(sampler_rt));
|
||||
cifar_op->set_total_repeats(GetTotalRepeats());
|
||||
cifar_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch());
|
||||
|
|
|
@ -64,8 +64,7 @@ Status Cifar10Node::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_op
|
|||
std::shared_ptr<SamplerRT> sampler_rt = nullptr;
|
||||
RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
|
||||
|
||||
auto cifar_op =
|
||||
std::make_shared<CifarOp>(CifarOp::CifarType::kCifar10, usage_, num_workers_, rows_per_buffer_, dataset_dir_,
|
||||
auto cifar_op = std::make_shared<CifarOp>(CifarOp::CifarType::kCifar10, usage_, num_workers_, dataset_dir_,
|
||||
connector_que_size_, std::move(schema), std::move(sampler_rt));
|
||||
cifar_op->set_total_repeats(GetTotalRepeats());
|
||||
cifar_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch());
|
||||
|
|
|
@ -177,8 +177,8 @@ Status CLUENode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops)
|
|||
std::sort(sorted_dataset_files.begin(), sorted_dataset_files.end());
|
||||
|
||||
std::shared_ptr<ClueOp> clue_op =
|
||||
std::make_shared<ClueOp>(num_workers_, rows_per_buffer_, num_samples_, worker_connector_size_, ck_map,
|
||||
sorted_dataset_files, connector_que_size_, shuffle_files, num_shards_, shard_id_);
|
||||
std::make_shared<ClueOp>(num_workers_, num_samples_, worker_connector_size_, ck_map, sorted_dataset_files,
|
||||
connector_que_size_, shuffle_files, num_shards_, shard_id_);
|
||||
|
||||
RETURN_IF_NOT_OK(clue_op->Init());
|
||||
|
||||
|
@ -191,8 +191,8 @@ Status CLUENode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops)
|
|||
RETURN_IF_NOT_OK(ClueOp::CountAllFileRows(sorted_dataset_files, &num_rows));
|
||||
|
||||
// Add the shuffle op after this op
|
||||
RETURN_IF_NOT_OK(AddShuffleOp(sorted_dataset_files.size(), num_shards_, num_rows, 0, connector_que_size_,
|
||||
rows_per_buffer_, &shuffle_op));
|
||||
RETURN_IF_NOT_OK(
|
||||
AddShuffleOp(sorted_dataset_files.size(), num_shards_, num_rows, 0, connector_que_size_, &shuffle_op));
|
||||
shuffle_op->set_total_repeats(GetTotalRepeats());
|
||||
shuffle_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch());
|
||||
node_ops->push_back(shuffle_op);
|
||||
|
|
|
@ -123,8 +123,8 @@ Status CocoNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops)
|
|||
RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
|
||||
|
||||
std::shared_ptr<CocoOp> op =
|
||||
std::make_shared<CocoOp>(task_type, dataset_dir_, annotation_file_, num_workers_, rows_per_buffer_,
|
||||
connector_que_size_, decode_, std::move(schema), std::move(sampler_rt));
|
||||
std::make_shared<CocoOp>(task_type, dataset_dir_, annotation_file_, num_workers_, connector_que_size_, decode_,
|
||||
std::move(schema), std::move(sampler_rt));
|
||||
op->set_total_repeats(GetTotalRepeats());
|
||||
op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch());
|
||||
node_ops->push_back(op);
|
||||
|
|
|
@ -114,8 +114,8 @@ Status CSVNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) {
|
|||
}
|
||||
|
||||
std::shared_ptr<CsvOp> csv_op = std::make_shared<CsvOp>(
|
||||
sorted_dataset_files, field_delim_, column_default_list, column_names_, num_workers_, rows_per_buffer_,
|
||||
num_samples_, worker_connector_size_, connector_que_size_, shuffle_files, num_shards_, shard_id_);
|
||||
sorted_dataset_files, field_delim_, column_default_list, column_names_, num_workers_, num_samples_,
|
||||
worker_connector_size_, connector_que_size_, shuffle_files, num_shards_, shard_id_);
|
||||
|
||||
RETURN_IF_NOT_OK(csv_op->Init());
|
||||
|
||||
|
@ -128,8 +128,8 @@ Status CSVNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) {
|
|||
RETURN_IF_NOT_OK(CsvOp::CountAllFileRows(sorted_dataset_files, column_names_.empty(), &num_rows));
|
||||
|
||||
// Add the shuffle op after this op
|
||||
RETURN_IF_NOT_OK(AddShuffleOp(sorted_dataset_files.size(), num_shards_, num_rows, 0, connector_que_size_,
|
||||
rows_per_buffer_, &shuffle_op));
|
||||
RETURN_IF_NOT_OK(
|
||||
AddShuffleOp(sorted_dataset_files.size(), num_shards_, num_rows, 0, connector_que_size_, &shuffle_op));
|
||||
shuffle_op->set_total_repeats(GetTotalRepeats());
|
||||
shuffle_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch());
|
||||
node_ops->push_back(shuffle_op);
|
||||
|
|
|
@ -72,9 +72,8 @@ Status ImageFolderNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const nod
|
|||
std::shared_ptr<SamplerRT> sampler_rt = nullptr;
|
||||
RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
|
||||
|
||||
auto op =
|
||||
std::make_shared<ImageFolderOp>(num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_, recursive_,
|
||||
decode_, exts_, class_indexing_, std::move(schema), std::move(sampler_rt));
|
||||
auto op = std::make_shared<ImageFolderOp>(num_workers_, dataset_dir_, connector_que_size_, recursive_, decode_, exts_,
|
||||
class_indexing_, std::move(schema), std::move(sampler_rt));
|
||||
op->set_total_repeats(GetTotalRepeats());
|
||||
op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch());
|
||||
node_ops->push_back(op);
|
||||
|
|
|
@ -94,8 +94,8 @@ Status ManifestNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_o
|
|||
std::shared_ptr<SamplerRT> sampler_rt = nullptr;
|
||||
RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
|
||||
|
||||
manifest_op = std::make_shared<ManifestOp>(num_workers_, rows_per_buffer_, dataset_file_, connector_que_size_,
|
||||
decode_, class_index_, std::move(schema), std::move(sampler_rt), usage_);
|
||||
manifest_op = std::make_shared<ManifestOp>(num_workers_, dataset_file_, connector_que_size_, decode_, class_index_,
|
||||
std::move(schema), std::move(sampler_rt), usage_);
|
||||
manifest_op->set_total_repeats(GetTotalRepeats());
|
||||
manifest_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch());
|
||||
node_ops->push_back(manifest_op);
|
||||
|
|
|
@ -60,8 +60,8 @@ Status MnistNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops)
|
|||
std::shared_ptr<SamplerRT> sampler_rt = nullptr;
|
||||
RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
|
||||
|
||||
auto op = std::make_shared<MnistOp>(usage_, num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_,
|
||||
std::move(schema), std::move(sampler_rt));
|
||||
auto op = std::make_shared<MnistOp>(usage_, num_workers_, dataset_dir_, connector_que_size_, std::move(schema),
|
||||
std::move(sampler_rt));
|
||||
op->set_total_repeats(GetTotalRepeats());
|
||||
op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch());
|
||||
node_ops->push_back(op);
|
||||
|
|
|
@ -82,9 +82,9 @@ Status TextFileNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_o
|
|||
RETURN_IF_NOT_OK(schema->AddColumn(ColDescriptor("text", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1)));
|
||||
|
||||
// Create and initialize TextFileOp
|
||||
std::shared_ptr<TextFileOp> text_file_op = std::make_shared<TextFileOp>(
|
||||
num_workers_, rows_per_buffer_, num_samples_, worker_connector_size_, std::move(schema), sorted_dataset_files,
|
||||
connector_que_size_, shuffle_files, num_shards_, shard_id_);
|
||||
std::shared_ptr<TextFileOp> text_file_op =
|
||||
std::make_shared<TextFileOp>(num_workers_, num_samples_, worker_connector_size_, std::move(schema),
|
||||
sorted_dataset_files, connector_que_size_, shuffle_files, num_shards_, shard_id_);
|
||||
RETURN_IF_NOT_OK(text_file_op->Init());
|
||||
|
||||
if (cache_ == nullptr && shuffle_ == ShuffleMode::kGlobal && !IsDescendantOfCache()) {
|
||||
|
@ -96,8 +96,8 @@ Status TextFileNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_o
|
|||
RETURN_IF_NOT_OK(TextFileOp::CountAllFileRows(sorted_dataset_files, &num_rows));
|
||||
|
||||
// Add the shuffle op after this op
|
||||
RETURN_IF_NOT_OK(AddShuffleOp(sorted_dataset_files.size(), num_shards_, num_rows, 0, connector_que_size_,
|
||||
rows_per_buffer_, &shuffle_op));
|
||||
RETURN_IF_NOT_OK(
|
||||
AddShuffleOp(sorted_dataset_files.size(), num_shards_, num_rows, 0, connector_que_size_, &shuffle_op));
|
||||
shuffle_op->set_total_repeats(GetTotalRepeats());
|
||||
shuffle_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch());
|
||||
node_ops->push_back(shuffle_op);
|
||||
|
|
|
@ -124,8 +124,8 @@ Status TFRecordNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_o
|
|||
|
||||
// Create and initialize TFReaderOp
|
||||
std::shared_ptr<TFReaderOp> tf_reader_op = std::make_shared<TFReaderOp>(
|
||||
num_workers_, worker_connector_size_, rows_per_buffer_, num_samples_, sorted_dir_files, std::move(data_schema),
|
||||
connector_que_size_, columns_list_, shuffle_files, num_shards_, shard_id_, shard_equal_rows_);
|
||||
num_workers_, worker_connector_size_, num_samples_, sorted_dir_files, std::move(data_schema), connector_que_size_,
|
||||
columns_list_, shuffle_files, num_shards_, shard_id_, shard_equal_rows_);
|
||||
|
||||
RETURN_IF_NOT_OK(tf_reader_op->Init());
|
||||
|
||||
|
@ -139,8 +139,7 @@ Status TFRecordNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_o
|
|||
RETURN_IF_NOT_OK(TFReaderOp::CountTotalRows(&num_rows, sorted_dir_files));
|
||||
|
||||
// Add the shuffle op after this op
|
||||
RETURN_IF_NOT_OK(AddShuffleOp(sorted_dir_files.size(), num_shards_, num_rows, 0, connector_que_size_,
|
||||
rows_per_buffer_, &shuffle_op));
|
||||
RETURN_IF_NOT_OK(AddShuffleOp(sorted_dir_files.size(), num_shards_, num_rows, 0, connector_que_size_, &shuffle_op));
|
||||
shuffle_op->set_total_repeats(GetTotalRepeats());
|
||||
shuffle_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch());
|
||||
node_ops->push_back(shuffle_op);
|
||||
|
|
|
@ -112,8 +112,8 @@ Status VOCNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) {
|
|||
RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
|
||||
|
||||
std::shared_ptr<VOCOp> voc_op;
|
||||
voc_op = std::make_shared<VOCOp>(task_type_, usage_, dataset_dir_, class_index_, num_workers_, rows_per_buffer_,
|
||||
connector_que_size_, decode_, std::move(schema), std::move(sampler_rt));
|
||||
voc_op = std::make_shared<VOCOp>(task_type_, usage_, dataset_dir_, class_index_, num_workers_, connector_que_size_,
|
||||
decode_, std::move(schema), std::move(sampler_rt));
|
||||
voc_op->set_total_repeats(GetTotalRepeats());
|
||||
voc_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch());
|
||||
node_ops->push_back(voc_op);
|
||||
|
|
|
@ -43,11 +43,9 @@ void SyncWaitNode::Print(std::ostream &out) const {
|
|||
|
||||
// Function to build the BarrierOp
|
||||
Status SyncWaitNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) {
|
||||
// Right now barrier should only take num_rows_per_buffer = 1
|
||||
// The reason for this is because having it otherwise can lead to blocking issues
|
||||
// See barrier_op.h for more details
|
||||
const int32_t rows_per_buffer = 1;
|
||||
auto op = std::make_shared<BarrierOp>(rows_per_buffer, connector_que_size_, condition_name_, callback_);
|
||||
auto op = std::make_shared<BarrierOp>(connector_que_size_, condition_name_, callback_);
|
||||
op->set_total_repeats(GetTotalRepeats());
|
||||
op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch());
|
||||
node_ops->push_back(op);
|
||||
|
|
|
@ -58,7 +58,7 @@ Status ZipNode::ValidateParams() {
|
|||
}
|
||||
|
||||
Status ZipNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) {
|
||||
auto op = std::make_shared<ZipOp>(rows_per_buffer_, connector_que_size_);
|
||||
auto op = std::make_shared<ZipOp>(connector_que_size_);
|
||||
op->set_total_repeats(GetTotalRepeats());
|
||||
op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch());
|
||||
node_ops->push_back(op);
|
||||
|
|
|
@ -31,7 +31,7 @@ using mindspore::MsLogLevel::ERROR;
|
|||
using mindspore::ExceptionType::NoExceptionType;
|
||||
using mindspore::LogStream;
|
||||
|
||||
std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false, int rows_per_buf = 2);
|
||||
std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false);
|
||||
|
||||
std::shared_ptr<RepeatOp> Repeat(int repeat_cnt);
|
||||
|
||||
|
@ -43,7 +43,7 @@ std::shared_ptr<AlbumOp> Album(int64_t num_works, int64_t rows, int64_t conns, s
|
|||
AlbumOp::Builder builder;
|
||||
Status rc = builder.SetNumWorkers(num_works)
|
||||
.SetAlbumDir(path)
|
||||
.SetRowsPerBuffer(rows)
|
||||
|
||||
.SetOpConnectorSize(conns)
|
||||
.SetExtensions({".json"})
|
||||
.SetSampler(std::move(sampler))
|
||||
|
@ -62,7 +62,7 @@ std::shared_ptr<AlbumOp> AlbumSchema(int64_t num_works, int64_t rows, int64_t co
|
|||
.SetSchemaFile(schema_file)
|
||||
.SetColumnsToLoad(column_names)
|
||||
.SetAlbumDir(path)
|
||||
.SetRowsPerBuffer(rows)
|
||||
|
||||
.SetOpConnectorSize(conns)
|
||||
.SetExtensions({".json"})
|
||||
.SetSampler(std::move(sampler))
|
||||
|
@ -180,8 +180,8 @@ TEST_F(MindDataTestAlbum, TestSequentialAlbumWithFullSchema) {
|
|||
EXPECT_OK(tensor_map["_priority"]->GetItemAt<double>(&priority, {}));
|
||||
EXPECT_OK(tensor_map["id"]->GetItemAt<int64_t>(&id, {}));
|
||||
MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "label shape"
|
||||
<< tensor_map["label"] << "priority: " << priority << " embedding : "
|
||||
<< tensor_map["_embedding"]->shape() << " id: " << id << "\n";
|
||||
<< tensor_map["label"] << "priority: " << priority
|
||||
<< " embedding : " << tensor_map["_embedding"]->shape() << " id: " << id << "\n";
|
||||
i++;
|
||||
di.GetNextAsMap(&tensor_map);
|
||||
}
|
||||
|
|
|
@ -34,7 +34,7 @@ class MindDataTestBatchOp : public UT::DatasetOpTesting {
|
|||
protected:
|
||||
};
|
||||
|
||||
std::shared_ptr<de::BatchOp> Batch(int32_t batch_size = 1, bool drop = false, int rows_per_buf = 2) {
|
||||
std::shared_ptr<de::BatchOp> Batch(int32_t batch_size = 1, bool drop = false) {
|
||||
Status rc;
|
||||
std::shared_ptr<de::BatchOp> op;
|
||||
rc = de::BatchOp::Builder(batch_size).SetDrop(drop).Build(&op);
|
||||
|
@ -50,10 +50,10 @@ std::shared_ptr<de::RepeatOp> Repeat(int repeat_cnt = 1) {
|
|||
return op;
|
||||
}
|
||||
|
||||
std::shared_ptr<de::TFReaderOp> TFReader(std::string schema, int rows_per_buf = 2, int num_works = 8) {
|
||||
std::shared_ptr<de::TFReaderOp> TFReader(std::string schema, int num_works = 8) {
|
||||
std::shared_ptr<de::TFReaderOp> so;
|
||||
de::TFReaderOp::Builder builder;
|
||||
builder.SetDatasetFilesList({schema}).SetRowsPerBuffer(rows_per_buf).SetNumWorkers(num_works);
|
||||
builder.SetDatasetFilesList({schema}).SetNumWorkers(num_works);
|
||||
Status rc = builder.Build(&so);
|
||||
return so;
|
||||
}
|
||||
|
@ -111,7 +111,7 @@ TEST_F(MindDataTestBatchOp, TestRepeatBatchDropTrue) {
|
|||
bool success = false;
|
||||
auto op1 = TFReader(schema_file);
|
||||
auto op2 = Repeat(2);
|
||||
auto op3 = Batch(7, true, 99);
|
||||
auto op3 = Batch(7, true);
|
||||
op1->set_total_repeats(2);
|
||||
op1->set_num_repeats_per_epoch(2);
|
||||
auto tree = Build({op1, op2, op3});
|
||||
|
@ -161,7 +161,7 @@ TEST_F(MindDataTestBatchOp, TestRepeatBatchDropFalse) {
|
|||
bool success = false;
|
||||
auto op1 = TFReader(schema_file);
|
||||
auto op2 = Repeat(2);
|
||||
auto op3 = Batch(7, false, 99);
|
||||
auto op3 = Batch(7, false);
|
||||
op1->set_total_repeats(2);
|
||||
op1->set_num_repeats_per_epoch(2);
|
||||
auto tree = Build({op1, op2, op3});
|
||||
|
@ -217,7 +217,7 @@ TEST_F(MindDataTestBatchOp, TestBatchDropFalseRepeat) {
|
|||
std::string schema_file = datasets_root_path_ + "/testBatchDataset/test.data";
|
||||
bool success = false;
|
||||
auto op1 = TFReader(schema_file);
|
||||
auto op2 = Batch(7, false, 99);
|
||||
auto op2 = Batch(7, false);
|
||||
auto op3 = Repeat(2);
|
||||
op1->set_total_repeats(2);
|
||||
op1->set_num_repeats_per_epoch(2);
|
||||
|
@ -270,7 +270,7 @@ TEST_F(MindDataTestBatchOp, TestBatchDropTrueRepeat) {
|
|||
std::string schema_file = datasets_root_path_ + "/testBatchDataset/test.data";
|
||||
bool success = false;
|
||||
auto op1 = TFReader(schema_file);
|
||||
auto op2 = Batch(5, true, 99);
|
||||
auto op2 = Batch(5, true);
|
||||
auto op3 = Repeat(2);
|
||||
op1->set_total_repeats(2);
|
||||
op1->set_num_repeats_per_epoch(2);
|
||||
|
|
|
@ -253,7 +253,7 @@ TEST_F(MindDataTestCacheOp, DISABLED_TestRandomDataCache1) {
|
|||
// RandomDataOp
|
||||
std::shared_ptr<RandomDataOp> myRandomDataOp;
|
||||
rc = RandomDataOp::Builder()
|
||||
.SetRowsPerBuffer(4)
|
||||
|
||||
.SetNumWorkers(4)
|
||||
.SetDataSchema(std::move(testSchema))
|
||||
.SetTotalRows(50) // 50 samples for now
|
||||
|
@ -277,7 +277,7 @@ TEST_F(MindDataTestCacheOp, DISABLED_TestRandomDataCache1) {
|
|||
rc = CacheOp::Builder()
|
||||
.SetNumWorkers(5)
|
||||
.SetClient(myClient)
|
||||
.SetRowsPerBuffer(1)
|
||||
|
||||
.SetSampler(std::move(seq_sampler))
|
||||
.Build(&myCacheOp);
|
||||
ASSERT_TRUE(rc.IsOk());
|
||||
|
@ -379,7 +379,7 @@ TEST_F(MindDataTestCacheOp, DISABLED_TestRandomDataCacheSpill) {
|
|||
// RandomDataOp
|
||||
std::shared_ptr<RandomDataOp> myRandomDataOp;
|
||||
rc = RandomDataOp::Builder()
|
||||
.SetRowsPerBuffer(2)
|
||||
|
||||
.SetNumWorkers(4)
|
||||
.SetDataSchema(std::move(testSchema))
|
||||
.SetTotalRows(10)
|
||||
|
@ -401,7 +401,6 @@ TEST_F(MindDataTestCacheOp, DISABLED_TestRandomDataCacheSpill) {
|
|||
rc = CacheOp::Builder()
|
||||
.SetNumWorkers(4)
|
||||
.SetClient(myClient)
|
||||
.SetRowsPerBuffer(3)
|
||||
.SetSampler(std::move(seq_sampler))
|
||||
.Build(&myCacheOp);
|
||||
ASSERT_TRUE(rc.IsOk());
|
||||
|
@ -484,7 +483,7 @@ TEST_F(MindDataTestCacheOp, DISABLED_TestImageFolderCacheMerge) {
|
|||
ImageFolderOp::Builder builder;
|
||||
builder.SetOpConnectorSize(3)
|
||||
.SetNumWorkers(3)
|
||||
.SetRowsPerBuffer(2)
|
||||
|
||||
.SetExtensions({".jpg", ".JPEG"})
|
||||
.SetRecursive(true)
|
||||
.SetImageFolderDir(datasets_root_path_ + "/testPK/data");
|
||||
|
|
|
@ -26,26 +26,26 @@
|
|||
#include "securec.h"
|
||||
|
||||
using namespace mindspore::dataset;
|
||||
using mindspore::MsLogLevel::ERROR;
|
||||
using mindspore::ExceptionType::NoExceptionType;
|
||||
using mindspore::LogStream;
|
||||
using mindspore::ExceptionType::NoExceptionType;
|
||||
using mindspore::MsLogLevel::ERROR;
|
||||
|
||||
std::shared_ptr<RepeatOp> Repeat(int repeat_cnt);
|
||||
|
||||
std::shared_ptr<ExecutionTree> Build(std::vector<std::shared_ptr<DatasetOp>> ops);
|
||||
|
||||
std::shared_ptr<CelebAOp> Celeba(int32_t num_workers, int32_t rows_per_buffer, int32_t queue_size,
|
||||
const std::string &dir, std::shared_ptr<SamplerRT> sampler = nullptr,
|
||||
bool decode = false, const std::string &dataset_type = "all") {
|
||||
std::shared_ptr<CelebAOp> Celeba(int32_t num_workers, int32_t queue_size, const std::string &dir,
|
||||
std::shared_ptr<SamplerRT> sampler = nullptr, bool decode = false,
|
||||
const std::string &dataset_type = "all") {
|
||||
std::shared_ptr<CelebAOp> so;
|
||||
CelebAOp::Builder builder;
|
||||
Status rc = builder.SetNumWorkers(num_workers)
|
||||
.SetCelebADir(dir)
|
||||
.SetRowsPerBuffer(rows_per_buffer)
|
||||
.SetOpConnectorSize(queue_size)
|
||||
.SetSampler(std::move(sampler))
|
||||
.SetDecode(decode)
|
||||
.SetUsage(dataset_type).Build(&so);
|
||||
.SetUsage(dataset_type)
|
||||
.Build(&so);
|
||||
return so;
|
||||
}
|
||||
|
||||
|
@ -55,12 +55,16 @@ protected:
|
|||
|
||||
TEST_F(MindDataTestCelebaDataset, TestSequentialCeleba) {
|
||||
std::string dir = datasets_root_path_ + "/testCelebAData/";
|
||||
uint32_t expect_labels[4][40] = {{0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,0,1},
|
||||
{0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1},
|
||||
{0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1},
|
||||
{0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,0,1}};
|
||||
uint32_t expect_labels[4][40] = {{0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1,
|
||||
0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1},
|
||||
{0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
|
||||
0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1},
|
||||
{0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
|
||||
0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1},
|
||||
{0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1,
|
||||
0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1}};
|
||||
uint32_t count = 0;
|
||||
auto tree = Build({Celeba(16, 2, 32, dir)});
|
||||
auto tree = Build({Celeba(16, 2, dir)});
|
||||
tree->Prepare();
|
||||
Status rc = tree->Launch();
|
||||
if (rc.IsError()) {
|
||||
|
@ -86,16 +90,24 @@ TEST_F(MindDataTestCelebaDataset, TestSequentialCeleba) {
|
|||
|
||||
TEST_F(MindDataTestCelebaDataset, TestCelebaRepeat) {
|
||||
std::string dir = datasets_root_path_ + "/testCelebAData/";
|
||||
uint32_t expect_labels[8][40] = {{0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,0,1},
|
||||
{0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1},
|
||||
{0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1},
|
||||
{0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,0,1},
|
||||
{0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,0,1},
|
||||
{0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1},
|
||||
{0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1},
|
||||
{0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,0,1}};
|
||||
uint32_t expect_labels[8][40] = {{0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1,
|
||||
0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1},
|
||||
{0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
|
||||
0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1},
|
||||
{0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
|
||||
0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1},
|
||||
{0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1,
|
||||
0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1},
|
||||
{0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1,
|
||||
0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1},
|
||||
{0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
|
||||
0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1},
|
||||
{0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
|
||||
0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1},
|
||||
{0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1,
|
||||
0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1}};
|
||||
uint32_t count = 0;
|
||||
auto op1 = Celeba(16, 2, 32, dir);
|
||||
auto op1 = Celeba(16, 2, dir);
|
||||
auto op2 = Repeat(2);
|
||||
auto tree = Build({op1, op2});
|
||||
op1->set_total_repeats(2);
|
||||
|
@ -131,7 +143,7 @@ TEST_F(MindDataTestCelebaDataset, TestSubsetRandomSamplerCeleba) {
|
|||
0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1}};
|
||||
std::string dir = datasets_root_path_ + "/testCelebAData/";
|
||||
uint32_t count = 0;
|
||||
auto tree = Build({Celeba(16, 2, 32, dir, std::move(sampler))});
|
||||
auto tree = Build({Celeba(16, 2, dir, std::move(sampler))});
|
||||
tree->Prepare();
|
||||
Status rc = tree->Launch();
|
||||
if (rc.IsError()) {
|
||||
|
|
|
@ -47,7 +47,7 @@ std::shared_ptr<CifarOp> Cifarop(uint64_t num_works, uint64_t rows, uint64_t con
|
|||
CifarOp::Builder builder;
|
||||
Status rc = builder.SetNumWorkers(num_works)
|
||||
.SetCifarDir(path)
|
||||
.SetRowsPerBuffer(rows)
|
||||
|
||||
.SetOpConnectorSize(conns)
|
||||
.SetSampler(std::move(sampler))
|
||||
.SetCifarType(cifar10)
|
||||
|
|
|
@ -45,20 +45,17 @@ TEST_F(MindDataTestClientConfig, TestClientConfig1) {
|
|||
std::shared_ptr<ConfigManager> my_conf = GlobalContext::config_manager();
|
||||
|
||||
ASSERT_EQ(my_conf->num_parallel_workers(), kCfgParallelWorkers);
|
||||
ASSERT_EQ(my_conf->rows_per_buffer(), kCfgRowsPerBuffer);
|
||||
ASSERT_EQ(my_conf->worker_connector_size(), kCfgWorkerConnectorSize);
|
||||
ASSERT_EQ(my_conf->op_connector_size(), kCfgOpConnectorSize);
|
||||
ASSERT_EQ(my_conf->seed(), kCfgDefaultSeed);
|
||||
|
||||
my_conf->set_num_parallel_workers(2);
|
||||
my_conf->set_rows_per_buffer(1);
|
||||
my_conf->set_worker_connector_size(3);
|
||||
my_conf->set_op_connector_size(4);
|
||||
my_conf->set_seed(5);
|
||||
|
||||
|
||||
ASSERT_EQ(my_conf->num_parallel_workers(), 2);
|
||||
ASSERT_EQ(my_conf->rows_per_buffer(), 1);
|
||||
ASSERT_EQ(my_conf->worker_connector_size(), 3);
|
||||
ASSERT_EQ(my_conf->op_connector_size(), 4);
|
||||
ASSERT_EQ(my_conf->seed(), 5);
|
||||
|
@ -67,7 +64,6 @@ TEST_F(MindDataTestClientConfig, TestClientConfig1) {
|
|||
ASSERT_TRUE(my_conf->LoadFile(file));
|
||||
|
||||
ASSERT_EQ(my_conf->num_parallel_workers(), kCfgParallelWorkers);
|
||||
ASSERT_EQ(my_conf->rows_per_buffer(), kCfgRowsPerBuffer);
|
||||
ASSERT_EQ(my_conf->worker_connector_size(), kCfgWorkerConnectorSize);
|
||||
ASSERT_EQ(my_conf->op_connector_size(), kCfgOpConnectorSize);
|
||||
ASSERT_EQ(my_conf->seed(), kCfgDefaultSeed);
|
||||
|
|
|
@ -50,7 +50,7 @@ TEST_F(MindDataTestCLUEOp, TestCLUEBasic) {
|
|||
std::shared_ptr<ClueOp> op;
|
||||
ClueOp::Builder builder;
|
||||
builder.SetClueFilesList({dataset_path})
|
||||
.SetRowsPerBuffer(16)
|
||||
|
||||
.SetOpConnectorSize(2)
|
||||
.SetColsKeyMap(key_map);
|
||||
|
||||
|
|
|
@ -43,7 +43,7 @@ using mindspore::MsLogLevel::ERROR;
|
|||
using mindspore::ExceptionType::NoExceptionType;
|
||||
using mindspore::LogStream;
|
||||
|
||||
std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false, int rows_per_buf = 2);
|
||||
std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false);
|
||||
|
||||
std::shared_ptr<ExecutionTree> Build(std::vector<std::shared_ptr<DatasetOp>> ops);
|
||||
|
||||
|
|
|
@ -51,7 +51,7 @@ TEST_F(MindDataTestConcatOp, TestConcatProject) {
|
|||
// TFReaderOp1
|
||||
std::shared_ptr<TFReaderOp> my_tfreader_op1;
|
||||
TFReaderOp::Builder builder1;
|
||||
builder1.SetDatasetFilesList({dataset_path}).SetRowsPerBuffer(16).SetWorkerConnectorSize(16);
|
||||
builder1.SetDatasetFilesList({dataset_path}).SetWorkerConnectorSize(16);
|
||||
std::unique_ptr<DataSchema> schema1 = std::make_unique<DataSchema>();
|
||||
schema1->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema1Row.json", {});
|
||||
builder1.SetDataSchema(std::move(schema1));
|
||||
|
@ -63,7 +63,7 @@ TEST_F(MindDataTestConcatOp, TestConcatProject) {
|
|||
// TFReaderOp2
|
||||
std::shared_ptr<TFReaderOp> my_tfreader_op2;
|
||||
TFReaderOp::Builder builder2;
|
||||
builder2.SetDatasetFilesList({dataset_path}).SetRowsPerBuffer(16).SetWorkerConnectorSize(16);
|
||||
builder2.SetDatasetFilesList({dataset_path}).SetWorkerConnectorSize(16);
|
||||
std::unique_ptr<DataSchema> schema2 = std::make_unique<DataSchema>();
|
||||
schema2->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema1Row.json", {});
|
||||
builder2.SetDataSchema(std::move(schema2));
|
||||
|
|
|
@ -52,7 +52,7 @@ TEST_F(MindDataTestCSVOp, TestCSVBasic) {
|
|||
std::shared_ptr<CsvOp> op;
|
||||
CsvOp::Builder builder;
|
||||
builder.SetCsvFilesList({dataset_path})
|
||||
.SetRowsPerBuffer(16)
|
||||
|
||||
.SetShuffleFiles(false)
|
||||
.SetOpConnectorSize(2)
|
||||
.SetFieldDelim(',')
|
||||
|
|
|
@ -44,19 +44,15 @@ TEST_F(MindDataTestExecutionTree, TestExecutionTree1) {
|
|||
uint32_t shuffle_size = 32;
|
||||
uint32_t connector_size = 8;
|
||||
|
||||
|
||||
std::shared_ptr<ShuffleOp> leaf_op1 =
|
||||
std::make_shared<ShuffleOp>(shuffle_size, 0, connector_size, false, 32);
|
||||
std::shared_ptr<ShuffleOp> leaf_op1 = std::make_shared<ShuffleOp>(shuffle_size, 0, connector_size, false);
|
||||
ASSERT_NE(leaf_op1, nullptr);
|
||||
my_tree->AssociateNode(leaf_op1);
|
||||
shuffle_size = 16;
|
||||
std::shared_ptr<ShuffleOp> leaf_op2 =
|
||||
std::make_shared<ShuffleOp>(shuffle_size, 0, connector_size, false, 32);
|
||||
std::shared_ptr<ShuffleOp> leaf_op2 = std::make_shared<ShuffleOp>(shuffle_size, 0, connector_size, false);
|
||||
ASSERT_NE(leaf_op2, nullptr);
|
||||
my_tree->AssociateNode(leaf_op2);
|
||||
shuffle_size = 8;
|
||||
std::shared_ptr<ShuffleOp> parent_op =
|
||||
std::make_shared<ShuffleOp>(shuffle_size, 0, connector_size, false, 32);
|
||||
std::shared_ptr<ShuffleOp> parent_op = std::make_shared<ShuffleOp>(shuffle_size, 0, connector_size, false);
|
||||
ASSERT_NE(parent_op, nullptr);
|
||||
my_tree->AssociateNode(parent_op);
|
||||
|
||||
|
@ -68,8 +64,7 @@ TEST_F(MindDataTestExecutionTree, TestExecutionTree1) {
|
|||
parent_op->AddChild(std::move(leaf_op1));
|
||||
parent_op->AddChild(std::move(leaf_op2));
|
||||
shuffle_size = 4;
|
||||
std::shared_ptr<DatasetOp> root_op =
|
||||
std::make_shared<ShuffleOp>(shuffle_size, 0, connector_size, false, 32);
|
||||
std::shared_ptr<DatasetOp> root_op = std::make_shared<ShuffleOp>(shuffle_size, 0, connector_size, false);
|
||||
my_tree->AssignRoot(root_op);
|
||||
root_op->AddChild(parent_op);
|
||||
ASSERT_NE(root_op, nullptr);
|
||||
|
@ -106,7 +101,7 @@ TEST_F(MindDataTestExecutionTree, TestExecutionTree2) {
|
|||
std::shared_ptr<TFReaderOp> my_tfreader_op;
|
||||
TFReaderOp::Builder()
|
||||
.SetDatasetFilesList({dataset_path})
|
||||
.SetRowsPerBuffer(2)
|
||||
|
||||
.SetWorkerConnectorSize(2)
|
||||
.SetNumWorkers(2)
|
||||
.Build(&my_tfreader_op);
|
||||
|
|
|
@ -40,7 +40,7 @@ using mindspore::LogStream;
|
|||
using mindspore::ExceptionType::NoExceptionType;
|
||||
using mindspore::MsLogLevel::ERROR;
|
||||
|
||||
std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false, int rows_per_buf = 2);
|
||||
std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false);
|
||||
|
||||
std::shared_ptr<RepeatOp> Repeat(int repeat_cnt);
|
||||
|
||||
|
@ -53,7 +53,7 @@ std::shared_ptr<ImageFolderOp> ImageFolder(int64_t num_works, int64_t rows, int6
|
|||
ImageFolderOp::Builder builder;
|
||||
Status rc = builder.SetNumWorkers(num_works)
|
||||
.SetImageFolderDir(path)
|
||||
.SetRowsPerBuffer(rows)
|
||||
|
||||
.SetOpConnectorSize(conns)
|
||||
.SetExtensions({".jpg", ".JPEG"})
|
||||
.SetSampler(std::move(sampler))
|
||||
|
|
|
@ -156,7 +156,7 @@ TEST_F(MindDataTestCallback, TestBasicCallback) {
|
|||
ColDescriptor col("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &shape);
|
||||
ASSERT_OK(schema->AddColumn(col));
|
||||
std::shared_ptr<RandomDataOp> leaf;
|
||||
rc = RandomDataOp::Builder().SetRowsPerBuffer(1).SetDataSchema(std::move(schema)).SetTotalRows(44).Build(&leaf);
|
||||
rc = RandomDataOp::Builder().SetDataSchema(std::move(schema)).SetTotalRows(44).Build(&leaf);
|
||||
EXPECT_TRUE(rc.IsOk());
|
||||
// config mapOp
|
||||
std::shared_ptr<MapOp> map_op;
|
||||
|
@ -208,7 +208,7 @@ TEST_F(MindDataTestCallback, TestMultiEpochCallback) {
|
|||
ColDescriptor col("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &shape);
|
||||
ASSERT_OK(schema->AddColumn(col));
|
||||
std::shared_ptr<RandomDataOp> leaf;
|
||||
rc = RandomDataOp::Builder().SetRowsPerBuffer(1).SetDataSchema(std::move(schema)).SetTotalRows(4).SetNumWorkers(4).Build(&leaf);
|
||||
rc = RandomDataOp::Builder().SetDataSchema(std::move(schema)).SetTotalRows(4).SetNumWorkers(4).Build(&leaf);
|
||||
EXPECT_TRUE(rc.IsOk());
|
||||
// config mapOp
|
||||
std::shared_ptr<MapOp> map_op;
|
||||
|
@ -273,7 +273,7 @@ TEST_F(MindDataTestCallback, TestSelectedCallback) {
|
|||
ColDescriptor col("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &shape);
|
||||
ASSERT_OK(schema->AddColumn(col));
|
||||
std::shared_ptr<RandomDataOp> leaf;
|
||||
rc = RandomDataOp::Builder().SetRowsPerBuffer(1).SetDataSchema(std::move(schema)).SetTotalRows(4).SetNumWorkers(4).Build(&leaf);
|
||||
rc = RandomDataOp::Builder().SetDataSchema(std::move(schema)).SetTotalRows(4).SetNumWorkers(4).Build(&leaf);
|
||||
EXPECT_TRUE(rc.IsOk());
|
||||
// config mapOp
|
||||
std::shared_ptr<MapOp> map_op;
|
||||
|
|
|
@ -46,9 +46,14 @@ std::shared_ptr<ManifestOp> Manifest(int32_t num_works, int32_t rows, int32_t co
|
|||
std::map<std::string, int32_t> map = {}, bool decode = false) {
|
||||
std::shared_ptr<ManifestOp> so;
|
||||
ManifestOp::Builder builder;
|
||||
Status rc = builder.SetNumWorkers(num_works).SetManifestFile(file).SetRowsPerBuffer(
|
||||
rows).SetOpConnectorSize(conns).SetSampler(std::move(sampler)).SetClassIndex(map).SetDecode(decode)
|
||||
.SetUsage(usage).Build(&so);
|
||||
Status rc = builder.SetNumWorkers(num_works)
|
||||
.SetManifestFile(file)
|
||||
.SetOpConnectorSize(conns)
|
||||
.SetSampler(std::move(sampler))
|
||||
.SetClassIndex(map)
|
||||
.SetDecode(decode)
|
||||
.SetUsage(usage)
|
||||
.Build(&so);
|
||||
return so;
|
||||
}
|
||||
|
||||
|
|
|
@ -110,7 +110,7 @@ class MindDataTestMapOp : public UT::DatasetOpTesting {
|
|||
TFReaderOp::Builder builder;
|
||||
builder.SetDatasetFilesList({dataset_path_})
|
||||
.SetColumnsToLoad({"image", "label", "A", "B"})
|
||||
.SetRowsPerBuffer(2)
|
||||
|
||||
.SetWorkerConnectorSize(2)
|
||||
.SetNumWorkers(2);
|
||||
|
||||
|
@ -516,7 +516,7 @@ TEST_F(MindDataTestMapOp, TFReader_Decode_Repeat_Resize) {
|
|||
TFReaderOp::Builder sobuilder;
|
||||
sobuilder.SetDatasetFilesList({dataset_path_})
|
||||
.SetColumnsToLoad({"image", "label"})
|
||||
.SetRowsPerBuffer(2)
|
||||
|
||||
.SetWorkerConnectorSize(2)
|
||||
.SetNumWorkers(2);
|
||||
rc = sobuilder.Build(&my_tfreader_op);
|
||||
|
|
|
@ -29,12 +29,11 @@
|
|||
namespace common = mindspore::common;
|
||||
|
||||
using namespace mindspore::dataset;
|
||||
using mindspore::MsLogLevel::INFO;
|
||||
using mindspore::ExceptionType::NoExceptionType;
|
||||
using mindspore::LogStream;
|
||||
using mindspore::ExceptionType::NoExceptionType;
|
||||
using mindspore::MsLogLevel::INFO;
|
||||
|
||||
class MindDataTestMindRecordOp : public UT::DatasetOpTesting {
|
||||
};
|
||||
class MindDataTestMindRecordOp : public UT::DatasetOpTesting {};
|
||||
|
||||
TEST_F(MindDataTestMindRecordOp, TestMindRecordBasic) {
|
||||
// single MindRecord op and nothing else
|
||||
|
@ -64,7 +63,6 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordBasic) {
|
|||
MindRecordOp::Builder builder;
|
||||
builder.SetDatasetFile({mindrecord_root_path_ + "/testMindDataSet/testImageNetData/imagenet.mindrecord0"})
|
||||
.SetLoadDataset(true)
|
||||
.SetRowsPerBuffer(3)
|
||||
.SetNumMindRecordWorkers(4)
|
||||
.SetColumnsToLoad(column_list);
|
||||
rc = builder.Build(&my_mindrecord_op);
|
||||
|
@ -135,7 +133,6 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordSample) {
|
|||
MindRecordOp::Builder builder;
|
||||
builder.SetDatasetFile({mindrecord_root_path_ + "/testMindDataSet/testImageNetData/imagenet.mindrecord0"})
|
||||
.SetLoadDataset(true)
|
||||
.SetRowsPerBuffer(3)
|
||||
.SetNumMindRecordWorkers(4)
|
||||
.SetColumnsToLoad(column_list)
|
||||
.SetOperators(operators);
|
||||
|
@ -207,7 +204,6 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordShuffle) {
|
|||
MindRecordOp::Builder builder;
|
||||
builder.SetDatasetFile({mindrecord_root_path_ + "/testMindDataSet/testImageNetData/imagenet.mindrecord0"})
|
||||
.SetLoadDataset(true)
|
||||
.SetRowsPerBuffer(3)
|
||||
.SetNumMindRecordWorkers(4)
|
||||
.SetColumnsToLoad(column_list)
|
||||
.SetOperators(operators);
|
||||
|
@ -282,7 +278,6 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordCategory) {
|
|||
MindRecordOp::Builder builder;
|
||||
builder.SetDatasetFile({mindrecord_root_path_ + "/testMindDataSet/testImageNetData/imagenet.mindrecord0"})
|
||||
.SetLoadDataset(true)
|
||||
.SetRowsPerBuffer(3)
|
||||
.SetNumMindRecordWorkers(4)
|
||||
.SetColumnsToLoad(column_list)
|
||||
.SetOperators(operators);
|
||||
|
@ -351,7 +346,6 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordRepeat) {
|
|||
MindRecordOp::Builder builder;
|
||||
builder.SetDatasetFile({mindrecord_root_path_ + "/testMindDataSet/testImageNetData/imagenet.mindrecord0"})
|
||||
.SetLoadDataset(true)
|
||||
.SetRowsPerBuffer(3)
|
||||
.SetNumMindRecordWorkers(4)
|
||||
.SetColumnsToLoad(column_list);
|
||||
rc = builder.Build(&my_mindrecord_op);
|
||||
|
@ -364,8 +358,7 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordRepeat) {
|
|||
|
||||
uint32_t num_repeats = 2;
|
||||
std::shared_ptr<RepeatOp> my_repeat_op;
|
||||
rc = RepeatOp::Builder(num_repeats)
|
||||
.Build(&my_repeat_op);
|
||||
rc = RepeatOp::Builder(num_repeats).Build(&my_repeat_op);
|
||||
EXPECT_TRUE(rc.IsOk());
|
||||
rc = my_tree->AssociateNode(my_repeat_op);
|
||||
EXPECT_TRUE(rc.IsOk());
|
||||
|
@ -375,7 +368,6 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordRepeat) {
|
|||
rc = my_repeat_op->AddChild(my_mindrecord_op);
|
||||
EXPECT_TRUE(rc.IsOk());
|
||||
|
||||
|
||||
// Set children/root layout.
|
||||
rc = my_tree->AssignRoot(my_repeat_op);
|
||||
EXPECT_TRUE(rc.IsOk());
|
||||
|
@ -407,7 +399,6 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordRepeat) {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
TEST_F(MindDataTestMindRecordOp, TestMindRecordBlockReaderRepeat) {
|
||||
// single MindRecord op and nothing else
|
||||
//
|
||||
|
@ -436,7 +427,6 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordBlockReaderRepeat) {
|
|||
MindRecordOp::Builder builder;
|
||||
builder.SetDatasetFile({mindrecord_root_path_ + "/testMindDataSet/testImageNetData/imagenet.mindrecord0"})
|
||||
.SetLoadDataset(true)
|
||||
.SetRowsPerBuffer(3)
|
||||
.SetNumMindRecordWorkers(4)
|
||||
.SetColumnsToLoad(column_list);
|
||||
rc = builder.Build(&my_mindrecord_op);
|
||||
|
@ -449,8 +439,7 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordBlockReaderRepeat) {
|
|||
|
||||
uint32_t num_repeats = 2;
|
||||
std::shared_ptr<RepeatOp> my_repeat_op;
|
||||
rc = RepeatOp::Builder(num_repeats)
|
||||
.Build(&my_repeat_op);
|
||||
rc = RepeatOp::Builder(num_repeats).Build(&my_repeat_op);
|
||||
EXPECT_TRUE(rc.IsOk());
|
||||
rc = my_tree->AssociateNode(my_repeat_op);
|
||||
EXPECT_TRUE(rc.IsOk());
|
||||
|
@ -519,7 +508,6 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordInvalidColumnList) {
|
|||
MindRecordOp::Builder builder;
|
||||
builder.SetDatasetFile({mindrecord_root_path_ + "/testMindDataSet/testImageNetData/imagenet.mindrecord0"})
|
||||
.SetLoadDataset(true)
|
||||
.SetRowsPerBuffer(3)
|
||||
.SetNumMindRecordWorkers(4)
|
||||
.SetColumnsToLoad(column_list);
|
||||
rc = builder.Build(&my_mindrecord_op);
|
||||
|
|
|
@ -42,7 +42,7 @@ using mindspore::MsLogLevel::ERROR;
|
|||
using mindspore::ExceptionType::NoExceptionType;
|
||||
using mindspore::LogStream;
|
||||
|
||||
std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false, int rows_per_buf = 2);
|
||||
std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false);
|
||||
|
||||
std::shared_ptr<RepeatOp> Repeat(int repeat_cnt);
|
||||
|
||||
|
@ -57,7 +57,7 @@ std::shared_ptr<MnistOp> CreateMnist(int64_t num_wrks, int64_t rows, int64_t con
|
|||
MnistOp::Builder builder;
|
||||
Status rc = builder.SetNumWorkers(num_wrks)
|
||||
.SetDir(path)
|
||||
.SetRowsPerBuffer(rows)
|
||||
|
||||
.SetOpConnectorSize(conns)
|
||||
.SetSampler(std::move(sampler))
|
||||
.Build(&so);
|
||||
|
|
|
@ -41,7 +41,7 @@ TEST_F(MindDataTestProjectOp, TestProjectProject) {
|
|||
|
||||
std::shared_ptr<TFReaderOp> my_tfreader_op;
|
||||
TFReaderOp::Builder builder;
|
||||
builder.SetDatasetFilesList({dataset_path}).SetRowsPerBuffer(16).SetWorkerConnectorSize(16);
|
||||
builder.SetDatasetFilesList({dataset_path}).SetWorkerConnectorSize(16);
|
||||
std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
|
||||
schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {});
|
||||
builder.SetDataSchema(std::move(schema));
|
||||
|
|
|
@ -76,11 +76,7 @@ TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic1) {
|
|||
std::shared_ptr<RandomDataOp> myRandomDataOp;
|
||||
RandomDataOp::Builder builder;
|
||||
|
||||
rc = builder.SetRowsPerBuffer(2)
|
||||
.SetNumWorkers(1)
|
||||
.SetDataSchema(std::move(testSchema))
|
||||
.SetTotalRows(25)
|
||||
.Build(&myRandomDataOp);
|
||||
rc = builder.SetNumWorkers(1).SetDataSchema(std::move(testSchema)).SetTotalRows(25).Build(&myRandomDataOp);
|
||||
EXPECT_TRUE(rc.IsOk());
|
||||
|
||||
rc = myTree->AssociateNode(myRandomDataOp);
|
||||
|
@ -134,9 +130,7 @@ TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic2) {
|
|||
std::shared_ptr<RandomDataOp> myRandomDataOp;
|
||||
RandomDataOp::Builder builder;
|
||||
|
||||
rc = builder.SetRowsPerBuffer(2)
|
||||
.SetNumWorkers(1)
|
||||
.Build(&myRandomDataOp);
|
||||
rc = builder.SetNumWorkers(1).Build(&myRandomDataOp);
|
||||
EXPECT_TRUE(rc.IsOk());
|
||||
|
||||
rc = myTree->AssociateNode(myRandomDataOp);
|
||||
|
@ -171,11 +165,7 @@ TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic3) {
|
|||
std::shared_ptr<RandomDataOp> myRandomDataOp;
|
||||
RandomDataOp::Builder builder;
|
||||
|
||||
rc = builder.SetRowsPerBuffer(2)
|
||||
.SetNumWorkers(1)
|
||||
.SetDataSchema(std::move(testSchema))
|
||||
.SetTotalRows(10)
|
||||
.Build(&myRandomDataOp);
|
||||
rc = builder.SetNumWorkers(1).SetDataSchema(std::move(testSchema)).SetTotalRows(10).Build(&myRandomDataOp);
|
||||
EXPECT_TRUE(rc.IsOk());
|
||||
|
||||
rc = myTree->AssociateNode(myRandomDataOp);
|
||||
|
@ -235,11 +225,7 @@ TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic4) {
|
|||
std::shared_ptr<RandomDataOp> myRandomDataOp;
|
||||
RandomDataOp::Builder builder;
|
||||
|
||||
rc = builder.SetRowsPerBuffer(2)
|
||||
.SetNumWorkers(1)
|
||||
.SetDataSchema(std::move(testSchema))
|
||||
.SetTotalRows(10)
|
||||
.Build(&myRandomDataOp);
|
||||
rc = builder.SetNumWorkers(1).SetDataSchema(std::move(testSchema)).SetTotalRows(10).Build(&myRandomDataOp);
|
||||
EXPECT_TRUE(rc.IsOk());
|
||||
|
||||
rc = myTree->AssociateNode(myRandomDataOp);
|
||||
|
@ -315,11 +301,7 @@ TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic5) {
|
|||
std::shared_ptr<RandomDataOp> myRandomDataOp;
|
||||
RandomDataOp::Builder builder;
|
||||
|
||||
rc = builder.SetRowsPerBuffer(2)
|
||||
.SetNumWorkers(4)
|
||||
.SetDataSchema(std::move(testSchema))
|
||||
.SetTotalRows(10)
|
||||
.Build(&myRandomDataOp);
|
||||
rc = builder.SetNumWorkers(4).SetDataSchema(std::move(testSchema)).SetTotalRows(10).Build(&myRandomDataOp);
|
||||
EXPECT_TRUE(rc.IsOk());
|
||||
|
||||
rc = myTree->AssociateNode(myRandomDataOp);
|
||||
|
@ -395,11 +377,7 @@ TEST_F(MindDataTestRandomDataOp, RandomDataOpTree1) {
|
|||
std::shared_ptr<RandomDataOp> myRandomDataOp;
|
||||
RandomDataOp::Builder builder;
|
||||
|
||||
rc = builder.SetRowsPerBuffer(2)
|
||||
.SetNumWorkers(4)
|
||||
.SetDataSchema(std::move(testSchema))
|
||||
.SetTotalRows(10)
|
||||
.Build(&myRandomDataOp);
|
||||
rc = builder.SetNumWorkers(4).SetDataSchema(std::move(testSchema)).SetTotalRows(10).Build(&myRandomDataOp);
|
||||
EXPECT_TRUE(rc.IsOk());
|
||||
|
||||
rc = myTree->AssociateNode(myRandomDataOp);
|
||||
|
@ -407,7 +385,7 @@ TEST_F(MindDataTestRandomDataOp, RandomDataOpTree1) {
|
|||
|
||||
std::shared_ptr<ShuffleOp> myShuffleOp;
|
||||
rc = ShuffleOp::Builder()
|
||||
.SetRowsPerBuffer(2)
|
||||
|
||||
.SetShuffleSize(4)
|
||||
.Build(&myShuffleOp);
|
||||
EXPECT_TRUE(rc.IsOk());
|
||||
|
|
|
@ -54,7 +54,7 @@ TEST_F(MindDataTestRenameOp, TestRenameOpDefault) {
|
|||
std::shared_ptr<TFReaderOp> my_tfreader_op;
|
||||
rc = TFReaderOp::Builder()
|
||||
.SetDatasetFilesList({dataset_path})
|
||||
.SetRowsPerBuffer(2)
|
||||
|
||||
.SetWorkerConnectorSize(16)
|
||||
.SetNumWorkers(1)
|
||||
.Build(&my_tfreader_op);
|
||||
|
|
|
@ -46,7 +46,7 @@ TEST_F(MindDataTestSentencePieceVocabOp, TestSentencePieceFromDatasetFuntions) {
|
|||
|
||||
std::shared_ptr<TextFileOp> file_op;
|
||||
TextFileOp::Builder builder_file;
|
||||
builder_file.SetTextFilesList({dataset_path}).SetRowsPerBuffer(1).SetNumWorkers(1).SetOpConnectorSize(2);
|
||||
builder_file.SetTextFilesList({dataset_path}).SetNumWorkers(1).SetOpConnectorSize(2);
|
||||
|
||||
Status rc = builder_file.Build(&file_op);
|
||||
ASSERT_TRUE(rc.IsOk());
|
||||
|
@ -119,7 +119,7 @@ TEST_F(MindDataTestSentencePieceVocabOp, TestSentencePieceTokenizerFuntions) {
|
|||
|
||||
std::shared_ptr<TextFileOp> file_op;
|
||||
TextFileOp::Builder builder_file;
|
||||
builder_file.SetTextFilesList({dataset_path}).SetRowsPerBuffer(1).SetNumWorkers(1).SetOpConnectorSize(2);
|
||||
builder_file.SetTextFilesList({dataset_path}).SetNumWorkers(1).SetOpConnectorSize(2);
|
||||
|
||||
Status rc = builder_file.Build(&file_op);
|
||||
ASSERT_TRUE(rc.IsOk());
|
||||
|
|
|
@ -57,7 +57,7 @@ TEST_F(MindDataTestShuffleOp, TestShuffleBasic1) {
|
|||
std::shared_ptr<TFReaderOp> my_tfreader_op;
|
||||
rc = TFReaderOp::Builder()
|
||||
.SetDatasetFilesList({dataset_path})
|
||||
.SetRowsPerBuffer(2)
|
||||
|
||||
.SetWorkerConnectorSize(16)
|
||||
.SetNumWorkers(1)
|
||||
.Build(&my_tfreader_op);
|
||||
|
@ -65,7 +65,7 @@ TEST_F(MindDataTestShuffleOp, TestShuffleBasic1) {
|
|||
rc = my_tree->AssociateNode(my_tfreader_op);
|
||||
EXPECT_TRUE(rc.IsOk());
|
||||
std::shared_ptr<ShuffleOp> my_shuffle_op;
|
||||
rc = ShuffleOp::Builder().SetRowsPerBuffer(2).SetShuffleSize(4).Build(&my_shuffle_op);
|
||||
rc = ShuffleOp::Builder().SetShuffleSize(4).Build(&my_shuffle_op);
|
||||
EXPECT_TRUE(rc.IsOk());
|
||||
rc = my_tree->AssociateNode(my_shuffle_op);
|
||||
EXPECT_TRUE(rc.IsOk());
|
||||
|
@ -130,7 +130,6 @@ TEST_F(MindDataTestShuffleOp, TestShuffleBasic2) {
|
|||
std::shared_ptr<TFReaderOp> my_tfreader_op;
|
||||
rc = TFReaderOp::Builder()
|
||||
.SetDatasetFilesList({dataset_path})
|
||||
.SetRowsPerBuffer(3)
|
||||
.SetWorkerConnectorSize(16)
|
||||
.SetNumWorkers(2)
|
||||
.Build(&my_tfreader_op);
|
||||
|
@ -138,7 +137,7 @@ TEST_F(MindDataTestShuffleOp, TestShuffleBasic2) {
|
|||
rc = my_tree->AssociateNode(my_tfreader_op);
|
||||
EXPECT_TRUE(rc.IsOk());
|
||||
std::shared_ptr<ShuffleOp> my_shuffle_op;
|
||||
rc = ShuffleOp::Builder().SetShuffleSize(4).SetShuffleSeed(100).SetRowsPerBuffer(3).Build(&my_shuffle_op);
|
||||
rc = ShuffleOp::Builder().SetShuffleSize(4).SetShuffleSeed(100).Build(&my_shuffle_op);
|
||||
EXPECT_TRUE(rc.IsOk());
|
||||
rc = my_tree->AssociateNode(my_shuffle_op);
|
||||
EXPECT_TRUE(rc.IsOk());
|
||||
|
@ -201,14 +200,13 @@ TEST_F(MindDataTestShuffleOp, TestShuffleBasic3) {
|
|||
std::shared_ptr<TFReaderOp> my_tfreader_op;
|
||||
rc = TFReaderOp::Builder()
|
||||
.SetDatasetFilesList({dataset_path})
|
||||
.SetRowsPerBuffer(3)
|
||||
.SetWorkerConnectorSize(16)
|
||||
.SetNumWorkers(2)
|
||||
.Build(&my_tfreader_op);
|
||||
EXPECT_TRUE(rc.IsOk());
|
||||
my_tree->AssociateNode(my_tfreader_op);
|
||||
std::shared_ptr<ShuffleOp> my_shuffle_op;
|
||||
rc = ShuffleOp::Builder().SetShuffleSize(100).SetRowsPerBuffer(3).Build(&my_shuffle_op);
|
||||
rc = ShuffleOp::Builder().SetShuffleSize(100).Build(&my_shuffle_op);
|
||||
EXPECT_TRUE(rc.IsOk());
|
||||
rc = my_tree->AssociateNode(my_shuffle_op);
|
||||
EXPECT_TRUE(rc.IsOk());
|
||||
|
@ -275,7 +273,6 @@ TEST_F(MindDataTestShuffleOp, TestRepeatShuffle) {
|
|||
std::shared_ptr<TFReaderOp> my_tfreader_op;
|
||||
rc = TFReaderOp::Builder()
|
||||
.SetDatasetFilesList({dataset_path})
|
||||
.SetRowsPerBuffer(3)
|
||||
.SetWorkerConnectorSize(16)
|
||||
.SetNumWorkers(2)
|
||||
.Build(&my_tfreader_op);
|
||||
|
@ -286,7 +283,6 @@ TEST_F(MindDataTestShuffleOp, TestRepeatShuffle) {
|
|||
rc = ShuffleOp::Builder()
|
||||
.SetShuffleSize(4)
|
||||
.SetShuffleSeed(100)
|
||||
.SetRowsPerBuffer(3)
|
||||
.SetReshuffleEachEpoch(true)
|
||||
.Build(&my_shuffle_op);
|
||||
EXPECT_TRUE(rc.IsOk());
|
||||
|
|
|
@ -35,7 +35,7 @@ TEST_F(MindDataTestSkipOp, TestSkipOpFuntions) {
|
|||
|
||||
std::shared_ptr<TFReaderOp> my_tfreader_op;
|
||||
TFReaderOp::Builder builder;
|
||||
builder.SetDatasetFilesList({dataset_path}).SetRowsPerBuffer(16).SetWorkerConnectorSize(16);
|
||||
builder.SetDatasetFilesList({dataset_path}).SetWorkerConnectorSize(16);
|
||||
std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
|
||||
schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {});
|
||||
builder.SetDataSchema(std::move(schema));
|
||||
|
|
|
@ -42,7 +42,7 @@ TEST_F(MindDataTestTakeOp, TestTakeProject) {
|
|||
// TFReaderOp
|
||||
std::shared_ptr<TFReaderOp> my_tfreader_op;
|
||||
TFReaderOp::Builder builder;
|
||||
builder.SetDatasetFilesList({dataset_path}).SetRowsPerBuffer(16).SetWorkerConnectorSize(16);
|
||||
builder.SetDatasetFilesList({dataset_path}).SetWorkerConnectorSize(16);
|
||||
std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
|
||||
schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {});
|
||||
builder.SetDataSchema(std::move(schema));
|
||||
|
|
|
@ -45,7 +45,7 @@ TEST_F(MindDataTestTextFileOp, TestTextFileBasic) {
|
|||
|
||||
std::shared_ptr<TextFileOp> op;
|
||||
TextFileOp::Builder builder;
|
||||
builder.SetTextFilesList({dataset_path}).SetRowsPerBuffer(16).SetOpConnectorSize(2);
|
||||
builder.SetTextFilesList({dataset_path}).SetOpConnectorSize(2);
|
||||
|
||||
Status rc = builder.Build(&op);
|
||||
ASSERT_TRUE(rc.IsOk());
|
||||
|
@ -94,7 +94,7 @@ TEST_F(MindDataTestTextFileOp, TestTextFileFileNotExist) {
|
|||
|
||||
std::shared_ptr<TextFileOp> op;
|
||||
TextFileOp::Builder builder;
|
||||
builder.SetTextFilesList({dataset_path}).SetRowsPerBuffer(16).SetOpConnectorSize(2);
|
||||
builder.SetTextFilesList({dataset_path}).SetOpConnectorSize(2);
|
||||
|
||||
Status rc = builder.Build(&op);
|
||||
ASSERT_TRUE(rc.IsOk());
|
||||
|
|
|
@ -44,7 +44,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderBasic1) {
|
|||
|
||||
std::shared_ptr<TFReaderOp> my_tfreader_op;
|
||||
TFReaderOp::Builder builder;
|
||||
builder.SetDatasetFilesList({dataset_path}).SetRowsPerBuffer(16);
|
||||
builder.SetDatasetFilesList({dataset_path});
|
||||
std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
|
||||
schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {});
|
||||
builder.SetDataSchema(std::move(schema));
|
||||
|
@ -148,7 +148,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderSmallRowsPerBuffer) {
|
|||
|
||||
std::shared_ptr<TFReaderOp> my_tfreader_op;
|
||||
TFReaderOp::Builder builder;
|
||||
builder.SetDatasetFilesList({dataset_path}).SetRowsPerBuffer(1);
|
||||
builder.SetDatasetFilesList({dataset_path});
|
||||
std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
|
||||
schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {});
|
||||
builder.SetDataSchema(std::move(schema));
|
||||
|
@ -200,7 +200,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderLargeQueueSize) {
|
|||
|
||||
std::shared_ptr<TFReaderOp> my_tfreader_op;
|
||||
TFReaderOp::Builder builder;
|
||||
builder.SetDatasetFilesList({dataset_path}).SetWorkerConnectorSize(1).SetRowsPerBuffer(16);
|
||||
builder.SetDatasetFilesList({dataset_path}).SetWorkerConnectorSize(1);
|
||||
std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
|
||||
schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {});
|
||||
builder.SetDataSchema(std::move(schema));
|
||||
|
@ -252,8 +252,9 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderOneThread) {
|
|||
|
||||
std::shared_ptr<TFReaderOp> my_tfreader_op;
|
||||
TFReaderOp::Builder builder;
|
||||
builder.SetDatasetFilesList({dataset_path})
|
||||
.SetRowsPerBuffer(16)
|
||||
builder
|
||||
.SetDatasetFilesList({dataset_path})
|
||||
|
||||
.SetNumWorkers(1);
|
||||
std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
|
||||
schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {});
|
||||
|
@ -307,7 +308,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderRepeat) {
|
|||
// TFReaderOp
|
||||
std::shared_ptr<TFReaderOp> my_tfreader_op;
|
||||
TFReaderOp::Builder builder;
|
||||
builder.SetDatasetFilesList({dataset_path}).SetRowsPerBuffer(16).SetWorkerConnectorSize(16);
|
||||
builder.SetDatasetFilesList({dataset_path}).SetWorkerConnectorSize(16);
|
||||
std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
|
||||
schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {});
|
||||
builder.SetDataSchema(std::move(schema));
|
||||
|
@ -378,7 +379,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderSchemaConstructor) {
|
|||
std::shared_ptr<TFReaderOp> my_tfreader_op;
|
||||
TFReaderOp::Builder builder;
|
||||
builder.SetDatasetFilesList({dataset_path + "/test.data"})
|
||||
.SetRowsPerBuffer(16)
|
||||
|
||||
.SetNumWorkers(16)
|
||||
.SetDataSchema(std::move(data_schema));
|
||||
Status rc = builder.Build(&my_tfreader_op);
|
||||
|
@ -605,7 +606,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderBasicNoSchema) {
|
|||
|
||||
std::shared_ptr<TFReaderOp> my_tfreader_op;
|
||||
TFReaderOp::Builder builder;
|
||||
builder.SetDatasetFilesList({dataset_path}).SetRowsPerBuffer(16);
|
||||
builder.SetDatasetFilesList({dataset_path});
|
||||
Status rc = builder.Build(&my_tfreader_op);
|
||||
ASSERT_TRUE(rc.IsOk());
|
||||
|
||||
|
@ -697,7 +698,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderInvalidFiles) {
|
|||
|
||||
std::shared_ptr<TFReaderOp> my_tfreader_op;
|
||||
TFReaderOp::Builder builder;
|
||||
builder.SetDatasetFilesList({invalid_file, valid_file, schema_file}).SetRowsPerBuffer(16);
|
||||
builder.SetDatasetFilesList({invalid_file, valid_file, schema_file});
|
||||
|
||||
std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
|
||||
schema->LoadSchemaFile(schema_file, {});
|
||||
|
@ -706,7 +707,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderInvalidFiles) {
|
|||
Status rc = builder.Build(&my_tfreader_op);
|
||||
ASSERT_TRUE(!rc.IsOk());
|
||||
|
||||
builder.SetDatasetFilesList({invalid_file, valid_file, schema_file, nonexistent_file}).SetRowsPerBuffer(16);
|
||||
builder.SetDatasetFilesList({invalid_file, valid_file, schema_file, nonexistent_file});
|
||||
|
||||
schema = std::make_unique<DataSchema>();
|
||||
schema->LoadSchemaFile(schema_file, {});
|
||||
|
|
|
@ -39,11 +39,11 @@
|
|||
namespace common = mindspore::common;
|
||||
|
||||
using namespace mindspore::dataset;
|
||||
using mindspore::MsLogLevel::ERROR;
|
||||
using mindspore::ExceptionType::NoExceptionType;
|
||||
using mindspore::LogStream;
|
||||
using mindspore::ExceptionType::NoExceptionType;
|
||||
using mindspore::MsLogLevel::ERROR;
|
||||
|
||||
std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false, int rows_per_buf = 2);
|
||||
std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false);
|
||||
|
||||
std::shared_ptr<ExecutionTree> Build(std::vector<std::shared_ptr<DatasetOp>> ops);
|
||||
|
||||
|
@ -61,8 +61,7 @@ TEST_F(MindDataTestVOCOp, TestVOCDetection) {
|
|||
std::string task_mode("train");
|
||||
std::shared_ptr<VOCOp> my_voc_op;
|
||||
VOCOp::Builder builder;
|
||||
Status rc = builder.SetDir(dataset_path).SetTask(task_type).SetUsage(task_mode)
|
||||
.Build(&my_voc_op);
|
||||
Status rc = builder.SetDir(dataset_path).SetTask(task_type).SetUsage(task_mode).Build(&my_voc_op);
|
||||
ASSERT_TRUE(rc.IsOk());
|
||||
|
||||
rc = my_tree->AssociateNode(my_voc_op);
|
||||
|
@ -112,8 +111,7 @@ TEST_F(MindDataTestVOCOp, TestVOCSegmentation) {
|
|||
std::string task_mode("train");
|
||||
std::shared_ptr<VOCOp> my_voc_op;
|
||||
VOCOp::Builder builder;
|
||||
Status rc = builder.SetDir(dataset_path).SetTask(task_type).SetUsage(task_mode)
|
||||
.Build(&my_voc_op);
|
||||
Status rc = builder.SetDir(dataset_path).SetTask(task_type).SetUsage(task_mode).Build(&my_voc_op);
|
||||
ASSERT_TRUE(rc.IsOk());
|
||||
|
||||
rc = my_tree->AssociateNode(my_voc_op);
|
||||
|
@ -168,9 +166,7 @@ TEST_F(MindDataTestVOCOp, TestVOCClassIndex) {
|
|||
std::shared_ptr<VOCOp> my_voc_op;
|
||||
VOCOp::Builder builder;
|
||||
Status rc =
|
||||
builder.SetDir(dataset_path).SetTask(task_type).SetUsage(task_mode)
|
||||
.SetClassIndex(class_index)
|
||||
.Build(&my_voc_op);
|
||||
builder.SetDir(dataset_path).SetTask(task_type).SetUsage(task_mode).SetClassIndex(class_index).Build(&my_voc_op);
|
||||
ASSERT_TRUE(rc.IsOk());
|
||||
|
||||
rc = my_tree->AssociateNode(my_voc_op);
|
||||
|
|
|
@ -36,12 +36,11 @@
|
|||
namespace common = mindspore::common;
|
||||
|
||||
using namespace mindspore::dataset;
|
||||
using mindspore::MsLogLevel::INFO;
|
||||
using mindspore::ExceptionType::NoExceptionType;
|
||||
using mindspore::LogStream;
|
||||
using mindspore::ExceptionType::NoExceptionType;
|
||||
using mindspore::MsLogLevel::INFO;
|
||||
|
||||
class MindDataTestZipOp : public UT::DatasetOpTesting {
|
||||
};
|
||||
class MindDataTestZipOp : public UT::DatasetOpTesting {};
|
||||
|
||||
TEST_F(MindDataTestZipOp, MindDataTestZipOpDefault) {
|
||||
/* Tree:
|
||||
|
@ -62,7 +61,6 @@ TEST_F(MindDataTestZipOp, MindDataTestZipOpDefault) {
|
|||
std::shared_ptr<TFReaderOp> my_tfreader_op;
|
||||
rc = TFReaderOp::Builder()
|
||||
.SetDatasetFilesList({dataset_path})
|
||||
.SetRowsPerBuffer(2)
|
||||
.SetWorkerConnectorSize(16)
|
||||
.SetNumWorkers(1)
|
||||
.Build(&my_tfreader_op);
|
||||
|
@ -72,7 +70,6 @@ TEST_F(MindDataTestZipOp, MindDataTestZipOpDefault) {
|
|||
std::shared_ptr<TFReaderOp> my_tfreader_op2;
|
||||
rc = TFReaderOp::Builder()
|
||||
.SetDatasetFilesList({dataset_path2})
|
||||
.SetRowsPerBuffer(2)
|
||||
.SetWorkerConnectorSize(1)
|
||||
.SetNumWorkers(1)
|
||||
.Build(&my_tfreader_op2);
|
||||
|
@ -126,7 +123,6 @@ TEST_F(MindDataTestZipOp, MindDataTestZipOpDefault) {
|
|||
ASSERT_EQ(row_count, 3); // Should be 3 rows fetched
|
||||
}
|
||||
|
||||
|
||||
TEST_F(MindDataTestZipOp, MindDataTestZipOpRepeat) {
|
||||
/* Tree:
|
||||
* OpId(3) Repeat(3)
|
||||
|
@ -147,7 +143,7 @@ TEST_F(MindDataTestZipOp, MindDataTestZipOpRepeat) {
|
|||
std::shared_ptr<TFReaderOp> my_tfreader_op;
|
||||
rc = TFReaderOp::Builder()
|
||||
.SetDatasetFilesList({dataset_path})
|
||||
.SetRowsPerBuffer(2)
|
||||
|
||||
.SetWorkerConnectorSize(16)
|
||||
.SetNumWorkers(1)
|
||||
.Build(&my_tfreader_op);
|
||||
|
@ -157,7 +153,7 @@ TEST_F(MindDataTestZipOp, MindDataTestZipOpRepeat) {
|
|||
std::shared_ptr<TFReaderOp> my_tfreader_op2;
|
||||
rc = TFReaderOp::Builder()
|
||||
.SetDatasetFilesList({dataset_path2})
|
||||
.SetRowsPerBuffer(2)
|
||||
|
||||
.SetWorkerConnectorSize(1)
|
||||
.SetNumWorkers(1)
|
||||
.Build(&my_tfreader_op2);
|
||||
|
|
|
@ -43,21 +43,18 @@ def test_basic():
|
|||
|
||||
ds.config.load('../data/dataset/declient.cfg')
|
||||
|
||||
# assert ds.config.get_rows_per_buffer() == 32
|
||||
assert ds.config.get_num_parallel_workers() == 8
|
||||
# assert ds.config.get_worker_connector_size() == 16
|
||||
assert ds.config.get_prefetch_size() == 16
|
||||
assert ds.config.get_seed() == 5489
|
||||
assert ds.config.get_monitor_sampling_interval() == 15
|
||||
|
||||
# ds.config.set_rows_per_buffer(1)
|
||||
ds.config.set_num_parallel_workers(2)
|
||||
# ds.config.set_worker_connector_size(3)
|
||||
ds.config.set_prefetch_size(4)
|
||||
ds.config.set_seed(5)
|
||||
ds.config.set_monitor_sampling_interval(45)
|
||||
|
||||
# assert ds.config.get_rows_per_buffer() == 1
|
||||
assert ds.config.get_num_parallel_workers() == 2
|
||||
# assert ds.config.get_worker_connector_size() == 3
|
||||
assert ds.config.get_prefetch_size() == 4
|
||||
|
|
Loading…
Reference in New Issue