!14633 Remove rows_per_buffer from MindData

From: @hfarahat
Reviewed-by: @robingrosman,@pandoublefeng
Signed-off-by: @pandoublefeng
This commit is contained in:
mindspore-ci-bot 2021-04-06 03:49:54 +08:00 committed by Gitee
commit 98307c10db
98 changed files with 411 additions and 669 deletions

View File

@ -42,7 +42,6 @@ PYBIND_REGISTER(ConfigManager, 0, ([](const py::module *m) {
.def("get_numa_enable", &ConfigManager::numa_enable)
.def("set_numa_enable", &ConfigManager::set_numa_enable)
.def("get_op_connector_size", &ConfigManager::op_connector_size)
.def("get_rows_per_buffer", &ConfigManager::rows_per_buffer)
.def("get_seed", &ConfigManager::seed)
.def("set_rank_id", &ConfigManager::set_rank_id)
.def("get_worker_connector_size", &ConfigManager::worker_connector_size)
@ -54,7 +53,6 @@ PYBIND_REGISTER(ConfigManager, 0, ([](const py::module *m) {
.def("get_profiler_file_status", &ConfigManager::get_profiler_file_status)
.def("set_num_parallel_workers", &ConfigManager::set_num_parallel_workers)
.def("set_op_connector_size", &ConfigManager::set_op_connector_size)
.def("set_rows_per_buffer", &ConfigManager::set_rows_per_buffer)
.def("set_seed", &ConfigManager::set_seed)
.def("set_worker_connector_size", &ConfigManager::set_worker_connector_size)
.def("load", [](ConfigManager &c, std::string s) { THROW_IF_ERROR(c.LoadFile(s)); });

View File

@ -31,8 +31,7 @@
namespace mindspore {
namespace dataset {
ConfigManager::ConfigManager()
: rows_per_buffer_(kCfgRowsPerBuffer),
num_parallel_workers_(kCfgParallelWorkers),
: num_parallel_workers_(kCfgParallelWorkers),
worker_connector_size_(kCfgWorkerConnectorSize),
op_connector_size_(kCfgOpConnectorSize),
rank_id_(kCfgDefaultRankId),
@ -70,7 +69,6 @@ void ConfigManager::Print(std::ostream &out) const {
// Don't show the test/internal ones. Only display the main ones here.
// fyi, boolalpha tells the output stream to write "true" and "false" for bools
out << "\nClient config settings :"
<< "\nDataCache Rows per buffer : " << rows_per_buffer_
<< "\nParallelOp workers : " << num_parallel_workers_
<< "\nParallelOp worker connector size : " << worker_connector_size_
<< "\nSize of each Connector : " << op_connector_size_ << std::endl;
@ -78,7 +76,6 @@ void ConfigManager::Print(std::ostream &out) const {
// Private helper function that takes a nlohmann json format and populates the settings
Status ConfigManager::FromJson(const nlohmann::json &j) {
set_rows_per_buffer(j.value("rowsPerBuffer", rows_per_buffer_));
set_num_parallel_workers(j.value("numParallelWorkers", num_parallel_workers_));
set_worker_connector_size(j.value("workerConnectorSize", worker_connector_size_));
set_op_connector_size(j.value("opConnectorSize", op_connector_size_));
@ -115,9 +112,6 @@ Status ConfigManager::LoadFile(const std::string &settingsFile) {
return rc;
}
// Setter function
void ConfigManager::set_rows_per_buffer(int32_t rows_per_buffer) { rows_per_buffer_ = rows_per_buffer; }
// Setter function
void ConfigManager::set_num_parallel_workers(int32_t num_parallel_workers) {
num_parallel_workers_ = num_parallel_workers;

View File

@ -74,10 +74,6 @@ class ConfigManager {
// @return Status error code
Status LoadFile(const std::string &settingsFile);
// getter function
// @return The rows per buffer setting
int32_t rows_per_buffer() const { return rows_per_buffer_; }
// getter function
// @return The number of workers setting
int32_t num_parallel_workers() const { return num_parallel_workers_; }
@ -112,10 +108,6 @@ class ConfigManager {
/// \return auto_num_workers_
bool auto_num_workers() const { return auto_num_workers_; }
// setter function
// @param rows_per_buffer - The setting to apply to the config
void set_rows_per_buffer(int32_t rows_per_buffer);
// setter function
// @param num_parallel_workers - The setting to apply to the config
void set_num_parallel_workers(int32_t num_parallel_workers);
@ -230,7 +222,6 @@ class ConfigManager {
void set_auto_worker_config_(uint8_t cfg) { auto_worker_config_ = cfg; }
private:
int32_t rows_per_buffer_;
int32_t num_parallel_workers_;
int32_t worker_connector_size_;
int32_t op_connector_size_;

View File

@ -35,7 +35,7 @@ TensorRow::TensorRow(row_id_type id, const std::initializer_list<value_type> &ls
TensorRow::TensorRow(const TensorRow &tr)
: id_(tr.id_), path_(tr.path_), row_(tr.row_), tensor_row_flag_(tr.tensor_row_flag_) {}
TensorRow::TensorRow(TensorRow::TensorRowFlags flag) : tensor_row_flag_(flag) {}
TensorRow::TensorRow(TensorRow::TensorRowFlags flag) : id_(kDefaultRowId), path_({}), tensor_row_flag_(flag) {}
TensorRow &TensorRow::operator=(const TensorRow &tr) {
if (this == &tr) {

View File

@ -540,8 +540,7 @@ Status CachePerfRun::Run() {
int64_t elapse_time = std::chrono::duration_cast<std::chrono::seconds>(end_tick - start_tick).count();
std::cout << "Epoch one (build phase) elapsed time " << elapse_time << " seconds" << std::endl;
std::cout << "Epoch one (build phase) per pipeline per worker summary. Buffer size = " << cfg_.rows_per_buffer()
<< std::endl;
std::cout << "Epoch one (build phase) per pipeline per worker summary." << std::endl;
PrintEpochSummary();
// Get some stats, but we need to connect. The server will think it is the (n+1)-th pipeline

View File

@ -228,16 +228,13 @@ Status CachePipelineRun::RunFirstEpoch() {
}
std::vector<row_id_type> keys;
auto rows_per_buffer = cfg_.rows_per_buffer();
keys.reserve(rows_per_buffer);
keys.reserve(1);
int32_t worker_id = 0;
for (auto i = start_row_; i <= end_row_; ++i) {
keys.push_back(i);
if (keys.size() == rows_per_buffer) {
auto blk = std::make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone));
RETURN_IF_NOT_OK(io_block_queues_[worker_id++ % num_workers]->Add(std::move(blk)));
keys.clear();
}
auto blk = std::make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone));
RETURN_IF_NOT_OK(io_block_queues_[worker_id++ % num_workers]->Add(std::move(blk)));
keys.clear();
}
if (!keys.empty()) {
auto blk = std::make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone));
@ -355,9 +352,8 @@ Status CachePipelineRun::WriterWorkerEntry(int32_t worker_id) {
Status CachePipelineRun::RunReadEpoch() {
std::vector<row_id_type> keys;
auto rows_per_buffer = cc_->GetPrefetchSize(); // We will use prefetch size to read.
auto num_workers = cfg_.num_parallel_workers();
keys.reserve(rows_per_buffer);
keys.reserve(1);
// Spawn workers
auto f = std::bind(&CachePipelineRun::ReaderWorkerEntry, this, std::placeholders::_1);
std::vector<Task *> worker_threads;
@ -381,11 +377,9 @@ Status CachePipelineRun::RunReadEpoch() {
int32_t worker_id = 0;
for (auto id : all_keys) {
keys.push_back(id);
if (keys.size() == rows_per_buffer) {
auto blk = std::make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone));
RETURN_IF_NOT_OK(io_block_queues_[worker_id++ % num_workers]->Add(std::move(blk)));
keys.clear();
}
auto blk = std::make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone));
RETURN_IF_NOT_OK(io_block_queues_[worker_id++ % num_workers]->Add(std::move(blk)));
keys.clear();
}
if (!keys.empty()) {
auto blk = std::make_unique<IOBlock>(IOBlock(keys, IOBlock::kDeIoBlockNone));

View File

@ -31,7 +31,6 @@ BarrierOp::Builder::Builder() {
// using the various builder set methods.
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
builder_rows_per_buffer_ = cfg->rows_per_buffer();
builder_op_connector_size_ = cfg->op_connector_size();
}
@ -39,17 +38,13 @@ Status BarrierOp::Builder::SanityCheck() const { return Status::OK(); }
Status BarrierOp::Builder::Build(std::shared_ptr<BarrierOp> *ptr) {
RETURN_IF_NOT_OK(SanityCheck());
*ptr = std::make_shared<BarrierOp>(builder_rows_per_buffer_, builder_op_connector_size_, builder_condition_name_,
builder_condition_func_);
*ptr = std::make_shared<BarrierOp>(builder_op_connector_size_, builder_condition_name_, builder_condition_func_);
return Status::OK();
}
// Construct BarrierOp here, local variables initialized in operator due to tree construction restrictions
BarrierOp::BarrierOp(int32_t rows_per_buffer, int32_t op_connector_size, const std::string &condition_name,
py::function condition_func)
BarrierOp::BarrierOp(int32_t op_connector_size, const std::string &condition_name, py::function condition_func)
: PipelineOp(op_connector_size),
rows_per_buffer_(rows_per_buffer),
buffer_id_(0),
clean_up_(false),
eof_(false),
condition_name_(condition_name),

View File

@ -98,16 +98,13 @@ class BarrierOp : public PipelineOp {
};
// Constructor for BarrierOp
// @param rows_per_buffer - number of rows in output buffer
// @param op_connector_size - connector size
// @param condition_name - the condition name associated with this operator
// @param condition_func - the blocking condition check per row
// @note - currently rows_per_buffer should = 1 for barrier.
// The reason for this is having other values would complicate how the pipeline behaves with other operators
// One example of such case is having batch after barrier. Batch would be waiting for data and having
// rows per buffer in this case can result in hanging
BarrierOp(int32_t rows_per_buffer, int32_t op_connector_size, const std::string &condition_name,
py::function condition_func);
BarrierOp(int32_t op_connector_size, const std::string &condition_name, py::function condition_func);
// Destructor
~BarrierOp();
@ -156,10 +153,6 @@ class BarrierOp : public PipelineOp {
bool clean_up_;
// end of file state, we stop reading data and shut down
bool eof_;
// rows per buffer
int32_t rows_per_buffer_;
// buffer_id
int32_t buffer_id_;
// iterator to pull new rows, we only have one child
std::unique_ptr<ChildIterator> child_iterator_;
// condition name, to support multiple barriers

View File

@ -248,7 +248,7 @@ Status BatchOp::WorkerEntry(int32_t workerId) {
RETURN_IF_NOT_OK(out_connector_->SendEOF(workerId));
} else if (table_pair.second.ctrl_ == batchCtrl::kNoCtrl) {
TensorRow new_row;
RETURN_IF_NOT_OK(MakeBatchedBuffer(std::move(table_pair), &new_row));
RETURN_IF_NOT_OK(MakeBatchedRow(std::move(table_pair), &new_row));
RETURN_IF_NOT_OK(out_connector_->Add(std::move(new_row), workerId));
}
RETURN_IF_NOT_OK(worker_queues_[workerId]->PopFront(&table_pair));
@ -256,7 +256,7 @@ Status BatchOp::WorkerEntry(int32_t workerId) {
return Status::OK();
}
Status BatchOp::MakeBatchedBuffer(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> table_pair, TensorRow *new_row) {
Status BatchOp::MakeBatchedRow(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> table_pair, TensorRow *new_row) {
RETURN_UNEXPECTED_IF_NULL(table_pair.first);
#ifdef ENABLE_PYTHON
if (!in_col_names_.empty()) RETURN_IF_NOT_OK(MapColumns(&table_pair)); // pass it through pyfunc

View File

@ -225,7 +225,7 @@ class BatchOp : public ParallelOp {
// Generate a row of batched tensors
// @return Status The status code returned
Status MakeBatchedBuffer(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> table_pair, TensorRow *new_row);
Status MakeBatchedRow(std::pair<std::unique_ptr<TensorQTable>, CBatchInfo> table_pair, TensorRow *new_row);
#ifdef ENABLE_PYTHON
// Function that calls pyfunc to perform map on batch

View File

@ -45,14 +45,13 @@ Status CacheBase::Reset() {
MS_LOG(DEBUG) << Name() << " performing a self-reset.";
return Status::OK();
}
CacheBase::CacheBase(int32_t num_workers, int32_t op_connector_size, int32_t rows_per_buf,
std::shared_ptr<CacheClient> cache_client, std::shared_ptr<SamplerRT> sampler)
CacheBase::CacheBase(int32_t num_workers, int32_t op_connector_size, std::shared_ptr<CacheClient> cache_client,
std::shared_ptr<SamplerRT> sampler)
: ParallelOp(num_workers, op_connector_size, std::move(sampler)),
row_cnt_(0),
num_cache_miss_(0),
cache_client_(std::move(cache_client)),
rows_per_buffer_(rows_per_buf),
prefetch_size_(rows_per_buffer_),
prefetch_size_(1),
num_prefetchers_(num_workers_) {
// Adjust the prefetch size based on the number of workers.
auto prefetch_sz_per_thread = cache_client_->GetPrefetchSize() / num_prefetchers_;
@ -92,7 +91,7 @@ Status CacheBase::FetchSamplesToWorkers() {
row_cnt_ = 0;
++wait_cnt;
std::vector<row_id_type> keys;
keys.reserve(rows_per_buffer_);
keys.reserve(1);
std::vector<row_id_type> prefetch_keys;
prefetch_keys.reserve(prefetch_size_);
std::unique_ptr<DataBuffer> sampler_buffer;
@ -107,15 +106,11 @@ Status CacheBase::FetchSamplesToWorkers() {
// Batch enough rows for performance reasons.
if (row_cnt_ % prefetch_size_ == 0) {
RETURN_IF_NOT_OK(send_to_que(prefetch_queues_, prefetch_cnt++ % num_prefetchers_, prefetch_keys));
// Now we tell the WorkerEntry to wait for them to come back. If prefetch_size_ is a multiple
// of rows_per_buffer_, the keys vector will always be empty. But it can be partially filled.
// The only requirement we set up is rows_per_buffer_ is less than or equal to prefetch_size_.
// Now we tell the WorkerEntry to wait for them to come back.
for (auto row_id : prefetch_keys) {
keys.push_back(row_id);
if (keys.size() == rows_per_buffer_) {
RETURN_IF_NOT_OK(send_to_que(io_block_queues_, buf_cnt++ % num_workers_, keys));
keys.clear();
}
RETURN_IF_NOT_OK(send_to_que(io_block_queues_, buf_cnt++ % num_workers_, keys));
keys.clear();
}
prefetch_keys.clear();
}
@ -127,10 +122,8 @@ Status CacheBase::FetchSamplesToWorkers() {
RETURN_IF_NOT_OK(send_to_que(prefetch_queues_, prefetch_cnt++ % num_prefetchers_, prefetch_keys));
for (auto row_id : prefetch_keys) {
keys.push_back(row_id);
if (keys.size() == rows_per_buffer_) {
RETURN_IF_NOT_OK(send_to_que(io_block_queues_, buf_cnt++ % num_workers_, keys));
keys.clear();
}
RETURN_IF_NOT_OK(send_to_que(io_block_queues_, buf_cnt++ % num_workers_, keys));
keys.clear();
}
}
if (!keys.empty()) {

View File

@ -42,11 +42,10 @@ class CacheBase : public ParallelOp {
/// \brief Base class constructor
/// \param num_workers Number of parallel workers
/// \param op_connector_size Connector size
/// \param rows_per_buf Number of rows per buffer
/// \param cache_client CacheClient for communication to the CacheServer
/// \param sampler Sampler which is mandatory
CacheBase(int32_t num_workers, int32_t op_connector_size, int32_t rows_per_buf,
std::shared_ptr<CacheClient> cache_client, std::shared_ptr<SamplerRT> sampler);
CacheBase(int32_t num_workers, int32_t op_connector_size, std::shared_ptr<CacheClient> cache_client,
std::shared_ptr<SamplerRT> sampler);
/// \brief Destructor
~CacheBase();
@ -87,7 +86,6 @@ class CacheBase : public ParallelOp {
int64_t row_cnt_;
std::atomic<int64_t> num_cache_miss_;
std::shared_ptr<CacheClient> cache_client_;
int32_t rows_per_buffer_;
std::unique_ptr<Connector<std::vector<row_id_type>>> keys_miss_;
/// \brief Common function to register resources for interrupt

View File

@ -31,7 +31,6 @@ namespace dataset {
CacheLookupOp::Builder::Builder() : build_cache_client_(nullptr), build_sampler_(nullptr) {
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
build_num_workers_ = cfg->num_parallel_workers();
rows_per_buffer_ = cfg->rows_per_buffer();
build_op_connector_size_ = cfg->op_connector_size();
}
@ -52,8 +51,8 @@ Status CacheLookupOp::Builder::SanityCheck() const {
// The builder "build" method creates the final object and does some init on it
Status CacheLookupOp::Builder::Build(std::shared_ptr<CacheLookupOp> *ptr) {
RETURN_IF_NOT_OK(SanityCheck());
*ptr = std::make_shared<CacheLookupOp>(build_num_workers_, build_op_connector_size_, rows_per_buffer_,
build_cache_client_, build_sampler_);
*ptr =
std::make_shared<CacheLookupOp>(build_num_workers_, build_op_connector_size_, build_cache_client_, build_sampler_);
return Status::OK();
}
Status CacheLookupOp::operator()() {

View File

@ -74,7 +74,6 @@ class CacheLookupOp : public CacheBase, public SamplerRT {
private:
int32_t build_num_workers_;
int32_t rows_per_buffer_;
int32_t build_op_connector_size_;
std::shared_ptr<CacheClient> build_cache_client_;
std::shared_ptr<SamplerRT> build_sampler_;
@ -86,9 +85,9 @@ class CacheLookupOp : public CacheBase, public SamplerRT {
/// \brief Constructor
/// \note It takes the same argument as the base class.
/// \see CacheBase
CacheLookupOp(int32_t num_workers, int32_t op_connector_size, int32_t rows_per_buf,
std::shared_ptr<CacheClient> cache_client, std::shared_ptr<SamplerRT> sampler)
: CacheBase(num_workers, op_connector_size, rows_per_buf, cache_client, sampler), SamplerRT(*(sampler.get())) {}
CacheLookupOp(int32_t num_workers, int32_t op_connector_size, std::shared_ptr<CacheClient> cache_client,
std::shared_ptr<SamplerRT> sampler)
: CacheBase(num_workers, op_connector_size, cache_client, sampler), SamplerRT(*(sampler.get())) {}
~CacheLookupOp() = default;
// As a parallel op, we override these two functions
Status operator()() override;

View File

@ -33,7 +33,6 @@ namespace dataset {
CacheOp::Builder::Builder() : build_cache_client_(nullptr), build_sampler_(nullptr) {
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
build_num_workers_ = cfg->num_parallel_workers();
rows_per_buffer_ = cfg->rows_per_buffer();
build_op_connector_size_ = cfg->op_connector_size();
}
@ -54,17 +53,16 @@ Status CacheOp::Builder::SanityCheck() const {
// The builder "build" method creates the final object and does some init on it
Status CacheOp::Builder::Build(std::shared_ptr<CacheOp> *ptr) {
RETURN_IF_NOT_OK(SanityCheck());
*ptr = std::make_shared<CacheOp>(build_num_workers_, build_op_connector_size_, rows_per_buffer_, build_cache_client_,
build_sampler_);
*ptr = std::make_shared<CacheOp>(build_num_workers_, build_op_connector_size_, build_cache_client_, build_sampler_);
RETURN_IF_NOT_OK((*ptr)->InitCache());
return Status::OK();
}
// Constructor of CacheOp
CacheOp::CacheOp(int32_t num_workers, int32_t op_connector_size, int32_t rows_per_buf,
std::shared_ptr<CacheClient> cache_client, std::shared_ptr<SamplerRT> sampler)
: CacheBase(num_workers, op_connector_size, rows_per_buf, std::move(cache_client), std::move(sampler)),
CacheOp::CacheOp(int32_t num_workers, int32_t op_connector_size, std::shared_ptr<CacheClient> cache_client,
std::shared_ptr<SamplerRT> sampler)
: CacheBase(num_workers, op_connector_size, std::move(cache_client), std::move(sampler)),
num_guys_in_(0),
phase_(Phase::kBuildPhase) {}

View File

@ -70,14 +70,6 @@ class CacheOp : public CacheBase, public RandomAccessOp {
return *this;
}
/// \brief Setter method
/// \param rows_per_buffer
/// \return Builder setter method returns reference to the builder.
Builder &SetRowsPerBuffer(int32_t rows_per_buffer) {
rows_per_buffer_ = rows_per_buffer;
return *this;
}
/// \brief Setter method
/// \param sampler
/// \return Builder setter method returns reference to the builder.
@ -93,7 +85,6 @@ class CacheOp : public CacheBase, public RandomAccessOp {
private:
int32_t build_num_workers_;
int32_t rows_per_buffer_;
int32_t build_op_connector_size_;
std::shared_ptr<CacheClient> build_cache_client_;
std::shared_ptr<SamplerRT> build_sampler_;
@ -107,8 +98,8 @@ class CacheOp : public CacheBase, public RandomAccessOp {
/// \note The builder class should be used to call it.
/// \param num_workers The number of worker threads.
/// \param op_connector_size The size of each queue in the connector.
CacheOp(int32_t num_workers, int32_t op_connector_size, int32_t rows_per_buf,
std::shared_ptr<CacheClient> cache_client, std::shared_ptr<SamplerRT> sampler);
CacheOp(int32_t num_workers, int32_t op_connector_size, std::shared_ptr<CacheClient> cache_client,
std::shared_ptr<SamplerRT> sampler);
// Destructor
~CacheOp();

View File

@ -41,7 +41,6 @@ constexpr int32_t ShuffleOp::kShuffleStateDrain;
ShuffleOp::Builder::Builder() : build_shuffle_size_(0), build_reshuffle_each_epoch_(true) {
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
build_op_connector_size_ = cfg->op_connector_size();
build_rows_per_buffer_ = cfg->rows_per_buffer();
build_shuffle_seed_ = GetSeed();
}
@ -56,20 +55,17 @@ Status ShuffleOp::Builder::SanityCheck() const {
Status ShuffleOp::Builder::Build(std::shared_ptr<ShuffleOp> *ptr) {
RETURN_IF_NOT_OK(SanityCheck());
*ptr = std::make_shared<ShuffleOp>(build_shuffle_size_, build_shuffle_seed_, build_op_connector_size_,
build_reshuffle_each_epoch_, build_rows_per_buffer_);
build_reshuffle_each_epoch_);
return Status::OK();
}
// Constructor of the ShuffleOp
ShuffleOp::ShuffleOp(int32_t shuffle_size, uint32_t shuffle_seed, int32_t op_connector_size, bool reset_every_epoch,
int32_t rows_per_buffer)
ShuffleOp::ShuffleOp(int32_t shuffle_size, uint32_t shuffle_seed, int32_t op_connector_size, bool reset_every_epoch)
: PipelineOp(op_connector_size),
shuffle_size_(shuffle_size),
shuffle_seed_(shuffle_seed),
reshuffle_each_epoch_(reset_every_epoch),
rng_(shuffle_seed),
buffer_counter_(0),
rows_per_buffer_(rows_per_buffer),
shuffle_buffer_(std::make_unique<TensorTable>()),
shuffle_last_row_idx_(0),
shuffle_buffer_state_(kShuffleStateInit) {}
@ -87,7 +83,6 @@ Status ShuffleOp::SelfReset() {
}
shuffle_buffer_ = std::make_unique<TensorTable>();
buffer_counter_ = 0;
shuffle_last_row_idx_ = 0;
shuffle_buffer_state_ = kShuffleStateInit;
return Status::OK();
@ -104,8 +99,8 @@ void ShuffleOp::Print(std::ostream &out, bool show_all) const {
// Call the super class for displaying any common detailed info
PipelineOp::Print(out, show_all);
// Then show any custom derived-internal stuff
out << "\nShuffle size: " << shuffle_size_ << "\nRows per buffer: " << rows_per_buffer_
<< "\nShuffle buffer state: " << shuffle_buffer_state_ << "\nShuffle seed: " << shuffle_seed_ << "\n\n";
out << "\nShuffle size: " << shuffle_size_ << "\nShuffle buffer state: " << shuffle_buffer_state_
<< "\nShuffle seed: " << shuffle_seed_ << "\n\n";
}
}

View File

@ -121,9 +121,7 @@ class ShuffleOp : public PipelineOp {
// @param shuffle_size - The size for the shuffle buffer
// @param shuffle_seed - The seed to use for random number generation
// @param op_connector_size - The output connector queue size
// @param rows_per_buffer - The requested number of rows per buffer
ShuffleOp(int32_t shuffle_size, uint32_t shuffle_seed, int32_t op_connector_size, bool reset_every_epoch,
int32_t rows_per_buffer);
ShuffleOp(int32_t shuffle_size, uint32_t shuffle_seed, int32_t op_connector_size, bool reset_every_epoch);
// Destructor
~ShuffleOp() = default;
@ -183,8 +181,6 @@ class ShuffleOp : public PipelineOp {
// (ie uniform_int_distribution) because we will need to create up to |dataset| instances
// of the distribution object in the common case of a perfect shuffle
std::mt19937_64 rng_;
int32_t buffer_counter_; // For creating new buffer id's
int32_t rows_per_buffer_; // Number of rows to pack into output buffer
// A single (potentially large) buffer of tensor rows for performing shuffling.
std::unique_ptr<TensorTable> shuffle_buffer_;
int32_t shuffle_last_row_idx_; // Internal tracking of the last slot of our shuffle buffer

View File

@ -32,7 +32,6 @@ namespace dataset {
AlbumOp::Builder::Builder() : builder_decode_(false), builder_sampler_(nullptr) {
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
builder_num_workers_ = cfg->num_parallel_workers();
builder_rows_per_buffer_ = cfg->rows_per_buffer();
builder_op_connector_size_ = cfg->op_connector_size();
}
@ -52,9 +51,8 @@ Status AlbumOp::Builder::Build(std::shared_ptr<AlbumOp> *ptr) {
MS_LOG(INFO) << "Schema file provided: " << builder_schema_file_ << ".";
builder_schema_->LoadSchemaFile(builder_schema_file_, builder_columns_to_load_);
}
*ptr = std::make_shared<AlbumOp>(builder_num_workers_, builder_rows_per_buffer_, builder_dir_,
builder_op_connector_size_, builder_decode_, builder_extensions_,
std::move(builder_schema_), std::move(builder_sampler_));
*ptr = std::make_shared<AlbumOp>(builder_num_workers_, builder_dir_, builder_op_connector_size_, builder_decode_,
builder_extensions_, std::move(builder_schema_), std::move(builder_sampler_));
return Status::OK();
}
@ -69,10 +67,10 @@ Status AlbumOp::Builder::SanityCheck() {
return err_msg.empty() ? Status::OK() : Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, err_msg);
}
AlbumOp::AlbumOp(int32_t num_wkrs, int32_t rows_per_buffer, std::string file_dir, int32_t queue_size, bool do_decode,
AlbumOp::AlbumOp(int32_t num_wkrs, std::string file_dir, int32_t queue_size, bool do_decode,
const std::set<std::string> &exts, std::unique_ptr<DataSchema> data_schema,
std::shared_ptr<SamplerRT> sampler)
: MappableLeafOp(num_wkrs, queue_size, std::move(sampler), rows_per_buffer),
: MappableLeafOp(num_wkrs, queue_size, std::move(sampler)),
folder_path_(file_dir),
decode_(do_decode),
extensions_(exts),

View File

@ -58,14 +58,6 @@ class AlbumOp : public MappableLeafOp {
/// \brief Destructor.
~Builder() = default;
/// \brief Setter method
/// \param[in] rows_per_buffer
/// \return Builder setter method returns reference to the builder
Builder &SetRowsPerBuffer(int32_t rows_per_buffer) {
builder_rows_per_buffer_ = rows_per_buffer;
return *this;
}
/// \brief Setter method
/// \param[in] size
/// \return Builder setter method returns reference to the builder
@ -154,16 +146,14 @@ class AlbumOp : public MappableLeafOp {
/// \brief Constructor
/// \param[in] num_wkrs - Num of workers reading images in parallel
/// \param[in] rows_per_buffer Number of images (rows) in each buffer
/// \param[in] file_dir - directory of Album
/// \param[in] queue_size - connector size
/// \param[in] do_decode - decode image files
/// \param[in] exts - set of file extensions to read, if empty, read everything under the dir
/// \param[in] data_schema - schema of dataset
/// \param[in] sampler - sampler tells AlbumOp what to read
AlbumOp(int32_t num_wkrs, int32_t rows_per_buffer, std::string file_dir, int32_t queue_size, bool do_decode,
const std::set<std::string> &exts, std::unique_ptr<DataSchema> data_schema,
std::shared_ptr<SamplerRT> sampler);
AlbumOp(int32_t num_wkrs, std::string file_dir, int32_t queue_size, bool do_decode, const std::set<std::string> &exts,
std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler);
/// \brief Destructor.
~AlbumOp() = default;
@ -273,7 +263,6 @@ class AlbumOp : public MappableLeafOp {
/// \return Status The status code returned
Status ComputeColMap() override;
int32_t rows_per_buffer_;
std::string folder_path_; // directory of image folder
bool decode_;
std::set<std::string> extensions_; // extensions allowed

View File

@ -34,7 +34,6 @@ namespace dataset {
CelebAOp::Builder::Builder() : builder_decode_(false), builder_sampler_(nullptr) {
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
builder_num_workers_ = cfg->num_parallel_workers();
builder_rows_per_buffer_ = cfg->rows_per_buffer();
builder_op_connector_size_ = cfg->op_connector_size();
}
@ -54,9 +53,9 @@ Status CelebAOp::Builder::Build(std::shared_ptr<CelebAOp> *op) {
// label is like this:0 1 0 0 1......
RETURN_IF_NOT_OK(
builder_schema_->AddColumn(ColDescriptor("attr", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1)));
*op = std::make_shared<CelebAOp>(builder_num_workers_, builder_rows_per_buffer_, builder_dir_,
builder_op_connector_size_, builder_decode_, builder_usage_, builder_extensions_,
std::move(builder_schema_), std::move(builder_sampler_));
*op = std::make_shared<CelebAOp>(builder_num_workers_, builder_dir_, builder_op_connector_size_, builder_decode_,
builder_usage_, builder_extensions_, std::move(builder_schema_),
std::move(builder_sampler_));
if (*op == nullptr) {
return Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, "CelebAOp init failed.");
}
@ -76,10 +75,10 @@ Status CelebAOp::Builder::SanityCheck() {
return err_msg.empty() ? Status::OK() : Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, err_msg);
}
CelebAOp::CelebAOp(int32_t num_workers, int32_t rows_per_buffer, const std::string &dir, int32_t queue_size,
bool decode, const std::string &usage, const std::set<std::string> &exts,
std::unique_ptr<DataSchema> schema, std::shared_ptr<SamplerRT> sampler)
: MappableLeafOp(num_workers, queue_size, std::move(sampler), rows_per_buffer),
CelebAOp::CelebAOp(int32_t num_workers, const std::string &dir, int32_t queue_size, bool decode,
const std::string &usage, const std::set<std::string> &exts, std::unique_ptr<DataSchema> schema,
std::shared_ptr<SamplerRT> sampler)
: MappableLeafOp(num_workers, queue_size, std::move(sampler)),
folder_path_(dir),
decode_(decode),
extensions_(exts),

View File

@ -53,14 +53,6 @@ class CelebAOp : public MappableLeafOp {
// Destructor.
~Builder() = default;
// Setter method
// @param int32_t rows_per_buffer
// @return Builder setter method returns reference to the builder.
Builder &SetRowsPerBuffer(int32_t rows_per_buffer) {
builder_rows_per_buffer_ = rows_per_buffer;
return *this;
}
// Setter method
// @param int32_t size
// @return Builder setter method returns reference to the builder.
@ -139,13 +131,11 @@ class CelebAOp : public MappableLeafOp {
// Constructor
// @param int32_t - num_workers - Num of workers reading images in parallel
// @param int32_t - rows_per_buffer Number of images (rows) in each buffer
// @param std::string - dir directory of celeba dataset
// @param int32_t queueSize - connector queue size
// @param std::unique_ptr<Sampler> sampler - sampler tells CelebAOp what to read
CelebAOp(int32_t num_workers, int32_t rows_per_buffer, const std::string &dir, int32_t queue_size, bool decode,
const std::string &usage, const std::set<std::string> &exts, std::unique_ptr<DataSchema> schema,
std::shared_ptr<SamplerRT> sampler);
CelebAOp(int32_t num_workers, const std::string &dir, int32_t queue_size, bool decode, const std::string &usage,
const std::set<std::string> &exts, std::unique_ptr<DataSchema> schema, std::shared_ptr<SamplerRT> sampler);
~CelebAOp() override = default;

View File

@ -39,7 +39,6 @@ constexpr uint32_t kCifarImageSize = kCifarImageHeight * kCifarImageWidth * kCif
CifarOp::Builder::Builder() : sampler_(nullptr), usage_("") {
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
num_workers_ = cfg->num_parallel_workers();
rows_per_buffer_ = cfg->rows_per_buffer();
op_connect_size_ = cfg->op_connector_size();
cifar_type_ = kCifar10;
}
@ -65,8 +64,8 @@ Status CifarOp::Builder::Build(std::shared_ptr<CifarOp> *ptr) {
ColDescriptor("fine_label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &another_scalar)));
}
*ptr = std::make_shared<CifarOp>(cifar_type_, usage_, num_workers_, rows_per_buffer_, dir_, op_connect_size_,
std::move(schema_), std::move(sampler_));
*ptr = std::make_shared<CifarOp>(cifar_type_, usage_, num_workers_, dir_, op_connect_size_, std::move(schema_),
std::move(sampler_));
return Status::OK();
}
@ -85,10 +84,9 @@ Status CifarOp::Builder::SanityCheck() {
return err_msg.empty() ? Status::OK() : Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, err_msg);
}
CifarOp::CifarOp(CifarType type, const std::string &usage, int32_t num_works, int32_t rows_per_buf,
const std::string &file_dir, int32_t queue_size, std::unique_ptr<DataSchema> data_schema,
std::shared_ptr<SamplerRT> sampler)
: MappableLeafOp(num_works, queue_size, std::move(sampler), rows_per_buf),
CifarOp::CifarOp(CifarType type, const std::string &usage, int32_t num_works, const std::string &file_dir,
int32_t queue_size, std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler)
: MappableLeafOp(num_works, queue_size, std::move(sampler)),
cifar_type_(type),
usage_(usage),
folder_path_(file_dir),

View File

@ -49,14 +49,6 @@ class CifarOp : public MappableLeafOp {
// Destructor.
~Builder() = default;
// Setter method
// @param uint32_t rows_per_buffer
// @return Builder setter method returns reference to the builder.
Builder &SetRowsPerBuffer(int32_t rows_per_buffer) {
rows_per_buffer_ = rows_per_buffer;
return *this;
}
// Setter method
// @param uint32_t size
// @return Builder setter method returns reference to the builder.
@ -122,7 +114,6 @@ class CifarOp : public MappableLeafOp {
std::string dir_;
std::string usage_;
int32_t num_workers_;
int32_t rows_per_buffer_;
int32_t op_connect_size_;
std::shared_ptr<SamplerRT> sampler_;
std::unique_ptr<DataSchema> schema_;
@ -133,13 +124,11 @@ class CifarOp : public MappableLeafOp {
// @param CifarType type - Cifar10 or Cifar100
// @param const std::string &usage - Usage of this dataset, can be 'train', 'test' or 'all'
// @param int32_t num_works - Num of workers reading images in parallel
// @param uint32_t - rowsPerBuffer Number of images (rows) in each buffer
// @param const std::string &file_dir - directory of cifar dataset
// @param int32_t queue_size - connector queue size
// @param std::unique_ptr<DataSchema> data_schema - the schema of the cifar dataset
// @param std::shared_ptr<SamplerRT> sampler - sampler tells CifarOp what to read
CifarOp(CifarType type, const std::string &usage, int32_t num_works, int32_t rows_per_buf,
const std::string &file_dir, int32_t queue_size, std::unique_ptr<DataSchema> data_schema,
std::shared_ptr<SamplerRT> sampler);
CifarOp(CifarType type, const std::string &usage, int32_t num_works, const std::string &file_dir, int32_t queue_size,
std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler);
// Destructor.
~CifarOp() = default;

View File

@ -36,7 +36,6 @@ ClueOp::Builder::Builder()
std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
builder_num_workers_ = config_manager->num_parallel_workers();
builder_op_connector_size_ = config_manager->op_connector_size();
builder_rows_per_buffer_ = config_manager->rows_per_buffer();
builder_worker_connector_size_ = config_manager->worker_connector_size();
}
@ -67,9 +66,8 @@ Status ClueOp::Builder::Build(std::shared_ptr<ClueOp> *op) {
}
std::shared_ptr<ClueOp> clue_op = std::make_shared<ClueOp>(
builder_num_workers_, builder_rows_per_buffer_, builder_num_samples_, builder_worker_connector_size_, ck_map,
builder_clue_files_list_, builder_op_connector_size_, builder_shuffle_files_, builder_num_devices_,
builder_device_id_);
builder_num_workers_, builder_num_samples_, builder_worker_connector_size_, ck_map, builder_clue_files_list_,
builder_op_connector_size_, builder_shuffle_files_, builder_num_devices_, builder_device_id_);
RETURN_IF_NOT_OK(clue_op->Init());
*op = std::move(clue_op);
@ -87,11 +85,11 @@ std::vector<std::string> ClueOp::Builder::split(const std::string &s, char delim
return res;
}
ClueOp::ClueOp(int32_t num_workers, int64_t rows_per_buffer, int64_t num_samples, int32_t worker_connector_size,
ColKeyMap cols_to_keyword, std::vector<std::string> clue_files_list, int32_t op_connector_size,
bool shuffle_files, int32_t num_devices, int32_t device_id)
: NonMappableLeafOp(num_workers, worker_connector_size, rows_per_buffer, num_samples, op_connector_size,
shuffle_files, num_devices, device_id),
ClueOp::ClueOp(int32_t num_workers, int64_t num_samples, int32_t worker_connector_size, ColKeyMap cols_to_keyword,
std::vector<std::string> clue_files_list, int32_t op_connector_size, bool shuffle_files,
int32_t num_devices, int32_t device_id)
: NonMappableLeafOp(num_workers, worker_connector_size, num_samples, op_connector_size, shuffle_files, num_devices,
device_id),
clue_files_list_(std::move(clue_files_list)),
cols_to_keyword_(cols_to_keyword) {}
@ -200,8 +198,7 @@ void ClueOp::Print(std::ostream &out, bool show_all) const {
// Call the super class for displaying any common detailed info
ParallelOp::Print(out, show_all);
// Then show any custom derived-internal stuff
out << "\nRows per buffer: " << rows_per_buffer_ << "\nSample count: " << total_rows_
<< "\nDevice id: " << device_id_ << "\nNumber of devices: " << num_devices_
out << "\nSample count: " << total_rows_ << "\nDevice id: " << device_id_ << "\nNumber of devices: " << num_devices_
<< "\nShuffle files: " << ((shuffle_files_) ? "yes" : "no") << "\nClue files list:\n";
for (int i = 0; i < clue_files_list_.size(); ++i) {
out << " " << clue_files_list_[i];

View File

@ -138,9 +138,9 @@ class ClueOp : public NonMappableLeafOp {
};
// Constructor of ClueOp
ClueOp(int32_t num_workers, int64_t rows_per_buffer, int64_t num_samples, int32_t worker_connector_size,
ColKeyMap cols_to_keyword, std::vector<std::string> clue_files_list, int32_t op_connector_size,
bool shuffle_files, int32_t num_devices, int32_t device_id);
ClueOp(int32_t num_workers, int64_t num_samples, int32_t worker_connector_size, ColKeyMap cols_to_keyword,
std::vector<std::string> clue_files_list, int32_t op_connector_size, bool shuffle_files, int32_t num_devices,
int32_t device_id);
// Default destructor
~ClueOp() = default;

View File

@ -50,7 +50,6 @@ const unsigned int kPadValueZero = 0;
CocoOp::Builder::Builder() : builder_decode_(false), builder_sampler_(nullptr) {
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
builder_num_workers_ = cfg->num_parallel_workers();
builder_rows_per_buffer_ = cfg->rows_per_buffer();
builder_op_connector_size_ = cfg->op_connector_size();
builder_task_type_ = TaskType::Detection;
}
@ -100,8 +99,8 @@ Status CocoOp::Builder::Build(std::shared_ptr<CocoOp> *ptr) {
RETURN_STATUS_UNEXPECTED("Invalid parameter, task type should be Detection, Stuff, Keypoint or Panoptic.");
}
*ptr = std::make_shared<CocoOp>(builder_task_type_, builder_dir_, builder_file_, builder_num_workers_,
builder_rows_per_buffer_, builder_op_connector_size_, builder_decode_,
std::move(builder_schema_), std::move(builder_sampler_));
builder_op_connector_size_, builder_decode_, std::move(builder_schema_),
std::move(builder_sampler_));
return Status::OK();
}
@ -122,9 +121,9 @@ Status CocoOp::Builder::SanityCheck() {
}
CocoOp::CocoOp(const TaskType &task_type, const std::string &image_folder_path, const std::string &annotation_path,
int32_t num_workers, int32_t rows_per_buffer, int32_t queue_size, bool decode,
std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler)
: MappableLeafOp(num_workers, queue_size, std::move(sampler), rows_per_buffer),
int32_t num_workers, int32_t queue_size, bool decode, std::unique_ptr<DataSchema> data_schema,
std::shared_ptr<SamplerRT> sampler)
: MappableLeafOp(num_workers, queue_size, std::move(sampler)),
decode_(decode),
task_type_(task_type),
image_folder_path_(image_folder_path),

View File

@ -109,14 +109,6 @@ class CocoOp : public MappableLeafOp {
return *this;
}
// Setter method.
// @param int32_t rows_per_buffer
// @return Builder setter method returns reference to the builder.
Builder &SetRowsPerBuffer(int32_t rows_per_buffer) {
builder_rows_per_buffer_ = rows_per_buffer;
return *this;
}
// Setter method.
// @param std::shared_ptr<Sampler> sampler
// @return Builder setter method returns reference to the builder.
@ -159,15 +151,14 @@ class CocoOp : public MappableLeafOp {
// @param std::string image_folder_path - image folder path of Coco
// @param std::string annotation_path - annotation json path of Coco
// @param int32_t num_workers - number of workers reading images in parallel
// @param int32_t rows_per_buffer - number of images (rows) in each buffer
// @param int32_t queue_size - connector queue size
// @param int64_t num_samples - number of samples to read
// @param bool decode - whether to decode images
// @param std::unique_ptr<DataSchema> data_schema - the schema of the Coco dataset
// @param std::shared_ptr<SamplerRT> sampler - sampler tells CocoOp what to read
CocoOp(const TaskType &task_type, const std::string &image_folder_path, const std::string &annotation_path,
int32_t num_workers, int32_t rows_per_buffer, int32_t queue_size, bool decode,
std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler);
int32_t num_workers, int32_t queue_size, bool decode, std::unique_ptr<DataSchema> data_schema,
std::shared_ptr<SamplerRT> sampler);
// Destructor
~CocoOp() = default;

View File

@ -32,7 +32,6 @@ CsvOp::Builder::Builder()
std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
builder_num_workers_ = config_manager->num_parallel_workers();
builder_op_connector_size_ = config_manager->op_connector_size();
builder_rows_per_buffer_ = config_manager->rows_per_buffer();
builder_worker_connector_size_ = config_manager->worker_connector_size();
}
@ -59,8 +58,8 @@ Status CsvOp::Builder::Build(std::shared_ptr<CsvOp> *op) {
std::shared_ptr<CsvOp> csv_op = std::make_shared<CsvOp>(
builder_csv_files_list_, builder_field_delim_, builder_column_default_list_, builder_column_name_list_,
builder_num_workers_, builder_rows_per_buffer_, builder_num_samples_, builder_worker_connector_size_,
builder_op_connector_size_, builder_shuffle_files_, builder_num_devices_, builder_device_id_);
builder_num_workers_, builder_num_samples_, builder_worker_connector_size_, builder_op_connector_size_,
builder_shuffle_files_, builder_num_devices_, builder_device_id_);
RETURN_IF_NOT_OK(csv_op->Init());
*op = std::move(csv_op);
@ -69,11 +68,11 @@ Status CsvOp::Builder::Build(std::shared_ptr<CsvOp> *op) {
CsvOp::CsvOp(const std::vector<std::string> &csv_files_list, char field_delim,
const std::vector<std::shared_ptr<BaseRecord>> &column_default,
const std::vector<std::string> &column_name, int32_t num_workers, int64_t rows_per_buffer,
int64_t num_samples, int32_t worker_connector_size, int32_t op_connector_size, bool shuffle_files,
int32_t num_devices, int32_t device_id)
: NonMappableLeafOp(num_workers, worker_connector_size, rows_per_buffer, num_samples, op_connector_size,
shuffle_files, num_devices, device_id),
const std::vector<std::string> &column_name, int32_t num_workers, int64_t num_samples,
int32_t worker_connector_size, int32_t op_connector_size, bool shuffle_files, int32_t num_devices,
int32_t device_id)
: NonMappableLeafOp(num_workers, worker_connector_size, num_samples, op_connector_size, shuffle_files, num_devices,
device_id),
csv_files_list_(std::move(csv_files_list)),
field_delim_(field_delim),
column_default_list_(column_default),
@ -91,11 +90,10 @@ Status CsvOp::Init() {
return Status::OK();
}
CsvOp::CsvParser::CsvParser(int32_t worker_id, JaggedConnector *connector, int64_t rows_per_buffer, char field_delim,
CsvOp::CsvParser::CsvParser(int32_t worker_id, JaggedConnector *connector, char field_delim,
std::vector<std::shared_ptr<CsvOp::BaseRecord>> column_default, std::string file_path)
: worker_id_(worker_id),
buffer_connector_(connector),
csv_rows_per_buffer_(rows_per_buffer),
csv_field_delim_(field_delim),
column_default_(column_default),
file_path_(file_path),
@ -469,8 +467,7 @@ Status CsvOp::CsvParser::InitCsvParser() {
}
Status CsvOp::LoadFile(const std::string &file, int64_t start_offset, int64_t end_offset, int32_t worker_id) {
CsvParser csv_parser(worker_id, jagged_buffer_connector_.get(), rows_per_buffer_, field_delim_, column_default_list_,
file);
CsvParser csv_parser(worker_id, jagged_buffer_connector_.get(), field_delim_, column_default_list_, file);
csv_parser.SetStartOffset(start_offset);
csv_parser.SetEndOffset(end_offset);
std::ifstream ifs;
@ -516,8 +513,7 @@ void CsvOp::Print(std::ostream &out, bool show_all) const {
// Call the super class for displaying any common detailed info
ParallelOp::Print(out, show_all);
// Then show any custom derived-internal stuff
out << "\nRows per buffer: " << rows_per_buffer_ << "\nSample count: " << total_rows_
<< "\nDevice id: " << device_id_ << "\nNumber of devices: " << num_devices_
out << "\nSample count: " << total_rows_ << "\nDevice id: " << device_id_ << "\nNumber of devices: " << num_devices_
<< "\nShuffle files: " << ((shuffle_files_) ? "yes" : "no") << "\nCsv files list:\n";
for (int i = 0; i < csv_files_list_.size(); ++i) {
out << " " << csv_files_list_[i];
@ -592,7 +588,7 @@ Status CsvOp::CalculateNumRowsPerShard() {
}
int64_t CsvOp::CountTotalRows(const std::string &file) {
CsvParser csv_parser(0, jagged_buffer_connector_.get(), rows_per_buffer_, field_delim_, column_default_list_, file);
CsvParser csv_parser(0, jagged_buffer_connector_.get(), field_delim_, column_default_list_, file);
std::ifstream ifs;
ifs.open(file, std::ifstream::in);
if (!ifs.is_open()) {

View File

@ -65,7 +65,7 @@ class CsvOp : public NonMappableLeafOp {
public:
CsvParser() = delete;
CsvParser(int32_t worker_id, JaggedConnector *connector, int64_t rows_per_buffer, char field_delim,
CsvParser(int32_t worker_id, JaggedConnector *connector, char field_delim,
std::vector<std::shared_ptr<CsvOp::BaseRecord>> column_default, std::string file_path);
~CsvParser() = default;
@ -128,7 +128,6 @@ class CsvOp : public NonMappableLeafOp {
int32_t worker_id_;
JaggedConnector *buffer_connector_;
int64_t csv_rows_per_buffer_;
const char csv_field_delim_;
std::vector<std::shared_ptr<CsvOp::BaseRecord>> column_default_;
State cur_state_;
@ -261,8 +260,8 @@ class CsvOp : public NonMappableLeafOp {
CsvOp(const std::vector<std::string> &csv_files_list, char field_delim,
const std::vector<std::shared_ptr<BaseRecord>> &column_default, const std::vector<std::string> &column_name,
int32_t num_workers, int64_t rows_per_buffer, int64_t num_samples, int32_t worker_connector_size,
int32_t op_connector_size, bool shuffle_files, int32_t num_devices, int32_t device_id);
int32_t num_workers, int64_t num_samples, int32_t worker_connector_size, int32_t op_connector_size,
bool shuffle_files, int32_t num_devices, int32_t device_id);
// Default destructor
~CsvOp() = default;

View File

@ -28,7 +28,6 @@ namespace dataset {
ImageFolderOp::Builder::Builder() : builder_decode_(false), builder_recursive_(false), builder_sampler_(nullptr) {
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
builder_num_workers_ = cfg->num_parallel_workers();
builder_rows_per_buffer_ = cfg->rows_per_buffer();
builder_op_connector_size_ = cfg->op_connector_size();
}
@ -45,10 +44,9 @@ Status ImageFolderOp::Builder::Build(std::shared_ptr<ImageFolderOp> *ptr) {
builder_schema_->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1)));
RETURN_IF_NOT_OK(builder_schema_->AddColumn(
ColDescriptor("label", DataType(DataType::DE_INT32), TensorImpl::kFlexible, 0, &scalar)));
*ptr = std::make_shared<ImageFolderOp>(builder_num_workers_, builder_rows_per_buffer_, builder_dir_,
builder_op_connector_size_, builder_recursive_, builder_decode_,
builder_extensions_, builder_labels_to_read_, std::move(builder_schema_),
std::move(builder_sampler_));
*ptr = std::make_shared<ImageFolderOp>(
builder_num_workers_, builder_dir_, builder_op_connector_size_, builder_recursive_, builder_decode_,
builder_extensions_, builder_labels_to_read_, std::move(builder_schema_), std::move(builder_sampler_));
return Status::OK();
}
@ -64,11 +62,10 @@ Status ImageFolderOp::Builder::SanityCheck() {
return err_msg.empty() ? Status::OK() : Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, err_msg);
}
ImageFolderOp::ImageFolderOp(int32_t num_wkrs, int32_t rows_per_buffer, std::string file_dir, int32_t queue_size,
bool recursive, bool do_decode, const std::set<std::string> &exts,
const std::map<std::string, int32_t> &map, std::unique_ptr<DataSchema> data_schema,
std::shared_ptr<SamplerRT> sampler)
: MappableLeafOp(num_wkrs, queue_size, std::move(sampler), rows_per_buffer),
ImageFolderOp::ImageFolderOp(int32_t num_wkrs, std::string file_dir, int32_t queue_size, bool recursive, bool do_decode,
const std::set<std::string> &exts, const std::map<std::string, int32_t> &map,
std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler)
: MappableLeafOp(num_wkrs, queue_size, std::move(sampler)),
folder_path_(file_dir),
recursive_(recursive),
decode_(do_decode),

View File

@ -63,14 +63,6 @@ class ImageFolderOp : public MappableLeafOp {
// Destructor.
~Builder() = default;
// Setter method
// @param int32_t rows_per_buffer
// @return Builder setter method returns reference to the builder.
Builder &SetRowsPerBuffer(int32_t rows_per_buffer) {
builder_rows_per_buffer_ = rows_per_buffer;
return *this;
}
// Setter method
// @param int32_t size
// @return Builder setter method returns reference to the builder.
@ -159,13 +151,12 @@ class ImageFolderOp : public MappableLeafOp {
// Constructor
// @param int32_t num_wkrs - Num of workers reading images in parallel
// @param int32_t - rows_per_buffer Number of images (rows) in each buffer
// @param std::string - dir directory of ImageNetFolder
// @param int32_t queue_size - connector queue size
// @param std::set<std::string> exts - set of file extensions to read, if empty, read everything under the dir
// @param std::shared_ptr<SamplerRT> sampler - sampler tells ImageFolderOp what to read
ImageFolderOp(int32_t num_wkrs, int32_t rows_per_buffer, std::string file_dir, int32_t queue_size, bool recursive,
bool do_decode, const std::set<std::string> &exts, const std::map<std::string, int32_t> &map,
ImageFolderOp(int32_t num_wkrs, std::string file_dir, int32_t queue_size, bool recursive, bool do_decode,
const std::set<std::string> &exts, const std::map<std::string, int32_t> &map,
std::unique_ptr<DataSchema>, std::shared_ptr<SamplerRT> sampler);
// Destructor.

View File

@ -33,7 +33,6 @@ namespace dataset {
ManifestOp::Builder::Builder() : builder_sampler_(nullptr), builder_decode_(false) {
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
builder_num_workers_ = cfg->num_parallel_workers();
builder_rows_per_buffer_ = cfg->rows_per_buffer();
builder_op_connector_size_ = cfg->op_connector_size();
}
@ -49,9 +48,9 @@ Status ManifestOp::Builder::Build(std::shared_ptr<ManifestOp> *ptr) {
builder_schema_->AddColumn(ColDescriptor("image", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1)));
RETURN_IF_NOT_OK(
builder_schema_->AddColumn(ColDescriptor("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1)));
*ptr = std::make_shared<ManifestOp>(builder_num_workers_, builder_rows_per_buffer_, builder_file_,
builder_op_connector_size_, builder_decode_, builder_labels_to_read_,
std::move(builder_schema_), std::move(builder_sampler_), builder_usage_);
*ptr = std::make_shared<ManifestOp>(builder_num_workers_, builder_file_, builder_op_connector_size_, builder_decode_,
builder_labels_to_read_, std::move(builder_schema_), std::move(builder_sampler_),
builder_usage_);
return Status::OK();
}
@ -64,10 +63,10 @@ Status ManifestOp::Builder::SanityCheck() {
return err_msg.empty() ? Status::OK() : Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, err_msg);
}
ManifestOp::ManifestOp(int32_t num_works, int32_t rows_per_buffer, std::string file, int32_t queue_size, bool decode,
ManifestOp::ManifestOp(int32_t num_works, std::string file, int32_t queue_size, bool decode,
const std::map<std::string, int32_t> &class_index, std::unique_ptr<DataSchema> data_schema,
std::shared_ptr<SamplerRT> sampler, std::string usage)
: MappableLeafOp(num_works, queue_size, std::move(sampler), rows_per_buffer),
: MappableLeafOp(num_works, queue_size, std::move(sampler)),
io_block_pushed_(0),
sampler_ind_(0),
data_schema_(std::move(data_schema)),

View File

@ -46,14 +46,6 @@ class ManifestOp : public MappableLeafOp {
// Destructor
~Builder() = default;
// Setter method
// @param int32_t rows_per_buffer
// @return Builder setter method returns reference to the builder.
Builder &SetRowsPerBuffer(int32_t rows_per_buffer) {
builder_rows_per_buffer_ = rows_per_buffer;
return *this;
}
// Setter method
// @param int32_t size
// @return Builder setter method returns reference to the builder.
@ -134,11 +126,10 @@ class ManifestOp : public MappableLeafOp {
// Constructor
// @param int32_t num_works - Num of workers reading images in parallel
// @param int32_t - rows_per_buffer Number of images (rows) in each buffer
// @param std::string - file list of Manifest
// @param int32_t queue_size - connector queue size
// @param std::shared_ptr<SamplerRT> sampler - sampler tells ManifestOp what to read
ManifestOp(int32_t num_works, int32_t rows_per_buffer, std::string file, int32_t queue_size, bool decode,
ManifestOp(int32_t num_works, std::string file, int32_t queue_size, bool decode,
const std::map<std::string, int32_t> &class_index, std::unique_ptr<DataSchema> data_schema,
std::shared_ptr<SamplerRT> sampler, std::string usage);
// Destructor.

View File

@ -24,9 +24,8 @@
namespace mindspore {
namespace dataset {
MappableLeafOp::MappableLeafOp(int32_t num_wkrs, int32_t queue_size, std::shared_ptr<SamplerRT> sampler,
int32_t rows_per_buffer)
: ParallelOp(num_wkrs, queue_size, std::move(sampler)), rows_per_buffer_(rows_per_buffer) {}
// Constructor. Passes the worker count and connector queue size through to ParallelOp and moves the sampler into it.
// No MappableLeafOp members are initialized here and the body is empty.
// @param num_wkrs - number of workers, forwarded to ParallelOp
// @param queue_size - connector queue size, forwarded to ParallelOp
// @param sampler - sampler whose ownership is moved into ParallelOp
MappableLeafOp::MappableLeafOp(int32_t num_wkrs, int32_t queue_size, std::shared_ptr<SamplerRT> sampler)
: ParallelOp(num_wkrs, queue_size, std::move(sampler)) {}
// Main logic, Register Queue with TaskGroup, launch all threads and do the functor's work
Status MappableLeafOp::operator()() {

View File

@ -47,16 +47,13 @@ namespace dataset {
template <typename T>
class Queue;
using ImageLabelPair = std::shared_ptr<std::pair<std::string, int32_t>>;
using FolderImagesPair = std::shared_ptr<std::pair<std::string, std::queue<ImageLabelPair>>>;
class MappableLeafOp : public ParallelOp, public RandomAccessOp {
public:
/// Constructor
/// \param int32_t num_wkrs - Num of workers reading images in parallel
/// \param int32_t queue_size - connector queue size
/// \param std::shared_ptr<SamplerRT> sampler - sampler tells the source what to read
MappableLeafOp(int32_t num_wkrs, int32_t queue_size, std::shared_ptr<SamplerRT> sampler, int32_t rows_per_buffer);
MappableLeafOp(int32_t num_wkrs, int32_t queue_size, std::shared_ptr<SamplerRT> sampler);
/// Destructor.
~MappableLeafOp() = default;
@ -94,10 +91,6 @@ class MappableLeafOp : public ParallelOp, public RandomAccessOp {
/// Reset function to be called after every epoch to reset the source op
/// \return Status The status code returned
Status Reset() override;
int32_t rows_per_buffer_;
int64_t row_cnt_;
int64_t buf_cnt_;
};
} // namespace dataset
} // namespace mindspore

View File

@ -50,7 +50,6 @@ MindRecordOp::Builder::Builder() : build_dataset_file_({}) {
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
build_num_mind_record_workers_ = kDefaultMindRecordWorkers;
build_rows_per_buffer_ = cfg->rows_per_buffer();
build_op_connector_queue_size_ = cfg->op_connector_size();
builder_num_workers_ = 0;
build_load_dataset_ = false;
@ -114,7 +113,7 @@ MindRecordOp::MindRecordOp(int32_t num_mind_record_workers, std::vector<std::str
int32_t op_connector_queue_size, const std::vector<std::string> &columns_to_load,
const std::vector<std::shared_ptr<ShardOperator>> &operators, int64_t num_padded,
const mindrecord::json &sample_json, const std::map<std::string, std::string> &sample_bytes)
: MappableLeafOp(num_mind_record_workers, op_connector_queue_size, std::make_shared<SequentialSamplerRT>(0, 0), 1),
: MappableLeafOp(num_mind_record_workers, op_connector_queue_size, std::make_shared<SequentialSamplerRT>(0, 0)),
dataset_file_(dataset_file),
load_dataset_(load_dataset),
columns_to_load_(columns_to_load),

View File

@ -35,7 +35,6 @@ const int32_t kMnistImageCols = 28;
MnistOp::Builder::Builder() : builder_sampler_(nullptr), builder_usage_("") {
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
builder_num_workers_ = cfg->num_parallel_workers();
builder_rows_per_buffer_ = cfg->rows_per_buffer();
builder_op_connector_size_ = cfg->op_connector_size();
}
@ -52,8 +51,8 @@ Status MnistOp::Builder::Build(std::shared_ptr<MnistOp> *ptr) {
TensorShape scalar = TensorShape::CreateScalar();
RETURN_IF_NOT_OK(builder_schema_->AddColumn(
ColDescriptor("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &scalar)));
*ptr = std::make_shared<MnistOp>(builder_usage_, builder_num_workers_, builder_rows_per_buffer_, builder_dir_,
builder_op_connector_size_, std::move(builder_schema_), std::move(builder_sampler_));
*ptr = std::make_shared<MnistOp>(builder_usage_, builder_num_workers_, builder_dir_, builder_op_connector_size_,
std::move(builder_schema_), std::move(builder_sampler_));
return Status::OK();
}
@ -73,9 +72,9 @@ Status MnistOp::Builder::SanityCheck() {
return err_msg.empty() ? Status::OK() : Status(StatusCode::kMDUnexpectedError, __LINE__, __FILE__, err_msg);
}
MnistOp::MnistOp(const std::string &usage, int32_t num_workers, int32_t rows_per_buffer, std::string folder_path,
int32_t queue_size, std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler)
: MappableLeafOp(num_workers, queue_size, std::move(sampler), rows_per_buffer),
MnistOp::MnistOp(const std::string &usage, int32_t num_workers, std::string folder_path, int32_t queue_size,
std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler)
: MappableLeafOp(num_workers, queue_size, std::move(sampler)),
usage_(usage),
folder_path_(folder_path),
image_path_({}),

View File

@ -52,14 +52,6 @@ class MnistOp : public MappableLeafOp {
// Destructor.
~Builder() = default;
// Setter method
// @param int32_t rows_per_buffer
// @return Builder setter method returns reference to the builder.
Builder &SetRowsPerBuffer(int32_t rows_per_buffer) {
builder_rows_per_buffer_ = rows_per_buffer;
return *this;
}
// Setter method
// @param int32_t op_connector_size
// @return Builder setter method returns reference to the builder.
@ -121,13 +113,12 @@ class MnistOp : public MappableLeafOp {
// Constructor
// @param const std::string &usage - Usage of this dataset, can be 'train', 'test' or 'all'
// @param int32_t num_workers - number of workers reading images in parallel
// @param int32_t rows_per_buffer - number of images (rows) in each buffer
// @param std::string folder_path - dir directory of mnist
// @param int32_t queue_size - connector queue size
// @param std::unique_ptr<DataSchema> data_schema - the schema of the mnist dataset
// @param std::shared_ptr<SamplerRT> sampler - sampler tells MnistOp what to read
MnistOp(const std::string &usage, int32_t num_workers, int32_t rows_per_buffer, std::string folder_path,
int32_t queue_size, std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler);
MnistOp(const std::string &usage, int32_t num_workers, std::string folder_path, int32_t queue_size,
std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler);
// Destructor.
~MnistOp() = default;

View File

@ -36,13 +36,12 @@
namespace mindspore {
namespace dataset {
NonMappableLeafOp::NonMappableLeafOp(int32_t num_workers, int32_t worker_connector_size, int64_t rows_per_buffer,
int64_t total_num_rows, int32_t op_connector_size, bool shuffle_files,
int32_t num_devices, int32_t device_id)
NonMappableLeafOp::NonMappableLeafOp(int32_t num_workers, int32_t worker_connector_size, int64_t total_num_rows,
int32_t op_connector_size, bool shuffle_files, int32_t num_devices,
int32_t device_id)
: ParallelOp(num_workers, op_connector_size),
device_id_(device_id),
num_devices_(num_devices),
rows_per_buffer_(rows_per_buffer),
filename_index_(std::make_unique<StringIndex>()),
load_io_block_queue_(true),
load_jagged_connector_(true),

View File

@ -49,14 +49,13 @@ class NonMappableLeafOp : public ParallelOp {
// @note The builder class should be used to call this constructor.
// @param num_workers - number of worker threads reading data from the dataset files.
// @param worker_connector_size - size of each internal queue.
// @param rows_per_buffer - number of rows that a full buffer will contain.
// @param total_num_rows - Number of rows to read
// @param dataset_files_list - list of filepaths for the dataset files.
// @param op_connector_size - size of each queue in the connector that the child operator pulls from.
// @param columns_to_load - the names of the columns to load data from.
// @param shuffle_files - whether or not to shuffle the files before reading data.
// @param equal_rows_per_shard - whether or not to get equal rows for each process.
NonMappableLeafOp(int32_t num_workers, int32_t worker_connector_size, int64_t rows_per_buffer, int64_t total_num_rows,
NonMappableLeafOp(int32_t num_workers, int32_t worker_connector_size, int64_t total_num_rows,
int32_t op_connector_size, bool shuffle_files, int32_t num_devices, int32_t device_id);
// Default destructor
@ -77,9 +76,6 @@ class NonMappableLeafOp : public ParallelOp {
// @return Status - the error code returned.
Status Reset() override;
// Getter method
int64_t rows_per_buffer() const { return rows_per_buffer_; }
// Op name getter. Overrides the inherited Name() for this op.
// @return The literal string "NonMappableLeafOp"
std::string Name() const override { return "NonMappableLeafOp"; }
@ -157,7 +153,6 @@ class NonMappableLeafOp : public ParallelOp {
bool finished_reading_dataset_;
int64_t total_rows_;
int64_t rows_per_buffer_;
WaitPost io_block_queue_wait_post_;
bool load_io_block_queue_;
std::mutex load_io_block_queue_mutex_;

View File

@ -37,7 +37,6 @@ RandomDataOp::Builder::Builder()
// Some arguments to the RandomDataOp have a default argument that is taken from the config.
// The user may override these defaults by using the builder set methods.
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
builder_rows_per_buffer_ = cfg->rows_per_buffer();
builder_num_workers_ = cfg->num_parallel_workers();
builder_op_connector_size_ = cfg->op_connector_size();
}

View File

@ -97,16 +97,6 @@ class RandomDataOp : public ParallelOp {
return *this;
}
/**
* Builder set method
* @param rows_per_buffer - The number of rows in each DataBuffer
* @return Builder - The modified builder by reference
*/
Builder &SetRowsPerBuffer(int64_t rows_per_buffer) {
builder_rows_per_buffer_ = rows_per_buffer;
return *this;
}
/**
* Builder set method
* @param total_rows - The total number of rows in the dataset

View File

@ -36,7 +36,6 @@ TextFileOp::Builder::Builder()
std::shared_ptr<ConfigManager> config_manager = GlobalContext::config_manager();
builder_num_workers_ = config_manager->num_parallel_workers();
builder_op_connector_size_ = config_manager->op_connector_size();
builder_rows_per_buffer_ = config_manager->rows_per_buffer();
builder_worker_connector_size_ = config_manager->worker_connector_size();
}
@ -65,21 +64,21 @@ Status TextFileOp::Builder::Build(std::shared_ptr<TextFileOp> *op) {
RETURN_IF_NOT_OK(
builder_schema_->AddColumn(ColDescriptor("text", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1)));
std::shared_ptr<TextFileOp> text_file_op = std::make_shared<TextFileOp>(
builder_num_workers_, builder_rows_per_buffer_, builder_total_rows_, builder_worker_connector_size_,
std::move(builder_schema_), builder_text_files_list_, builder_op_connector_size_, builder_shuffle_files_,
builder_num_devices_, builder_device_id_);
std::shared_ptr<TextFileOp> text_file_op =
std::make_shared<TextFileOp>(builder_num_workers_, builder_total_rows_, builder_worker_connector_size_,
std::move(builder_schema_), builder_text_files_list_, builder_op_connector_size_,
builder_shuffle_files_, builder_num_devices_, builder_device_id_);
RETURN_IF_NOT_OK(text_file_op->Init());
*op = std::move(text_file_op);
return Status::OK();
}
TextFileOp::TextFileOp(int32_t num_workers, int64_t rows_per_buffer, int64_t total_rows, int32_t worker_connector_size,
TextFileOp::TextFileOp(int32_t num_workers, int64_t total_rows, int32_t worker_connector_size,
std::unique_ptr<DataSchema> schema, std::vector<std::string> text_files_list,
int32_t op_connector_size, bool shuffle_files, int32_t num_devices, int32_t device_id)
: NonMappableLeafOp(num_workers, worker_connector_size, rows_per_buffer, total_rows, op_connector_size,
shuffle_files, num_devices, device_id),
: NonMappableLeafOp(num_workers, worker_connector_size, total_rows, op_connector_size, shuffle_files, num_devices,
device_id),
text_files_list_(std::move(text_files_list)),
data_schema_(std::move(schema)) {}
@ -94,9 +93,8 @@ void TextFileOp::Print(std::ostream &out, bool show_all) const {
// Call the super class for displaying any common detailed info
ParallelOp::Print(out, show_all);
// Then show any custom derived-internal stuff
out << "\nRows per buffer: " << rows_per_buffer_ << "\nRow count: " << total_rows_ << "\nDevice id: " << device_id_
<< "\nNumber of devices: " << num_devices_ << "\nShuffle files: " << ((shuffle_files_) ? "yes" : "no")
<< "\nText files list:\n";
out << "\nRow count: " << total_rows_ << "\nDevice id: " << device_id_ << "\nNumber of devices: " << num_devices_
<< "\nShuffle files: " << ((shuffle_files_) ? "yes" : "no") << "\nText files list:\n";
for (int i = 0; i < text_files_list_.size(); ++i) {
out << " " << text_files_list_[i];
}

View File

@ -129,7 +129,6 @@ class TextFileOp : public NonMappableLeafOp {
// Constructor of TextFileOp
// @note The builder class should be used to call this constructor.
// @param num_workers - number of worker threads reading data from tf_file files.
// @param rows_per_buffer - number of rows that a full buffer will contain.
// @param total_num_rows - number of rows to read
// @param dataset_files_list - list of filepaths for the dataset files.
// @param data_schema - the data schema object.
@ -137,9 +136,9 @@ class TextFileOp : public NonMappableLeafOp {
// @param columns_to_load - the names of the columns to load data from.
// @param shuffle_files - whether or not to shuffle the files before reading data.
// @param equal_rows_per_shard - whether or not to get equal rows for each process.
TextFileOp(int32_t num_workers, int64_t rows_per_buffer, int64_t total_rows, int32_t worker_connector_size,
std::unique_ptr<DataSchema>, std::vector<std::string> text_files_list, int32_t op_connector_size,
bool shuffle_files, int32_t num_devices, int32_t device_id);
TextFileOp(int32_t num_workers, int64_t total_rows, int32_t worker_connector_size, std::unique_ptr<DataSchema>,
std::vector<std::string> text_files_list, int32_t op_connector_size, bool shuffle_files,
int32_t num_devices, int32_t device_id);
// Default destructor
~TextFileOp() = default;

View File

@ -47,7 +47,6 @@ TFReaderOp::Builder::Builder()
builder_num_workers_ = config_manager->num_parallel_workers();
builder_worker_connector_size_ = config_manager->worker_connector_size();
builder_op_connector_size_ = config_manager->op_connector_size();
builder_rows_per_buffer_ = config_manager->rows_per_buffer();
builder_shuffle_files_ = false;
builder_data_schema_ = std::make_unique<DataSchema>();
}
@ -114,22 +113,21 @@ Status TFReaderOp::Builder::Build(std::shared_ptr<TFReaderOp> *out_tf_reader_op)
}
std::shared_ptr<TFReaderOp> new_tf_reader_op = std::make_shared<TFReaderOp>(
builder_num_workers_, builder_worker_connector_size_, builder_rows_per_buffer_, builder_total_rows_,
builder_dataset_files_list_, std::move(builder_data_schema_), builder_op_connector_size_, builder_columns_to_load_,
builder_shuffle_files_, builder_num_devices_, builder_device_id_, builder_equal_rows_per_shard_);
builder_num_workers_, builder_worker_connector_size_, builder_total_rows_, builder_dataset_files_list_,
std::move(builder_data_schema_), builder_op_connector_size_, builder_columns_to_load_, builder_shuffle_files_,
builder_num_devices_, builder_device_id_, builder_equal_rows_per_shard_);
RETURN_IF_NOT_OK(new_tf_reader_op->Init());
*out_tf_reader_op = std::move(new_tf_reader_op);
return Status::OK();
}
TFReaderOp::TFReaderOp(int32_t num_workers, int32_t worker_connector_size, int64_t rows_per_buffer,
int64_t total_num_rows, std::vector<std::string> dataset_files_list,
std::unique_ptr<DataSchema> data_schema, int32_t op_connector_size,
std::vector<std::string> columns_to_load, bool shuffle_files, int32_t num_devices,
int32_t device_id, bool equal_rows_per_shard)
: NonMappableLeafOp(num_workers, worker_connector_size, rows_per_buffer, total_num_rows, op_connector_size,
shuffle_files, num_devices, device_id),
TFReaderOp::TFReaderOp(int32_t num_workers, int32_t worker_connector_size, int64_t total_num_rows,
std::vector<std::string> dataset_files_list, std::unique_ptr<DataSchema> data_schema,
int32_t op_connector_size, std::vector<std::string> columns_to_load, bool shuffle_files,
int32_t num_devices, int32_t device_id, bool equal_rows_per_shard)
: NonMappableLeafOp(num_workers, worker_connector_size, total_num_rows, op_connector_size, shuffle_files,
num_devices, device_id),
dataset_files_list_(std::move(dataset_files_list)),
columns_to_load_(std::move(columns_to_load)),
data_schema_(std::move(data_schema)),
@ -146,8 +144,8 @@ void TFReaderOp::Print(std::ostream &out, bool show_all) const {
// Call the super class for displaying any common detailed info
ParallelOp::Print(out, show_all);
// Then show any custom derived-internal stuff
out << "\nRows per buffer: " << rows_per_buffer_ << "\nTotal rows: " << total_rows_ << "\nDevice id: " << device_id_
<< "\nNumber of devices: " << num_devices_ << "\nShuffle files: " << ((shuffle_files_) ? "yes" : "no")
out << "\nTotal rows: " << total_rows_ << "\nDevice id: " << device_id_ << "\nNumber of devices: " << num_devices_
<< "\nShuffle files: " << ((shuffle_files_) ? "yes" : "no")
<< "\nDataset files list: Size: " << dataset_files_list_.size() << "\n";
for (int i = 0; i < dataset_files_list_.size(); ++i) {
out << " " << dataset_files_list_[i];

View File

@ -173,7 +173,6 @@ class TFReaderOp : public NonMappableLeafOp {
// @note The builder class should be used to call this constructor.
// @param num_workers - number of worker threads reading data from tf_file files.
// @param worker_connector_size - size of each internal queue.
// @param rows_per_buffer - number of rows that a full buffer will contain.
// @param total_num_rows - Number of rows to read
// @param dataset_files_list - list of filepaths for the dataset files.
// @param data_schema - the data schema object.
@ -181,7 +180,7 @@ class TFReaderOp : public NonMappableLeafOp {
// @param columns_to_load - the names of the columns to load data from.
// @param shuffle_files - whether or not to shuffle the files before reading data.
// @param equal_rows_per_shard - whether or not to get equal rows for each process.
TFReaderOp(int32_t num_workers, int32_t worker_connector_size, int64_t rows_per_buffer, int64_t total_num_rows,
TFReaderOp(int32_t num_workers, int32_t worker_connector_size, int64_t total_num_rows,
std::vector<std::string> dataset_files_list, std::unique_ptr<DataSchema> data_schema,
int32_t op_connector_size, std::vector<std::string> columns_to_load, bool shuffle_files,
int32_t num_devices, int32_t device_id, bool equal_rows_per_shard);

View File

@ -47,7 +47,6 @@ const char kImageSetsExtension[] = ".txt";
VOCOp::Builder::Builder() : builder_decode_(false), builder_sampler_(nullptr) {
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
builder_num_workers_ = cfg->num_parallel_workers();
builder_rows_per_buffer_ = cfg->rows_per_buffer();
builder_op_connector_size_ = cfg->op_connector_size();
builder_task_type_ = TaskType::Segmentation;
}
@ -78,8 +77,8 @@ Status VOCOp::Builder::Build(std::shared_ptr<VOCOp> *ptr) {
ColDescriptor(std::string(kColumnTruncate), DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 1)));
}
*ptr = std::make_shared<VOCOp>(builder_task_type_, builder_usage_, builder_dir_, builder_labels_to_read_,
builder_num_workers_, builder_rows_per_buffer_, builder_op_connector_size_,
builder_decode_, std::move(builder_schema_), std::move(builder_sampler_));
builder_num_workers_, builder_op_connector_size_, builder_decode_,
std::move(builder_schema_), std::move(builder_sampler_));
return Status::OK();
}
@ -96,10 +95,9 @@ Status VOCOp::Builder::SanityCheck() {
}
VOCOp::VOCOp(const TaskType &task_type, const std::string &task_mode, const std::string &folder_path,
const std::map<std::string, int32_t> &class_index, int32_t num_workers, int32_t rows_per_buffer,
int32_t queue_size, bool decode, std::unique_ptr<DataSchema> data_schema,
std::shared_ptr<SamplerRT> sampler)
: MappableLeafOp(num_workers, queue_size, std::move(sampler), rows_per_buffer),
const std::map<std::string, int32_t> &class_index, int32_t num_workers, int32_t queue_size, bool decode,
std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler)
: MappableLeafOp(num_workers, queue_size, std::move(sampler)),
decode_(decode),
task_type_(task_type),
usage_(task_mode),

View File

@ -112,14 +112,6 @@ class VOCOp : public MappableLeafOp {
return *this;
}
// Setter method.
// @param int32_t rows_per_buffer
// @return Builder setter method returns reference to the builder.
Builder &SetRowsPerBuffer(int32_t rows_per_buffer) {
builder_rows_per_buffer_ = rows_per_buffer;
return *this;
}
// Setter method.
// @param std::shared_ptr<Sampler> sampler
// @return Builder setter method returns reference to the builder.
@ -164,14 +156,13 @@ class VOCOp : public MappableLeafOp {
// @param std::string folder_path - dir directory of VOC
// @param std::map<std::string, int32_t> class_index - input class-to-index of annotation
// @param int32_t num_workers - number of workers reading images in parallel
// @param int32_t rows_per_buffer - number of images (rows) in each buffer
// @param int32_t queue_size - connector queue size
// @param bool decode - whether to decode images
// @param std::unique_ptr<DataSchema> data_schema - the schema of the VOC dataset
// @param std::shared_ptr<Sampler> sampler - sampler tells VOCOp what to read
VOCOp(const TaskType &task_type, const std::string &task_mode, const std::string &folder_path,
const std::map<std::string, int32_t> &class_index, int32_t num_workers, int32_t rows_per_buffer,
int32_t queue_size, bool decode, std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler);
const std::map<std::string, int32_t> &class_index, int32_t num_workers, int32_t queue_size, bool decode,
std::unique_ptr<DataSchema> data_schema, std::shared_ptr<SamplerRT> sampler);
// Destructor
~VOCOp() = default;
@ -255,11 +246,9 @@ class VOCOp : public MappableLeafOp {
bool decode_;
int64_t row_cnt_;
int64_t buf_cnt_;
std::string folder_path_;
TaskType task_type_;
std::string usage_;
int32_t rows_per_buffer_;
std::unique_ptr<DataSchema> data_schema_;
std::vector<std::string> image_ids_;

View File

@ -33,7 +33,6 @@ ZipOp::Builder::Builder() {
// using the various builder set methods.
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
builder_rows_per_buffer_ = cfg->rows_per_buffer();
builder_op_connector_size_ = cfg->op_connector_size();
}
@ -41,18 +40,13 @@ Status ZipOp::Builder::SanityCheck() const { return Status::OK(); }
Status ZipOp::Builder::Build(std::shared_ptr<ZipOp> *ptr) {
RETURN_IF_NOT_OK(SanityCheck());
*ptr = std::make_shared<ZipOp>(builder_rows_per_buffer_, builder_op_connector_size_);
*ptr = std::make_shared<ZipOp>(builder_op_connector_size_);
return Status::OK();
}
// Construct ZipOp here, local variables initialized in operator due to tree construction restrictions
ZipOp::ZipOp(int32_t rows_per_buffer, int32_t op_connector_size)
: PipelineOp(op_connector_size),
children_num_(0),
rows_per_buffer_(rows_per_buffer),
buffer_id_(0),
draining_(false),
eof_(false) {}
ZipOp::ZipOp(int32_t op_connector_size)
: PipelineOp(op_connector_size), children_num_(0), draining_(false), eof_(false) {}
// destructor
ZipOp::~ZipOp() {}

View File

@ -76,9 +76,8 @@ class ZipOp : public PipelineOp {
};
// Constructor for ZipOp
// @param rows_per_buffer - number of rows in output buffer
// @param op_connector_size - connector size
ZipOp(int32_t rows_per_buffer, int32_t op_connector_size);
explicit ZipOp(int32_t op_connector_size);
// Destructor
~ZipOp();
@ -136,8 +135,6 @@ class ZipOp : public PipelineOp {
Status ComputeColMap() override;
int32_t children_num_;
int32_t rows_per_buffer_;
int32_t buffer_id_;
bool draining_;
bool eof_;
std::vector<std::unique_ptr<ChildIterator>> child_iterators_;

View File

@ -58,13 +58,13 @@ Status ComputeShuffleSize(int64_t num_files, int64_t num_devices, int64_t num_ro
// Helper function to inject a shuffle operator over top of current operator being built
Status AddShuffleOp(int64_t num_files, int64_t num_devices, int64_t num_rows, int64_t total_rows,
int32_t connector_que_size, int32_t rows_per_buffer, std::shared_ptr<DatasetOp> *shuffle_op) {
int32_t connector_que_size, std::shared_ptr<DatasetOp> *shuffle_op) {
std::shared_ptr<ShuffleOp> new_shuffle_op = nullptr;
int64_t shuffle_size = 0;
RETURN_IF_NOT_OK(ComputeShuffleSize(num_files, num_devices, num_rows, total_rows, &shuffle_size));
MS_LOG(INFO) << "Dataset::AddShuffleOp - num_rows: " << num_rows << ", shuffle_size: " << shuffle_size;
// Add the shuffle op
*shuffle_op = std::make_shared<ShuffleOp>(shuffle_size, GetSeed(), connector_que_size, true, rows_per_buffer);
*shuffle_op = std::make_shared<ShuffleOp>(shuffle_size, GetSeed(), connector_que_size, true);
return Status::OK();
}
@ -231,7 +231,6 @@ DatasetNode::DatasetNode()
// Fetch some default value from config manager
std::shared_ptr<ConfigManager> cfg = GlobalContext::config_manager();
num_workers_ = cfg->num_parallel_workers();
rows_per_buffer_ = cfg->rows_per_buffer();
connector_que_size_ = cfg->op_connector_size();
worker_connector_size_ = cfg->worker_connector_size();
}

View File

@ -92,7 +92,7 @@ constexpr char kTFRecordNode[] = "TFRecordDataset";
constexpr char kVOCNode[] = "VOCDataset";
Status AddShuffleOp(int64_t num_files, int64_t num_devices, int64_t num_rows, int64_t total_rows,
int32_t connector_que_size, int32_t rows_per_buffer, std::shared_ptr<DatasetOp> *shuffle_op);
int32_t connector_que_size, std::shared_ptr<DatasetOp> *shuffle_op);
// Helper function to validate dataset files parameter
Status ValidateDatasetFilesParam(const std::string &dataset_name, const std::vector<std::string> &dataset_files);
@ -323,7 +323,6 @@ class DatasetNode : public std::enable_shared_from_this<DatasetNode> {
std::shared_ptr<DatasetCache> cache_;
int64_t dataset_size_;
int32_t num_workers_;
int32_t rows_per_buffer_;
int32_t connector_que_size_;
int32_t worker_connector_size_;
int32_t total_repeats_; // Number of times required to run this operator

View File

@ -44,8 +44,7 @@ void ShuffleNode::Print(std::ostream &out) const {
// Function to build the ShuffleOp
Status ShuffleNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) {
auto op = std::make_shared<ShuffleOp>(shuffle_size_, shuffle_seed_, connector_que_size_, reset_every_epoch_,
rows_per_buffer_);
auto op = std::make_shared<ShuffleOp>(shuffle_size_, shuffle_seed_, connector_que_size_, reset_every_epoch_);
op->set_total_repeats(GetTotalRepeats());
op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch());
node_ops->push_back(op);

View File

@ -74,8 +74,8 @@ Status AlbumNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops)
std::shared_ptr<SamplerRT> sampler_rt = nullptr;
RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
auto album_op = std::make_shared<AlbumOp>(num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_, decode_,
extensions, std::move(schema), std::move(sampler_rt));
auto album_op = std::make_shared<AlbumOp>(num_workers_, dataset_dir_, connector_que_size_, decode_, extensions,
std::move(schema), std::move(sampler_rt));
album_op->set_total_repeats(GetTotalRepeats());
album_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch());
node_ops->push_back(album_op);

View File

@ -69,8 +69,8 @@ Status CelebANode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops
std::shared_ptr<SamplerRT> sampler_rt = nullptr;
RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
auto celeba_op = std::make_shared<CelebAOp>(num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_,
decode_, usage_, extensions_, std::move(schema), std::move(sampler_rt));
auto celeba_op = std::make_shared<CelebAOp>(num_workers_, dataset_dir_, connector_que_size_, decode_, usage_,
extensions_, std::move(schema), std::move(sampler_rt));
celeba_op->set_total_repeats(GetTotalRepeats());
celeba_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch());
node_ops->push_back(celeba_op);

View File

@ -66,9 +66,8 @@ Status Cifar100Node::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_o
std::shared_ptr<SamplerRT> sampler_rt = nullptr;
RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
auto cifar_op =
std::make_shared<CifarOp>(CifarOp::CifarType::kCifar100, usage_, num_workers_, rows_per_buffer_, dataset_dir_,
connector_que_size_, std::move(schema), std::move(sampler_rt));
auto cifar_op = std::make_shared<CifarOp>(CifarOp::CifarType::kCifar100, usage_, num_workers_, dataset_dir_,
connector_que_size_, std::move(schema), std::move(sampler_rt));
cifar_op->set_total_repeats(GetTotalRepeats());
cifar_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch());
node_ops->push_back(cifar_op);

View File

@ -64,9 +64,8 @@ Status Cifar10Node::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_op
std::shared_ptr<SamplerRT> sampler_rt = nullptr;
RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
auto cifar_op =
std::make_shared<CifarOp>(CifarOp::CifarType::kCifar10, usage_, num_workers_, rows_per_buffer_, dataset_dir_,
connector_que_size_, std::move(schema), std::move(sampler_rt));
auto cifar_op = std::make_shared<CifarOp>(CifarOp::CifarType::kCifar10, usage_, num_workers_, dataset_dir_,
connector_que_size_, std::move(schema), std::move(sampler_rt));
cifar_op->set_total_repeats(GetTotalRepeats());
cifar_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch());
node_ops->push_back(cifar_op);

View File

@ -177,8 +177,8 @@ Status CLUENode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops)
std::sort(sorted_dataset_files.begin(), sorted_dataset_files.end());
std::shared_ptr<ClueOp> clue_op =
std::make_shared<ClueOp>(num_workers_, rows_per_buffer_, num_samples_, worker_connector_size_, ck_map,
sorted_dataset_files, connector_que_size_, shuffle_files, num_shards_, shard_id_);
std::make_shared<ClueOp>(num_workers_, num_samples_, worker_connector_size_, ck_map, sorted_dataset_files,
connector_que_size_, shuffle_files, num_shards_, shard_id_);
RETURN_IF_NOT_OK(clue_op->Init());
@ -191,8 +191,8 @@ Status CLUENode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops)
RETURN_IF_NOT_OK(ClueOp::CountAllFileRows(sorted_dataset_files, &num_rows));
// Add the shuffle op after this op
RETURN_IF_NOT_OK(AddShuffleOp(sorted_dataset_files.size(), num_shards_, num_rows, 0, connector_que_size_,
rows_per_buffer_, &shuffle_op));
RETURN_IF_NOT_OK(
AddShuffleOp(sorted_dataset_files.size(), num_shards_, num_rows, 0, connector_que_size_, &shuffle_op));
shuffle_op->set_total_repeats(GetTotalRepeats());
shuffle_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch());
node_ops->push_back(shuffle_op);

View File

@ -123,8 +123,8 @@ Status CocoNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops)
RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
std::shared_ptr<CocoOp> op =
std::make_shared<CocoOp>(task_type, dataset_dir_, annotation_file_, num_workers_, rows_per_buffer_,
connector_que_size_, decode_, std::move(schema), std::move(sampler_rt));
std::make_shared<CocoOp>(task_type, dataset_dir_, annotation_file_, num_workers_, connector_que_size_, decode_,
std::move(schema), std::move(sampler_rt));
op->set_total_repeats(GetTotalRepeats());
op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch());
node_ops->push_back(op);

View File

@ -114,8 +114,8 @@ Status CSVNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) {
}
std::shared_ptr<CsvOp> csv_op = std::make_shared<CsvOp>(
sorted_dataset_files, field_delim_, column_default_list, column_names_, num_workers_, rows_per_buffer_,
num_samples_, worker_connector_size_, connector_que_size_, shuffle_files, num_shards_, shard_id_);
sorted_dataset_files, field_delim_, column_default_list, column_names_, num_workers_, num_samples_,
worker_connector_size_, connector_que_size_, shuffle_files, num_shards_, shard_id_);
RETURN_IF_NOT_OK(csv_op->Init());
@ -128,8 +128,8 @@ Status CSVNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) {
RETURN_IF_NOT_OK(CsvOp::CountAllFileRows(sorted_dataset_files, column_names_.empty(), &num_rows));
// Add the shuffle op after this op
RETURN_IF_NOT_OK(AddShuffleOp(sorted_dataset_files.size(), num_shards_, num_rows, 0, connector_que_size_,
rows_per_buffer_, &shuffle_op));
RETURN_IF_NOT_OK(
AddShuffleOp(sorted_dataset_files.size(), num_shards_, num_rows, 0, connector_que_size_, &shuffle_op));
shuffle_op->set_total_repeats(GetTotalRepeats());
shuffle_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch());
node_ops->push_back(shuffle_op);

View File

@ -72,9 +72,8 @@ Status ImageFolderNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const nod
std::shared_ptr<SamplerRT> sampler_rt = nullptr;
RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
auto op =
std::make_shared<ImageFolderOp>(num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_, recursive_,
decode_, exts_, class_indexing_, std::move(schema), std::move(sampler_rt));
auto op = std::make_shared<ImageFolderOp>(num_workers_, dataset_dir_, connector_que_size_, recursive_, decode_, exts_,
class_indexing_, std::move(schema), std::move(sampler_rt));
op->set_total_repeats(GetTotalRepeats());
op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch());
node_ops->push_back(op);

View File

@ -94,8 +94,8 @@ Status ManifestNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_o
std::shared_ptr<SamplerRT> sampler_rt = nullptr;
RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
manifest_op = std::make_shared<ManifestOp>(num_workers_, rows_per_buffer_, dataset_file_, connector_que_size_,
decode_, class_index_, std::move(schema), std::move(sampler_rt), usage_);
manifest_op = std::make_shared<ManifestOp>(num_workers_, dataset_file_, connector_que_size_, decode_, class_index_,
std::move(schema), std::move(sampler_rt), usage_);
manifest_op->set_total_repeats(GetTotalRepeats());
manifest_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch());
node_ops->push_back(manifest_op);

View File

@ -60,8 +60,8 @@ Status MnistNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops)
std::shared_ptr<SamplerRT> sampler_rt = nullptr;
RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
auto op = std::make_shared<MnistOp>(usage_, num_workers_, rows_per_buffer_, dataset_dir_, connector_que_size_,
std::move(schema), std::move(sampler_rt));
auto op = std::make_shared<MnistOp>(usage_, num_workers_, dataset_dir_, connector_que_size_, std::move(schema),
std::move(sampler_rt));
op->set_total_repeats(GetTotalRepeats());
op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch());
node_ops->push_back(op);

View File

@ -82,9 +82,9 @@ Status TextFileNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_o
RETURN_IF_NOT_OK(schema->AddColumn(ColDescriptor("text", DataType(DataType::DE_UINT8), TensorImpl::kFlexible, 1)));
// Create and initialize TextFileOp
std::shared_ptr<TextFileOp> text_file_op = std::make_shared<TextFileOp>(
num_workers_, rows_per_buffer_, num_samples_, worker_connector_size_, std::move(schema), sorted_dataset_files,
connector_que_size_, shuffle_files, num_shards_, shard_id_);
std::shared_ptr<TextFileOp> text_file_op =
std::make_shared<TextFileOp>(num_workers_, num_samples_, worker_connector_size_, std::move(schema),
sorted_dataset_files, connector_que_size_, shuffle_files, num_shards_, shard_id_);
RETURN_IF_NOT_OK(text_file_op->Init());
if (cache_ == nullptr && shuffle_ == ShuffleMode::kGlobal && !IsDescendantOfCache()) {
@ -96,8 +96,8 @@ Status TextFileNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_o
RETURN_IF_NOT_OK(TextFileOp::CountAllFileRows(sorted_dataset_files, &num_rows));
// Add the shuffle op after this op
RETURN_IF_NOT_OK(AddShuffleOp(sorted_dataset_files.size(), num_shards_, num_rows, 0, connector_que_size_,
rows_per_buffer_, &shuffle_op));
RETURN_IF_NOT_OK(
AddShuffleOp(sorted_dataset_files.size(), num_shards_, num_rows, 0, connector_que_size_, &shuffle_op));
shuffle_op->set_total_repeats(GetTotalRepeats());
shuffle_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch());
node_ops->push_back(shuffle_op);

View File

@ -124,8 +124,8 @@ Status TFRecordNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_o
// Create and initialize TFReaderOp
std::shared_ptr<TFReaderOp> tf_reader_op = std::make_shared<TFReaderOp>(
num_workers_, worker_connector_size_, rows_per_buffer_, num_samples_, sorted_dir_files, std::move(data_schema),
connector_que_size_, columns_list_, shuffle_files, num_shards_, shard_id_, shard_equal_rows_);
num_workers_, worker_connector_size_, num_samples_, sorted_dir_files, std::move(data_schema), connector_que_size_,
columns_list_, shuffle_files, num_shards_, shard_id_, shard_equal_rows_);
RETURN_IF_NOT_OK(tf_reader_op->Init());
@ -139,8 +139,7 @@ Status TFRecordNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_o
RETURN_IF_NOT_OK(TFReaderOp::CountTotalRows(&num_rows, sorted_dir_files));
// Add the shuffle op after this op
RETURN_IF_NOT_OK(AddShuffleOp(sorted_dir_files.size(), num_shards_, num_rows, 0, connector_que_size_,
rows_per_buffer_, &shuffle_op));
RETURN_IF_NOT_OK(AddShuffleOp(sorted_dir_files.size(), num_shards_, num_rows, 0, connector_que_size_, &shuffle_op));
shuffle_op->set_total_repeats(GetTotalRepeats());
shuffle_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch());
node_ops->push_back(shuffle_op);

View File

@ -112,8 +112,8 @@ Status VOCNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) {
RETURN_IF_NOT_OK(sampler_->SamplerBuild(&sampler_rt));
std::shared_ptr<VOCOp> voc_op;
voc_op = std::make_shared<VOCOp>(task_type_, usage_, dataset_dir_, class_index_, num_workers_, rows_per_buffer_,
connector_que_size_, decode_, std::move(schema), std::move(sampler_rt));
voc_op = std::make_shared<VOCOp>(task_type_, usage_, dataset_dir_, class_index_, num_workers_, connector_que_size_,
decode_, std::move(schema), std::move(sampler_rt));
voc_op->set_total_repeats(GetTotalRepeats());
voc_op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch());
node_ops->push_back(voc_op);

View File

@ -43,11 +43,9 @@ void SyncWaitNode::Print(std::ostream &out) const {
// Function to build the BarrierOp
Status SyncWaitNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) {
// Right now barrier should only take num_rows_per_buffer = 1
// The reason for this is because having it otherwise can lead to blocking issues
// See barrier_op.h for more details
const int32_t rows_per_buffer = 1;
auto op = std::make_shared<BarrierOp>(rows_per_buffer, connector_que_size_, condition_name_, callback_);
auto op = std::make_shared<BarrierOp>(connector_que_size_, condition_name_, callback_);
op->set_total_repeats(GetTotalRepeats());
op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch());
node_ops->push_back(op);

View File

@ -58,7 +58,7 @@ Status ZipNode::ValidateParams() {
}
Status ZipNode::Build(std::vector<std::shared_ptr<DatasetOp>> *const node_ops) {
auto op = std::make_shared<ZipOp>(rows_per_buffer_, connector_que_size_);
auto op = std::make_shared<ZipOp>(connector_que_size_);
op->set_total_repeats(GetTotalRepeats());
op->set_num_repeats_per_epoch(GetNumRepeatsPerEpoch());
node_ops->push_back(op);

View File

@ -31,7 +31,7 @@ using mindspore::MsLogLevel::ERROR;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::LogStream;
std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false, int rows_per_buf = 2);
std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false);
std::shared_ptr<RepeatOp> Repeat(int repeat_cnt);
@ -42,10 +42,10 @@ std::shared_ptr<AlbumOp> Album(int64_t num_works, int64_t rows, int64_t conns, s
std::shared_ptr<AlbumOp> so;
AlbumOp::Builder builder;
Status rc = builder.SetNumWorkers(num_works)
.SetAlbumDir(path)
.SetRowsPerBuffer(rows)
.SetOpConnectorSize(conns)
.SetExtensions({".json"})
.SetAlbumDir(path)
.SetOpConnectorSize(conns)
.SetExtensions({".json"})
.SetSampler(std::move(sampler))
.SetDecode(decode)
.Build(&so);
@ -59,12 +59,12 @@ std::shared_ptr<AlbumOp> AlbumSchema(int64_t num_works, int64_t rows, int64_t co
std::shared_ptr<AlbumOp> so;
AlbumOp::Builder builder;
Status rc = builder.SetNumWorkers(num_works)
.SetSchemaFile(schema_file)
.SetColumnsToLoad(column_names)
.SetAlbumDir(path)
.SetRowsPerBuffer(rows)
.SetOpConnectorSize(conns)
.SetExtensions({".json"})
.SetSchemaFile(schema_file)
.SetColumnsToLoad(column_names)
.SetAlbumDir(path)
.SetOpConnectorSize(conns)
.SetExtensions({".json"})
.SetSampler(std::move(sampler))
.SetDecode(decode)
.Build(&so);
@ -180,8 +180,8 @@ TEST_F(MindDataTestAlbum, TestSequentialAlbumWithFullSchema) {
EXPECT_OK(tensor_map["_priority"]->GetItemAt<double>(&priority, {}));
EXPECT_OK(tensor_map["id"]->GetItemAt<int64_t>(&id, {}));
MS_LOG(DEBUG) << "row: " << i << "\t" << tensor_map["image"]->shape() << "label:" << label << "label shape"
<< tensor_map["label"] << "priority: " << priority << " embedding : "
<< tensor_map["_embedding"]->shape() << " id: " << id << "\n";
<< tensor_map["label"] << "priority: " << priority
<< " embedding : " << tensor_map["_embedding"]->shape() << " id: " << id << "\n";
i++;
di.GetNextAsMap(&tensor_map);
}

View File

@ -34,7 +34,7 @@ class MindDataTestBatchOp : public UT::DatasetOpTesting {
protected:
};
std::shared_ptr<de::BatchOp> Batch(int32_t batch_size = 1, bool drop = false, int rows_per_buf = 2) {
std::shared_ptr<de::BatchOp> Batch(int32_t batch_size = 1, bool drop = false) {
Status rc;
std::shared_ptr<de::BatchOp> op;
rc = de::BatchOp::Builder(batch_size).SetDrop(drop).Build(&op);
@ -50,10 +50,10 @@ std::shared_ptr<de::RepeatOp> Repeat(int repeat_cnt = 1) {
return op;
}
std::shared_ptr<de::TFReaderOp> TFReader(std::string schema, int rows_per_buf = 2, int num_works = 8) {
std::shared_ptr<de::TFReaderOp> TFReader(std::string schema, int num_works = 8) {
std::shared_ptr<de::TFReaderOp> so;
de::TFReaderOp::Builder builder;
builder.SetDatasetFilesList({schema}).SetRowsPerBuffer(rows_per_buf).SetNumWorkers(num_works);
builder.SetDatasetFilesList({schema}).SetNumWorkers(num_works);
Status rc = builder.Build(&so);
return so;
}
@ -111,7 +111,7 @@ TEST_F(MindDataTestBatchOp, TestRepeatBatchDropTrue) {
bool success = false;
auto op1 = TFReader(schema_file);
auto op2 = Repeat(2);
auto op3 = Batch(7, true, 99);
auto op3 = Batch(7, true);
op1->set_total_repeats(2);
op1->set_num_repeats_per_epoch(2);
auto tree = Build({op1, op2, op3});
@ -161,7 +161,7 @@ TEST_F(MindDataTestBatchOp, TestRepeatBatchDropFalse) {
bool success = false;
auto op1 = TFReader(schema_file);
auto op2 = Repeat(2);
auto op3 = Batch(7, false, 99);
auto op3 = Batch(7, false);
op1->set_total_repeats(2);
op1->set_num_repeats_per_epoch(2);
auto tree = Build({op1, op2, op3});
@ -217,7 +217,7 @@ TEST_F(MindDataTestBatchOp, TestBatchDropFalseRepeat) {
std::string schema_file = datasets_root_path_ + "/testBatchDataset/test.data";
bool success = false;
auto op1 = TFReader(schema_file);
auto op2 = Batch(7, false, 99);
auto op2 = Batch(7, false);
auto op3 = Repeat(2);
op1->set_total_repeats(2);
op1->set_num_repeats_per_epoch(2);
@ -270,7 +270,7 @@ TEST_F(MindDataTestBatchOp, TestBatchDropTrueRepeat) {
std::string schema_file = datasets_root_path_ + "/testBatchDataset/test.data";
bool success = false;
auto op1 = TFReader(schema_file);
auto op2 = Batch(5, true, 99);
auto op2 = Batch(5, true);
auto op3 = Repeat(2);
op1->set_total_repeats(2);
op1->set_num_repeats_per_epoch(2);

View File

@ -253,7 +253,7 @@ TEST_F(MindDataTestCacheOp, DISABLED_TestRandomDataCache1) {
// RandomDataOp
std::shared_ptr<RandomDataOp> myRandomDataOp;
rc = RandomDataOp::Builder()
.SetRowsPerBuffer(4)
.SetNumWorkers(4)
.SetDataSchema(std::move(testSchema))
.SetTotalRows(50) // 50 samples for now
@ -277,7 +277,7 @@ TEST_F(MindDataTestCacheOp, DISABLED_TestRandomDataCache1) {
rc = CacheOp::Builder()
.SetNumWorkers(5)
.SetClient(myClient)
.SetRowsPerBuffer(1)
.SetSampler(std::move(seq_sampler))
.Build(&myCacheOp);
ASSERT_TRUE(rc.IsOk());
@ -379,7 +379,7 @@ TEST_F(MindDataTestCacheOp, DISABLED_TestRandomDataCacheSpill) {
// RandomDataOp
std::shared_ptr<RandomDataOp> myRandomDataOp;
rc = RandomDataOp::Builder()
.SetRowsPerBuffer(2)
.SetNumWorkers(4)
.SetDataSchema(std::move(testSchema))
.SetTotalRows(10)
@ -401,7 +401,6 @@ TEST_F(MindDataTestCacheOp, DISABLED_TestRandomDataCacheSpill) {
rc = CacheOp::Builder()
.SetNumWorkers(4)
.SetClient(myClient)
.SetRowsPerBuffer(3)
.SetSampler(std::move(seq_sampler))
.Build(&myCacheOp);
ASSERT_TRUE(rc.IsOk());
@ -484,7 +483,7 @@ TEST_F(MindDataTestCacheOp, DISABLED_TestImageFolderCacheMerge) {
ImageFolderOp::Builder builder;
builder.SetOpConnectorSize(3)
.SetNumWorkers(3)
.SetRowsPerBuffer(2)
.SetExtensions({".jpg", ".JPEG"})
.SetRecursive(true)
.SetImageFolderDir(datasets_root_path_ + "/testPK/data");

View File

@ -26,41 +26,45 @@
#include "securec.h"
using namespace mindspore::dataset;
using mindspore::MsLogLevel::ERROR;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::LogStream;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::MsLogLevel::ERROR;
std::shared_ptr<RepeatOp> Repeat(int repeat_cnt);
std::shared_ptr<ExecutionTree> Build(std::vector<std::shared_ptr<DatasetOp>> ops);
std::shared_ptr<CelebAOp> Celeba(int32_t num_workers, int32_t rows_per_buffer, int32_t queue_size,
const std::string &dir, std::shared_ptr<SamplerRT> sampler = nullptr,
bool decode = false, const std::string &dataset_type = "all") {
std::shared_ptr<CelebAOp> Celeba(int32_t num_workers, int32_t queue_size, const std::string &dir,
std::shared_ptr<SamplerRT> sampler = nullptr, bool decode = false,
const std::string &dataset_type = "all") {
std::shared_ptr<CelebAOp> so;
CelebAOp::Builder builder;
Status rc = builder.SetNumWorkers(num_workers)
.SetCelebADir(dir)
.SetRowsPerBuffer(rows_per_buffer)
.SetOpConnectorSize(queue_size)
.SetSampler(std::move(sampler))
.SetDecode(decode)
.SetUsage(dataset_type).Build(&so);
.SetUsage(dataset_type)
.Build(&so);
return so;
}
class MindDataTestCelebaDataset : public UT::DatasetOpTesting {
protected:
protected:
};
TEST_F(MindDataTestCelebaDataset, TestSequentialCeleba) {
std::string dir = datasets_root_path_ + "/testCelebAData/";
uint32_t expect_labels[4][40] = {{0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,0,1},
{0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1},
{0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1},
{0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,0,1}};
uint32_t expect_labels[4][40] = {{0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1,
0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1},
{0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1},
{0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1},
{0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1,
0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1}};
uint32_t count = 0;
auto tree = Build({Celeba(16, 2, 32, dir)});
auto tree = Build({Celeba(16, 2, dir)});
tree->Prepare();
Status rc = tree->Launch();
if (rc.IsError()) {
@ -86,16 +90,24 @@ TEST_F(MindDataTestCelebaDataset, TestSequentialCeleba) {
TEST_F(MindDataTestCelebaDataset, TestCelebaRepeat) {
std::string dir = datasets_root_path_ + "/testCelebAData/";
uint32_t expect_labels[8][40] = {{0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,0,1},
{0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1},
{0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1},
{0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,0,1},
{0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,0,1},
{0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1},
{0,0,0,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1},
{0,1,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,0,1,0,0,1,0,0,0,1,1,0,1,0,1,0,0,1}};
uint32_t expect_labels[8][40] = {{0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1,
0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1},
{0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1},
{0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1},
{0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1,
0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1},
{0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1,
0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1},
{0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1},
{0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1},
{0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1,
0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1}};
uint32_t count = 0;
auto op1 = Celeba(16, 2, 32, dir);
auto op1 = Celeba(16, 2, dir);
auto op2 = Repeat(2);
auto tree = Build({op1, op2});
op1->set_total_repeats(2);
@ -131,7 +143,7 @@ TEST_F(MindDataTestCelebaDataset, TestSubsetRandomSamplerCeleba) {
0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1}};
std::string dir = datasets_root_path_ + "/testCelebAData/";
uint32_t count = 0;
auto tree = Build({Celeba(16, 2, 32, dir, std::move(sampler))});
auto tree = Build({Celeba(16, 2, dir, std::move(sampler))});
tree->Prepare();
Status rc = tree->Launch();
if (rc.IsError()) {

View File

@ -47,7 +47,7 @@ std::shared_ptr<CifarOp> Cifarop(uint64_t num_works, uint64_t rows, uint64_t con
CifarOp::Builder builder;
Status rc = builder.SetNumWorkers(num_works)
.SetCifarDir(path)
.SetRowsPerBuffer(rows)
.SetOpConnectorSize(conns)
.SetSampler(std::move(sampler))
.SetCifarType(cifar10)

View File

@ -45,20 +45,17 @@ TEST_F(MindDataTestClientConfig, TestClientConfig1) {
std::shared_ptr<ConfigManager> my_conf = GlobalContext::config_manager();
ASSERT_EQ(my_conf->num_parallel_workers(), kCfgParallelWorkers);
ASSERT_EQ(my_conf->rows_per_buffer(), kCfgRowsPerBuffer);
ASSERT_EQ(my_conf->worker_connector_size(), kCfgWorkerConnectorSize);
ASSERT_EQ(my_conf->op_connector_size(), kCfgOpConnectorSize);
ASSERT_EQ(my_conf->seed(), kCfgDefaultSeed);
my_conf->set_num_parallel_workers(2);
my_conf->set_rows_per_buffer(1);
my_conf->set_worker_connector_size(3);
my_conf->set_op_connector_size(4);
my_conf->set_seed(5);
ASSERT_EQ(my_conf->num_parallel_workers(), 2);
ASSERT_EQ(my_conf->rows_per_buffer(), 1);
ASSERT_EQ(my_conf->worker_connector_size(), 3);
ASSERT_EQ(my_conf->op_connector_size(), 4);
ASSERT_EQ(my_conf->seed(), 5);
@ -67,7 +64,6 @@ TEST_F(MindDataTestClientConfig, TestClientConfig1) {
ASSERT_TRUE(my_conf->LoadFile(file));
ASSERT_EQ(my_conf->num_parallel_workers(), kCfgParallelWorkers);
ASSERT_EQ(my_conf->rows_per_buffer(), kCfgRowsPerBuffer);
ASSERT_EQ(my_conf->worker_connector_size(), kCfgWorkerConnectorSize);
ASSERT_EQ(my_conf->op_connector_size(), kCfgOpConnectorSize);
ASSERT_EQ(my_conf->seed(), kCfgDefaultSeed);

View File

@ -50,7 +50,7 @@ TEST_F(MindDataTestCLUEOp, TestCLUEBasic) {
std::shared_ptr<ClueOp> op;
ClueOp::Builder builder;
builder.SetClueFilesList({dataset_path})
.SetRowsPerBuffer(16)
.SetOpConnectorSize(2)
.SetColsKeyMap(key_map);

View File

@ -43,7 +43,7 @@ using mindspore::MsLogLevel::ERROR;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::LogStream;
std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false, int rows_per_buf = 2);
std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false);
std::shared_ptr<ExecutionTree> Build(std::vector<std::shared_ptr<DatasetOp>> ops);

View File

@ -51,7 +51,7 @@ TEST_F(MindDataTestConcatOp, TestConcatProject) {
// TFReaderOp1
std::shared_ptr<TFReaderOp> my_tfreader_op1;
TFReaderOp::Builder builder1;
builder1.SetDatasetFilesList({dataset_path}).SetRowsPerBuffer(16).SetWorkerConnectorSize(16);
builder1.SetDatasetFilesList({dataset_path}).SetWorkerConnectorSize(16);
std::unique_ptr<DataSchema> schema1 = std::make_unique<DataSchema>();
schema1->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema1Row.json", {});
builder1.SetDataSchema(std::move(schema1));
@ -63,7 +63,7 @@ TEST_F(MindDataTestConcatOp, TestConcatProject) {
// TFReaderOp2
std::shared_ptr<TFReaderOp> my_tfreader_op2;
TFReaderOp::Builder builder2;
builder2.SetDatasetFilesList({dataset_path}).SetRowsPerBuffer(16).SetWorkerConnectorSize(16);
builder2.SetDatasetFilesList({dataset_path}).SetWorkerConnectorSize(16);
std::unique_ptr<DataSchema> schema2 = std::make_unique<DataSchema>();
schema2->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema1Row.json", {});
builder2.SetDataSchema(std::move(schema2));

View File

@ -52,7 +52,7 @@ TEST_F(MindDataTestCSVOp, TestCSVBasic) {
std::shared_ptr<CsvOp> op;
CsvOp::Builder builder;
builder.SetCsvFilesList({dataset_path})
.SetRowsPerBuffer(16)
.SetShuffleFiles(false)
.SetOpConnectorSize(2)
.SetFieldDelim(',')

View File

@ -44,19 +44,15 @@ TEST_F(MindDataTestExecutionTree, TestExecutionTree1) {
uint32_t shuffle_size = 32;
uint32_t connector_size = 8;
std::shared_ptr<ShuffleOp> leaf_op1 =
std::make_shared<ShuffleOp>(shuffle_size, 0, connector_size, false, 32);
std::shared_ptr<ShuffleOp> leaf_op1 = std::make_shared<ShuffleOp>(shuffle_size, 0, connector_size, false);
ASSERT_NE(leaf_op1, nullptr);
my_tree->AssociateNode(leaf_op1);
shuffle_size = 16;
std::shared_ptr<ShuffleOp> leaf_op2 =
std::make_shared<ShuffleOp>(shuffle_size, 0, connector_size, false, 32);
std::shared_ptr<ShuffleOp> leaf_op2 = std::make_shared<ShuffleOp>(shuffle_size, 0, connector_size, false);
ASSERT_NE(leaf_op2, nullptr);
my_tree->AssociateNode(leaf_op2);
shuffle_size = 8;
std::shared_ptr<ShuffleOp> parent_op =
std::make_shared<ShuffleOp>(shuffle_size, 0, connector_size, false, 32);
std::shared_ptr<ShuffleOp> parent_op = std::make_shared<ShuffleOp>(shuffle_size, 0, connector_size, false);
ASSERT_NE(parent_op, nullptr);
my_tree->AssociateNode(parent_op);
@ -68,8 +64,7 @@ TEST_F(MindDataTestExecutionTree, TestExecutionTree1) {
parent_op->AddChild(std::move(leaf_op1));
parent_op->AddChild(std::move(leaf_op2));
shuffle_size = 4;
std::shared_ptr<DatasetOp> root_op =
std::make_shared<ShuffleOp>(shuffle_size, 0, connector_size, false, 32);
std::shared_ptr<DatasetOp> root_op = std::make_shared<ShuffleOp>(shuffle_size, 0, connector_size, false);
my_tree->AssignRoot(root_op);
root_op->AddChild(parent_op);
ASSERT_NE(root_op, nullptr);
@ -105,10 +100,10 @@ TEST_F(MindDataTestExecutionTree, TestExecutionTree2) {
std::string dataset_path = datasets_root_path_ + "/testDataset1/testDataset1.data";
std::shared_ptr<TFReaderOp> my_tfreader_op;
TFReaderOp::Builder()
.SetDatasetFilesList({dataset_path})
.SetRowsPerBuffer(2)
.SetWorkerConnectorSize(2)
.SetNumWorkers(2)
.SetDatasetFilesList({dataset_path})
.SetWorkerConnectorSize(2)
.SetNumWorkers(2)
.Build(&my_tfreader_op);
my_tree->AssociateNode(my_tfreader_op);

View File

@ -40,7 +40,7 @@ using mindspore::LogStream;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::MsLogLevel::ERROR;
std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false, int rows_per_buf = 2);
std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false);
std::shared_ptr<RepeatOp> Repeat(int repeat_cnt);
@ -53,7 +53,7 @@ std::shared_ptr<ImageFolderOp> ImageFolder(int64_t num_works, int64_t rows, int6
ImageFolderOp::Builder builder;
Status rc = builder.SetNumWorkers(num_works)
.SetImageFolderDir(path)
.SetRowsPerBuffer(rows)
.SetOpConnectorSize(conns)
.SetExtensions({".jpg", ".JPEG"})
.SetSampler(std::move(sampler))

View File

@ -156,7 +156,7 @@ TEST_F(MindDataTestCallback, TestBasicCallback) {
ColDescriptor col("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &shape);
ASSERT_OK(schema->AddColumn(col));
std::shared_ptr<RandomDataOp> leaf;
rc = RandomDataOp::Builder().SetRowsPerBuffer(1).SetDataSchema(std::move(schema)).SetTotalRows(44).Build(&leaf);
rc = RandomDataOp::Builder().SetDataSchema(std::move(schema)).SetTotalRows(44).Build(&leaf);
EXPECT_TRUE(rc.IsOk());
// config mapOp
std::shared_ptr<MapOp> map_op;
@ -208,7 +208,7 @@ TEST_F(MindDataTestCallback, TestMultiEpochCallback) {
ColDescriptor col("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &shape);
ASSERT_OK(schema->AddColumn(col));
std::shared_ptr<RandomDataOp> leaf;
rc = RandomDataOp::Builder().SetRowsPerBuffer(1).SetDataSchema(std::move(schema)).SetTotalRows(4).SetNumWorkers(4).Build(&leaf);
rc = RandomDataOp::Builder().SetDataSchema(std::move(schema)).SetTotalRows(4).SetNumWorkers(4).Build(&leaf);
EXPECT_TRUE(rc.IsOk());
// config mapOp
std::shared_ptr<MapOp> map_op;
@ -273,7 +273,7 @@ TEST_F(MindDataTestCallback, TestSelectedCallback) {
ColDescriptor col("label", DataType(DataType::DE_UINT32), TensorImpl::kFlexible, 0, &shape);
ASSERT_OK(schema->AddColumn(col));
std::shared_ptr<RandomDataOp> leaf;
rc = RandomDataOp::Builder().SetRowsPerBuffer(1).SetDataSchema(std::move(schema)).SetTotalRows(4).SetNumWorkers(4).Build(&leaf);
rc = RandomDataOp::Builder().SetDataSchema(std::move(schema)).SetTotalRows(4).SetNumWorkers(4).Build(&leaf);
EXPECT_TRUE(rc.IsOk());
// config mapOp
std::shared_ptr<MapOp> map_op;

View File

@ -46,9 +46,14 @@ std::shared_ptr<ManifestOp> Manifest(int32_t num_works, int32_t rows, int32_t co
std::map<std::string, int32_t> map = {}, bool decode = false) {
std::shared_ptr<ManifestOp> so;
ManifestOp::Builder builder;
Status rc = builder.SetNumWorkers(num_works).SetManifestFile(file).SetRowsPerBuffer(
rows).SetOpConnectorSize(conns).SetSampler(std::move(sampler)).SetClassIndex(map).SetDecode(decode)
.SetUsage(usage).Build(&so);
Status rc = builder.SetNumWorkers(num_works)
.SetManifestFile(file)
.SetOpConnectorSize(conns)
.SetSampler(std::move(sampler))
.SetClassIndex(map)
.SetDecode(decode)
.SetUsage(usage)
.Build(&so);
return so;
}

View File

@ -110,7 +110,7 @@ class MindDataTestMapOp : public UT::DatasetOpTesting {
TFReaderOp::Builder builder;
builder.SetDatasetFilesList({dataset_path_})
.SetColumnsToLoad({"image", "label", "A", "B"})
.SetRowsPerBuffer(2)
.SetWorkerConnectorSize(2)
.SetNumWorkers(2);
@ -516,7 +516,7 @@ TEST_F(MindDataTestMapOp, TFReader_Decode_Repeat_Resize) {
TFReaderOp::Builder sobuilder;
sobuilder.SetDatasetFilesList({dataset_path_})
.SetColumnsToLoad({"image", "label"})
.SetRowsPerBuffer(2)
.SetWorkerConnectorSize(2)
.SetNumWorkers(2);
rc = sobuilder.Build(&my_tfreader_op);

View File

@ -29,12 +29,11 @@
namespace common = mindspore::common;
using namespace mindspore::dataset;
using mindspore::MsLogLevel::INFO;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::LogStream;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::MsLogLevel::INFO;
class MindDataTestMindRecordOp : public UT::DatasetOpTesting {
};
class MindDataTestMindRecordOp : public UT::DatasetOpTesting {};
TEST_F(MindDataTestMindRecordOp, TestMindRecordBasic) {
// single MindRecord op and nothing else
@ -63,10 +62,9 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordBasic) {
std::shared_ptr<MindRecordOp> my_mindrecord_op;
MindRecordOp::Builder builder;
builder.SetDatasetFile({mindrecord_root_path_ + "/testMindDataSet/testImageNetData/imagenet.mindrecord0"})
.SetLoadDataset(true)
.SetRowsPerBuffer(3)
.SetNumMindRecordWorkers(4)
.SetColumnsToLoad(column_list);
.SetLoadDataset(true)
.SetNumMindRecordWorkers(4)
.SetColumnsToLoad(column_list);
rc = builder.Build(&my_mindrecord_op);
ASSERT_TRUE(rc.IsOk());
@ -134,11 +132,10 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordSample) {
std::shared_ptr<MindRecordOp> my_mindrecord_op;
MindRecordOp::Builder builder;
builder.SetDatasetFile({mindrecord_root_path_ + "/testMindDataSet/testImageNetData/imagenet.mindrecord0"})
.SetLoadDataset(true)
.SetRowsPerBuffer(3)
.SetNumMindRecordWorkers(4)
.SetColumnsToLoad(column_list)
.SetOperators(operators);
.SetLoadDataset(true)
.SetNumMindRecordWorkers(4)
.SetColumnsToLoad(column_list)
.SetOperators(operators);
rc = builder.Build(&my_mindrecord_op);
ASSERT_TRUE(rc.IsOk());
@ -206,11 +203,10 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordShuffle) {
std::shared_ptr<MindRecordOp> my_mindrecord_op;
MindRecordOp::Builder builder;
builder.SetDatasetFile({mindrecord_root_path_ + "/testMindDataSet/testImageNetData/imagenet.mindrecord0"})
.SetLoadDataset(true)
.SetRowsPerBuffer(3)
.SetNumMindRecordWorkers(4)
.SetColumnsToLoad(column_list)
.SetOperators(operators);
.SetLoadDataset(true)
.SetNumMindRecordWorkers(4)
.SetColumnsToLoad(column_list)
.SetOperators(operators);
rc = builder.Build(&my_mindrecord_op);
ASSERT_TRUE(rc.IsOk());
@ -281,11 +277,10 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordCategory) {
std::shared_ptr<MindRecordOp> my_mindrecord_op;
MindRecordOp::Builder builder;
builder.SetDatasetFile({mindrecord_root_path_ + "/testMindDataSet/testImageNetData/imagenet.mindrecord0"})
.SetLoadDataset(true)
.SetRowsPerBuffer(3)
.SetNumMindRecordWorkers(4)
.SetColumnsToLoad(column_list)
.SetOperators(operators);
.SetLoadDataset(true)
.SetNumMindRecordWorkers(4)
.SetColumnsToLoad(column_list)
.SetOperators(operators);
rc = builder.Build(&my_mindrecord_op);
ASSERT_TRUE(rc.IsOk());
@ -350,10 +345,9 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordRepeat) {
std::shared_ptr<MindRecordOp> my_mindrecord_op;
MindRecordOp::Builder builder;
builder.SetDatasetFile({mindrecord_root_path_ + "/testMindDataSet/testImageNetData/imagenet.mindrecord0"})
.SetLoadDataset(true)
.SetRowsPerBuffer(3)
.SetNumMindRecordWorkers(4)
.SetColumnsToLoad(column_list);
.SetLoadDataset(true)
.SetNumMindRecordWorkers(4)
.SetColumnsToLoad(column_list);
rc = builder.Build(&my_mindrecord_op);
ASSERT_TRUE(rc.IsOk());
@ -364,8 +358,7 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordRepeat) {
uint32_t num_repeats = 2;
std::shared_ptr<RepeatOp> my_repeat_op;
rc = RepeatOp::Builder(num_repeats)
.Build(&my_repeat_op);
rc = RepeatOp::Builder(num_repeats).Build(&my_repeat_op);
EXPECT_TRUE(rc.IsOk());
rc = my_tree->AssociateNode(my_repeat_op);
EXPECT_TRUE(rc.IsOk());
@ -375,7 +368,6 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordRepeat) {
rc = my_repeat_op->AddChild(my_mindrecord_op);
EXPECT_TRUE(rc.IsOk());
// Set children/root layout.
rc = my_tree->AssignRoot(my_repeat_op);
EXPECT_TRUE(rc.IsOk());
@ -407,7 +399,6 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordRepeat) {
}
}
TEST_F(MindDataTestMindRecordOp, TestMindRecordBlockReaderRepeat) {
// single MindRecord op and nothing else
//
@ -435,10 +426,9 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordBlockReaderRepeat) {
std::shared_ptr<MindRecordOp> my_mindrecord_op;
MindRecordOp::Builder builder;
builder.SetDatasetFile({mindrecord_root_path_ + "/testMindDataSet/testImageNetData/imagenet.mindrecord0"})
.SetLoadDataset(true)
.SetRowsPerBuffer(3)
.SetNumMindRecordWorkers(4)
.SetColumnsToLoad(column_list);
.SetLoadDataset(true)
.SetNumMindRecordWorkers(4)
.SetColumnsToLoad(column_list);
rc = builder.Build(&my_mindrecord_op);
ASSERT_TRUE(rc.IsOk());
@ -449,8 +439,7 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordBlockReaderRepeat) {
uint32_t num_repeats = 2;
std::shared_ptr<RepeatOp> my_repeat_op;
rc = RepeatOp::Builder(num_repeats)
.Build(&my_repeat_op);
rc = RepeatOp::Builder(num_repeats).Build(&my_repeat_op);
EXPECT_TRUE(rc.IsOk());
rc = my_tree->AssociateNode(my_repeat_op);
EXPECT_TRUE(rc.IsOk());
@ -518,10 +507,9 @@ TEST_F(MindDataTestMindRecordOp, TestMindRecordInvalidColumnList) {
std::shared_ptr<MindRecordOp> my_mindrecord_op;
MindRecordOp::Builder builder;
builder.SetDatasetFile({mindrecord_root_path_ + "/testMindDataSet/testImageNetData/imagenet.mindrecord0"})
.SetLoadDataset(true)
.SetRowsPerBuffer(3)
.SetNumMindRecordWorkers(4)
.SetColumnsToLoad(column_list);
.SetLoadDataset(true)
.SetNumMindRecordWorkers(4)
.SetColumnsToLoad(column_list);
rc = builder.Build(&my_mindrecord_op);
ASSERT_TRUE(rc.IsError());
ASSERT_TRUE(rc.ToString().find_first_of("illegal column list") != std::string::npos);

View File

@ -42,7 +42,7 @@ using mindspore::MsLogLevel::ERROR;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::LogStream;
std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false, int rows_per_buf = 2);
std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false);
std::shared_ptr<RepeatOp> Repeat(int repeat_cnt);
@ -57,7 +57,7 @@ std::shared_ptr<MnistOp> CreateMnist(int64_t num_wrks, int64_t rows, int64_t con
MnistOp::Builder builder;
Status rc = builder.SetNumWorkers(num_wrks)
.SetDir(path)
.SetRowsPerBuffer(rows)
.SetOpConnectorSize(conns)
.SetSampler(std::move(sampler))
.Build(&so);

View File

@ -41,7 +41,7 @@ TEST_F(MindDataTestProjectOp, TestProjectProject) {
std::shared_ptr<TFReaderOp> my_tfreader_op;
TFReaderOp::Builder builder;
builder.SetDatasetFilesList({dataset_path}).SetRowsPerBuffer(16).SetWorkerConnectorSize(16);
builder.SetDatasetFilesList({dataset_path}).SetWorkerConnectorSize(16);
std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {});
builder.SetDataSchema(std::move(schema));

View File

@ -76,11 +76,7 @@ TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic1) {
std::shared_ptr<RandomDataOp> myRandomDataOp;
RandomDataOp::Builder builder;
rc = builder.SetRowsPerBuffer(2)
.SetNumWorkers(1)
.SetDataSchema(std::move(testSchema))
.SetTotalRows(25)
.Build(&myRandomDataOp);
rc = builder.SetNumWorkers(1).SetDataSchema(std::move(testSchema)).SetTotalRows(25).Build(&myRandomDataOp);
EXPECT_TRUE(rc.IsOk());
rc = myTree->AssociateNode(myRandomDataOp);
@ -134,9 +130,7 @@ TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic2) {
std::shared_ptr<RandomDataOp> myRandomDataOp;
RandomDataOp::Builder builder;
rc = builder.SetRowsPerBuffer(2)
.SetNumWorkers(1)
.Build(&myRandomDataOp);
rc = builder.SetNumWorkers(1).Build(&myRandomDataOp);
EXPECT_TRUE(rc.IsOk());
rc = myTree->AssociateNode(myRandomDataOp);
@ -171,11 +165,7 @@ TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic3) {
std::shared_ptr<RandomDataOp> myRandomDataOp;
RandomDataOp::Builder builder;
rc = builder.SetRowsPerBuffer(2)
.SetNumWorkers(1)
.SetDataSchema(std::move(testSchema))
.SetTotalRows(10)
.Build(&myRandomDataOp);
rc = builder.SetNumWorkers(1).SetDataSchema(std::move(testSchema)).SetTotalRows(10).Build(&myRandomDataOp);
EXPECT_TRUE(rc.IsOk());
rc = myTree->AssociateNode(myRandomDataOp);
@ -235,11 +225,7 @@ TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic4) {
std::shared_ptr<RandomDataOp> myRandomDataOp;
RandomDataOp::Builder builder;
rc = builder.SetRowsPerBuffer(2)
.SetNumWorkers(1)
.SetDataSchema(std::move(testSchema))
.SetTotalRows(10)
.Build(&myRandomDataOp);
rc = builder.SetNumWorkers(1).SetDataSchema(std::move(testSchema)).SetTotalRows(10).Build(&myRandomDataOp);
EXPECT_TRUE(rc.IsOk());
rc = myTree->AssociateNode(myRandomDataOp);
@ -315,11 +301,7 @@ TEST_F(MindDataTestRandomDataOp, RandomDataOpBasic5) {
std::shared_ptr<RandomDataOp> myRandomDataOp;
RandomDataOp::Builder builder;
rc = builder.SetRowsPerBuffer(2)
.SetNumWorkers(4)
.SetDataSchema(std::move(testSchema))
.SetTotalRows(10)
.Build(&myRandomDataOp);
rc = builder.SetNumWorkers(4).SetDataSchema(std::move(testSchema)).SetTotalRows(10).Build(&myRandomDataOp);
EXPECT_TRUE(rc.IsOk());
rc = myTree->AssociateNode(myRandomDataOp);
@ -395,11 +377,7 @@ TEST_F(MindDataTestRandomDataOp, RandomDataOpTree1) {
std::shared_ptr<RandomDataOp> myRandomDataOp;
RandomDataOp::Builder builder;
rc = builder.SetRowsPerBuffer(2)
.SetNumWorkers(4)
.SetDataSchema(std::move(testSchema))
.SetTotalRows(10)
.Build(&myRandomDataOp);
rc = builder.SetNumWorkers(4).SetDataSchema(std::move(testSchema)).SetTotalRows(10).Build(&myRandomDataOp);
EXPECT_TRUE(rc.IsOk());
rc = myTree->AssociateNode(myRandomDataOp);
@ -407,9 +385,9 @@ TEST_F(MindDataTestRandomDataOp, RandomDataOpTree1) {
std::shared_ptr<ShuffleOp> myShuffleOp;
rc = ShuffleOp::Builder()
.SetRowsPerBuffer(2)
.SetShuffleSize(4)
.Build(&myShuffleOp);
.SetShuffleSize(4)
.Build(&myShuffleOp);
EXPECT_TRUE(rc.IsOk());
rc = myTree->AssociateNode(myShuffleOp);
EXPECT_TRUE(rc.IsOk());

View File

@ -53,10 +53,10 @@ TEST_F(MindDataTestRenameOp, TestRenameOpDefault) {
std::string dataset_path = datasets_root_path_ + "/test_tf_file_3_images/train-0000-of-0001.data";
std::shared_ptr<TFReaderOp> my_tfreader_op;
rc = TFReaderOp::Builder()
.SetDatasetFilesList({dataset_path})
.SetRowsPerBuffer(2)
.SetWorkerConnectorSize(16)
.SetNumWorkers(1)
.SetDatasetFilesList({dataset_path})
.SetWorkerConnectorSize(16)
.SetNumWorkers(1)
.Build(&my_tfreader_op);
EXPECT_TRUE(rc.IsOk());
rc = my_tree->AssociateNode(my_tfreader_op);

View File

@ -46,7 +46,7 @@ TEST_F(MindDataTestSentencePieceVocabOp, TestSentencePieceFromDatasetFuntions) {
std::shared_ptr<TextFileOp> file_op;
TextFileOp::Builder builder_file;
builder_file.SetTextFilesList({dataset_path}).SetRowsPerBuffer(1).SetNumWorkers(1).SetOpConnectorSize(2);
builder_file.SetTextFilesList({dataset_path}).SetNumWorkers(1).SetOpConnectorSize(2);
Status rc = builder_file.Build(&file_op);
ASSERT_TRUE(rc.IsOk());
@ -119,7 +119,7 @@ TEST_F(MindDataTestSentencePieceVocabOp, TestSentencePieceTokenizerFuntions) {
std::shared_ptr<TextFileOp> file_op;
TextFileOp::Builder builder_file;
builder_file.SetTextFilesList({dataset_path}).SetRowsPerBuffer(1).SetNumWorkers(1).SetOpConnectorSize(2);
builder_file.SetTextFilesList({dataset_path}).SetNumWorkers(1).SetOpConnectorSize(2);
Status rc = builder_file.Build(&file_op);
ASSERT_TRUE(rc.IsOk());

View File

@ -56,16 +56,16 @@ TEST_F(MindDataTestShuffleOp, TestShuffleBasic1) {
dataset_path = datasets_root_path_ + "/testDataset1/testDataset1.data";
std::shared_ptr<TFReaderOp> my_tfreader_op;
rc = TFReaderOp::Builder()
.SetDatasetFilesList({dataset_path})
.SetRowsPerBuffer(2)
.SetWorkerConnectorSize(16)
.SetNumWorkers(1)
.SetDatasetFilesList({dataset_path})
.SetWorkerConnectorSize(16)
.SetNumWorkers(1)
.Build(&my_tfreader_op);
EXPECT_TRUE(rc.IsOk());
rc = my_tree->AssociateNode(my_tfreader_op);
EXPECT_TRUE(rc.IsOk());
std::shared_ptr<ShuffleOp> my_shuffle_op;
rc = ShuffleOp::Builder().SetRowsPerBuffer(2).SetShuffleSize(4).Build(&my_shuffle_op);
rc = ShuffleOp::Builder().SetShuffleSize(4).Build(&my_shuffle_op);
EXPECT_TRUE(rc.IsOk());
rc = my_tree->AssociateNode(my_shuffle_op);
EXPECT_TRUE(rc.IsOk());
@ -130,7 +130,6 @@ TEST_F(MindDataTestShuffleOp, TestShuffleBasic2) {
std::shared_ptr<TFReaderOp> my_tfreader_op;
rc = TFReaderOp::Builder()
.SetDatasetFilesList({dataset_path})
.SetRowsPerBuffer(3)
.SetWorkerConnectorSize(16)
.SetNumWorkers(2)
.Build(&my_tfreader_op);
@ -138,7 +137,7 @@ TEST_F(MindDataTestShuffleOp, TestShuffleBasic2) {
rc = my_tree->AssociateNode(my_tfreader_op);
EXPECT_TRUE(rc.IsOk());
std::shared_ptr<ShuffleOp> my_shuffle_op;
rc = ShuffleOp::Builder().SetShuffleSize(4).SetShuffleSeed(100).SetRowsPerBuffer(3).Build(&my_shuffle_op);
rc = ShuffleOp::Builder().SetShuffleSize(4).SetShuffleSeed(100).Build(&my_shuffle_op);
EXPECT_TRUE(rc.IsOk());
rc = my_tree->AssociateNode(my_shuffle_op);
EXPECT_TRUE(rc.IsOk());
@ -201,14 +200,13 @@ TEST_F(MindDataTestShuffleOp, TestShuffleBasic3) {
std::shared_ptr<TFReaderOp> my_tfreader_op;
rc = TFReaderOp::Builder()
.SetDatasetFilesList({dataset_path})
.SetRowsPerBuffer(3)
.SetWorkerConnectorSize(16)
.SetNumWorkers(2)
.Build(&my_tfreader_op);
EXPECT_TRUE(rc.IsOk());
my_tree->AssociateNode(my_tfreader_op);
std::shared_ptr<ShuffleOp> my_shuffle_op;
rc = ShuffleOp::Builder().SetShuffleSize(100).SetRowsPerBuffer(3).Build(&my_shuffle_op);
rc = ShuffleOp::Builder().SetShuffleSize(100).Build(&my_shuffle_op);
EXPECT_TRUE(rc.IsOk());
rc = my_tree->AssociateNode(my_shuffle_op);
EXPECT_TRUE(rc.IsOk());
@ -275,7 +273,6 @@ TEST_F(MindDataTestShuffleOp, TestRepeatShuffle) {
std::shared_ptr<TFReaderOp> my_tfreader_op;
rc = TFReaderOp::Builder()
.SetDatasetFilesList({dataset_path})
.SetRowsPerBuffer(3)
.SetWorkerConnectorSize(16)
.SetNumWorkers(2)
.Build(&my_tfreader_op);
@ -286,7 +283,6 @@ TEST_F(MindDataTestShuffleOp, TestRepeatShuffle) {
rc = ShuffleOp::Builder()
.SetShuffleSize(4)
.SetShuffleSeed(100)
.SetRowsPerBuffer(3)
.SetReshuffleEachEpoch(true)
.Build(&my_shuffle_op);
EXPECT_TRUE(rc.IsOk());

View File

@ -35,7 +35,7 @@ TEST_F(MindDataTestSkipOp, TestSkipOpFuntions) {
std::shared_ptr<TFReaderOp> my_tfreader_op;
TFReaderOp::Builder builder;
builder.SetDatasetFilesList({dataset_path}).SetRowsPerBuffer(16).SetWorkerConnectorSize(16);
builder.SetDatasetFilesList({dataset_path}).SetWorkerConnectorSize(16);
std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {});
builder.SetDataSchema(std::move(schema));

View File

@ -42,7 +42,7 @@ TEST_F(MindDataTestTakeOp, TestTakeProject) {
// TFReaderOp
std::shared_ptr<TFReaderOp> my_tfreader_op;
TFReaderOp::Builder builder;
builder.SetDatasetFilesList({dataset_path}).SetRowsPerBuffer(16).SetWorkerConnectorSize(16);
builder.SetDatasetFilesList({dataset_path}).SetWorkerConnectorSize(16);
std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {});
builder.SetDataSchema(std::move(schema));

View File

@ -45,7 +45,7 @@ TEST_F(MindDataTestTextFileOp, TestTextFileBasic) {
std::shared_ptr<TextFileOp> op;
TextFileOp::Builder builder;
builder.SetTextFilesList({dataset_path}).SetRowsPerBuffer(16).SetOpConnectorSize(2);
builder.SetTextFilesList({dataset_path}).SetOpConnectorSize(2);
Status rc = builder.Build(&op);
ASSERT_TRUE(rc.IsOk());
@ -94,7 +94,7 @@ TEST_F(MindDataTestTextFileOp, TestTextFileFileNotExist) {
std::shared_ptr<TextFileOp> op;
TextFileOp::Builder builder;
builder.SetTextFilesList({dataset_path}).SetRowsPerBuffer(16).SetOpConnectorSize(2);
builder.SetTextFilesList({dataset_path}).SetOpConnectorSize(2);
Status rc = builder.Build(&op);
ASSERT_TRUE(rc.IsOk());

View File

@ -44,7 +44,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderBasic1) {
std::shared_ptr<TFReaderOp> my_tfreader_op;
TFReaderOp::Builder builder;
builder.SetDatasetFilesList({dataset_path}).SetRowsPerBuffer(16);
builder.SetDatasetFilesList({dataset_path});
std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {});
builder.SetDataSchema(std::move(schema));
@ -148,7 +148,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderSmallRowsPerBuffer) {
std::shared_ptr<TFReaderOp> my_tfreader_op;
TFReaderOp::Builder builder;
builder.SetDatasetFilesList({dataset_path}).SetRowsPerBuffer(1);
builder.SetDatasetFilesList({dataset_path});
std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {});
builder.SetDataSchema(std::move(schema));
@ -200,7 +200,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderLargeQueueSize) {
std::shared_ptr<TFReaderOp> my_tfreader_op;
TFReaderOp::Builder builder;
builder.SetDatasetFilesList({dataset_path}).SetWorkerConnectorSize(1).SetRowsPerBuffer(16);
builder.SetDatasetFilesList({dataset_path}).SetWorkerConnectorSize(1);
std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {});
builder.SetDataSchema(std::move(schema));
@ -252,10 +252,11 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderOneThread) {
std::shared_ptr<TFReaderOp> my_tfreader_op;
TFReaderOp::Builder builder;
builder.SetDatasetFilesList({dataset_path})
.SetRowsPerBuffer(16)
.SetNumWorkers(1);
std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
builder
.SetDatasetFilesList({dataset_path})
.SetNumWorkers(1);
std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {});
builder.SetDataSchema(std::move(schema));
Status rc = builder.Build(&my_tfreader_op);
@ -307,7 +308,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderRepeat) {
// TFReaderOp
std::shared_ptr<TFReaderOp> my_tfreader_op;
TFReaderOp::Builder builder;
builder.SetDatasetFilesList({dataset_path}).SetRowsPerBuffer(16).SetWorkerConnectorSize(16);
builder.SetDatasetFilesList({dataset_path}).SetWorkerConnectorSize(16);
std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
schema->LoadSchemaFile(datasets_root_path_ + "/testTFTestAllTypes/datasetSchema.json", {});
builder.SetDataSchema(std::move(schema));
@ -378,7 +379,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderSchemaConstructor) {
std::shared_ptr<TFReaderOp> my_tfreader_op;
TFReaderOp::Builder builder;
builder.SetDatasetFilesList({dataset_path + "/test.data"})
.SetRowsPerBuffer(16)
.SetNumWorkers(16)
.SetDataSchema(std::move(data_schema));
Status rc = builder.Build(&my_tfreader_op);
@ -605,7 +606,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderBasicNoSchema) {
std::shared_ptr<TFReaderOp> my_tfreader_op;
TFReaderOp::Builder builder;
builder.SetDatasetFilesList({dataset_path}).SetRowsPerBuffer(16);
builder.SetDatasetFilesList({dataset_path});
Status rc = builder.Build(&my_tfreader_op);
ASSERT_TRUE(rc.IsOk());
@ -697,7 +698,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderInvalidFiles) {
std::shared_ptr<TFReaderOp> my_tfreader_op;
TFReaderOp::Builder builder;
builder.SetDatasetFilesList({invalid_file, valid_file, schema_file}).SetRowsPerBuffer(16);
builder.SetDatasetFilesList({invalid_file, valid_file, schema_file});
std::unique_ptr<DataSchema> schema = std::make_unique<DataSchema>();
schema->LoadSchemaFile(schema_file, {});
@ -706,7 +707,7 @@ TEST_F(MindDataTestTFReaderOp, TestTFReaderInvalidFiles) {
Status rc = builder.Build(&my_tfreader_op);
ASSERT_TRUE(!rc.IsOk());
builder.SetDatasetFilesList({invalid_file, valid_file, schema_file, nonexistent_file}).SetRowsPerBuffer(16);
builder.SetDatasetFilesList({invalid_file, valid_file, schema_file, nonexistent_file});
schema = std::make_unique<DataSchema>();
schema->LoadSchemaFile(schema_file, {});

View File

@ -39,11 +39,11 @@
namespace common = mindspore::common;
using namespace mindspore::dataset;
using mindspore::MsLogLevel::ERROR;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::LogStream;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::MsLogLevel::ERROR;
std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false, int rows_per_buf = 2);
std::shared_ptr<BatchOp> Batch(int batch_size = 1, bool drop = false);
std::shared_ptr<ExecutionTree> Build(std::vector<std::shared_ptr<DatasetOp>> ops);
@ -61,8 +61,7 @@ TEST_F(MindDataTestVOCOp, TestVOCDetection) {
std::string task_mode("train");
std::shared_ptr<VOCOp> my_voc_op;
VOCOp::Builder builder;
Status rc = builder.SetDir(dataset_path).SetTask(task_type).SetUsage(task_mode)
.Build(&my_voc_op);
Status rc = builder.SetDir(dataset_path).SetTask(task_type).SetUsage(task_mode).Build(&my_voc_op);
ASSERT_TRUE(rc.IsOk());
rc = my_tree->AssociateNode(my_voc_op);
@ -87,7 +86,7 @@ TEST_F(MindDataTestVOCOp, TestVOCDetection) {
while (!tensor_list.empty()) {
MS_LOG(DEBUG) << "Row display for row #: " << row_count << ".";
//Display the tensor by calling the printer on it
// Display the tensor by calling the printer on it
for (int i = 0; i < tensor_list.size(); i++) {
std::ostringstream ss;
ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
@ -112,8 +111,7 @@ TEST_F(MindDataTestVOCOp, TestVOCSegmentation) {
std::string task_mode("train");
std::shared_ptr<VOCOp> my_voc_op;
VOCOp::Builder builder;
Status rc = builder.SetDir(dataset_path).SetTask(task_type).SetUsage(task_mode)
.Build(&my_voc_op);
Status rc = builder.SetDir(dataset_path).SetTask(task_type).SetUsage(task_mode).Build(&my_voc_op);
ASSERT_TRUE(rc.IsOk());
rc = my_tree->AssociateNode(my_voc_op);
@ -138,7 +136,7 @@ TEST_F(MindDataTestVOCOp, TestVOCSegmentation) {
while (!tensor_list.empty()) {
MS_LOG(DEBUG) << "Row display for row #: " << row_count << ".";
//Display the tensor by calling the printer on it
// Display the tensor by calling the printer on it
for (int i = 0; i < tensor_list.size(); i++) {
std::ostringstream ss;
ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;
@ -168,9 +166,7 @@ TEST_F(MindDataTestVOCOp, TestVOCClassIndex) {
std::shared_ptr<VOCOp> my_voc_op;
VOCOp::Builder builder;
Status rc =
builder.SetDir(dataset_path).SetTask(task_type).SetUsage(task_mode)
.SetClassIndex(class_index)
.Build(&my_voc_op);
builder.SetDir(dataset_path).SetTask(task_type).SetUsage(task_mode).SetClassIndex(class_index).Build(&my_voc_op);
ASSERT_TRUE(rc.IsOk());
rc = my_tree->AssociateNode(my_voc_op);
@ -195,7 +191,7 @@ TEST_F(MindDataTestVOCOp, TestVOCClassIndex) {
while (!tensor_list.empty()) {
MS_LOG(DEBUG) << "Row display for row #: " << row_count << ".";
//Display the tensor by calling the printer on it
// Display the tensor by calling the printer on it
for (int i = 0; i < tensor_list.size(); i++) {
std::ostringstream ss;
ss << "(" << tensor_list[i] << "): " << *tensor_list[i] << std::endl;

View File

@ -36,22 +36,21 @@
namespace common = mindspore::common;
using namespace mindspore::dataset;
using mindspore::MsLogLevel::INFO;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::LogStream;
using mindspore::ExceptionType::NoExceptionType;
using mindspore::MsLogLevel::INFO;
class MindDataTestZipOp : public UT::DatasetOpTesting {
};
class MindDataTestZipOp : public UT::DatasetOpTesting {};
TEST_F(MindDataTestZipOp, MindDataTestZipOpDefault) {
/* Tree:
*
*
* OpId(2) ZipOp
* / \
* OpId(0) TFReaderOp OpId(1) TFReaderOp
* Start with an empty execution tree
*/
/* Tree:
*
*
* OpId(2) ZipOp
* / \
* OpId(0) TFReaderOp OpId(1) TFReaderOp
* Start with an empty execution tree
*/
Status rc;
MS_LOG(INFO) << "UT test TestZipBasic.";
auto my_tree = std::make_shared<ExecutionTree>();
@ -61,21 +60,19 @@ TEST_F(MindDataTestZipOp, MindDataTestZipOpDefault) {
std::string dataset_path2 = datasets_root_path_ + "/testBatchDataset/test.data";
std::shared_ptr<TFReaderOp> my_tfreader_op;
rc = TFReaderOp::Builder()
.SetDatasetFilesList({dataset_path})
.SetRowsPerBuffer(2)
.SetWorkerConnectorSize(16)
.SetNumWorkers(1)
.Build(&my_tfreader_op);
.SetDatasetFilesList({dataset_path})
.SetWorkerConnectorSize(16)
.SetNumWorkers(1)
.Build(&my_tfreader_op);
EXPECT_TRUE(rc.IsOk());
rc = my_tree->AssociateNode(my_tfreader_op);
EXPECT_TRUE(rc.IsOk());
std::shared_ptr<TFReaderOp> my_tfreader_op2;
rc = TFReaderOp::Builder()
.SetDatasetFilesList({dataset_path2})
.SetRowsPerBuffer(2)
.SetWorkerConnectorSize(1)
.SetNumWorkers(1)
.Build(&my_tfreader_op2);
.SetDatasetFilesList({dataset_path2})
.SetWorkerConnectorSize(1)
.SetNumWorkers(1)
.Build(&my_tfreader_op2);
EXPECT_TRUE(rc.IsOk());
rc = my_tree->AssociateNode(my_tfreader_op2);
EXPECT_TRUE(rc.IsOk());
@ -123,20 +120,19 @@ TEST_F(MindDataTestZipOp, MindDataTestZipOpDefault) {
EXPECT_TRUE(rc.IsOk());
row_count++;
}
ASSERT_EQ(row_count, 3); // Should be 3 rows fetched
ASSERT_EQ(row_count, 3); // Should be 3 rows fetched
}
TEST_F(MindDataTestZipOp, MindDataTestZipOpRepeat) {
/* Tree:
* OpId(3) Repeat(3)
*
* OpId(2) ZipOp
* / \
* OpId(0) TFReaderOp OpId(1) TFReaderOp
*
* Start with an empty execution tree
*/
/* Tree:
* OpId(3) Repeat(3)
*
* OpId(2) ZipOp
* / \
* OpId(0) TFReaderOp OpId(1) TFReaderOp
*
* Start with an empty execution tree
*/
Status rc;
MS_LOG(INFO) << "UT test TestZipRepeat.";
auto my_tree = std::make_shared<ExecutionTree>();
@ -146,21 +142,21 @@ TEST_F(MindDataTestZipOp, MindDataTestZipOpRepeat) {
std::string dataset_path2 = datasets_root_path_ + "/testBatchDataset/test.data";
std::shared_ptr<TFReaderOp> my_tfreader_op;
rc = TFReaderOp::Builder()
.SetDatasetFilesList({dataset_path})
.SetRowsPerBuffer(2)
.SetWorkerConnectorSize(16)
.SetNumWorkers(1)
.Build(&my_tfreader_op);
.SetDatasetFilesList({dataset_path})
.SetWorkerConnectorSize(16)
.SetNumWorkers(1)
.Build(&my_tfreader_op);
EXPECT_TRUE(rc.IsOk());
rc = my_tree->AssociateNode(my_tfreader_op);
EXPECT_TRUE(rc.IsOk());
std::shared_ptr<TFReaderOp> my_tfreader_op2;
rc = TFReaderOp::Builder()
.SetDatasetFilesList({dataset_path2})
.SetRowsPerBuffer(2)
.SetWorkerConnectorSize(1)
.SetNumWorkers(1)
.Build(&my_tfreader_op2);
.SetDatasetFilesList({dataset_path2})
.SetWorkerConnectorSize(1)
.SetNumWorkers(1)
.Build(&my_tfreader_op2);
EXPECT_TRUE(rc.IsOk());
rc = my_tree->AssociateNode(my_tfreader_op2);
EXPECT_TRUE(rc.IsOk());
@ -221,5 +217,5 @@ TEST_F(MindDataTestZipOp, MindDataTestZipOpRepeat) {
EXPECT_TRUE(rc.IsOk());
row_count++;
}
ASSERT_EQ(row_count, 9); // Should be 9 rows fetched
ASSERT_EQ(row_count, 9); // Should be 9 rows fetched
}

View File

@ -43,21 +43,18 @@ def test_basic():
ds.config.load('../data/dataset/declient.cfg')
# assert ds.config.get_rows_per_buffer() == 32
assert ds.config.get_num_parallel_workers() == 8
# assert ds.config.get_worker_connector_size() == 16
assert ds.config.get_prefetch_size() == 16
assert ds.config.get_seed() == 5489
assert ds.config.get_monitor_sampling_interval() == 15
# ds.config.set_rows_per_buffer(1)
ds.config.set_num_parallel_workers(2)
# ds.config.set_worker_connector_size(3)
ds.config.set_prefetch_size(4)
ds.config.set_seed(5)
ds.config.set_monitor_sampling_interval(45)
# assert ds.config.get_rows_per_buffer() == 1
assert ds.config.get_num_parallel_workers() == 2
# assert ds.config.get_worker_connector_size() == 3
assert ds.config.get_prefetch_size() == 4