diff --git a/include/api/dual_abi_helper.h b/include/api/dual_abi_helper.h
index 6bf9c6eec8c..f94a3daa882 100644
--- a/include/api/dual_abi_helper.h
+++ b/include/api/dual_abi_helper.h
@@ -16,11 +16,147 @@
 #ifndef MINDSPORE_INCLUDE_API_DUAL_ABI_HELPER_H_
 #define MINDSPORE_INCLUDE_API_DUAL_ABI_HELPER_H_
 
+#include <algorithm>
+#include <map>
+#include <memory>
+#include <optional>
+#include <set>
 #include <string>
+#include <unordered_map>
+#include <utility>
 #include <vector>
 
 namespace mindspore {
 inline std::vector<char> StringToChar(const std::string &s) { return std::vector<char>(s.begin(), s.end()); }
+
 inline std::string CharToString(const std::vector<char> &c) { return std::string(c.begin(), c.end()); }
+
+inline std::optional<std::vector<char>> OptionalStringToChar(const std::optional<std::string> &s) {
+  std::optional<std::vector<char>> ret = std::vector<char>(s->begin(), s->end());
+  return (s == std::nullopt) ? std::nullopt : ret;
+}
+
+inline std::optional<std::string> OptionalCharToString(const std::optional<std::vector<char>> &c) {
+  std::optional<std::string> ret = std::string(c->begin(), c->end());
+  return (c == std::nullopt) ? std::nullopt : ret;
+}
+
+inline std::pair<std::vector<char>, int32_t> PairStringToChar(const std::pair<std::string, int32_t> &s) {
+  return std::pair<std::vector<char>, int32_t>(std::vector<char>(s.first.begin(), s.first.end()), s.second);
+}
+
+inline std::pair<std::string, int32_t> PairCharToString(const std::pair<std::vector<char>, int32_t> &c) {
+  return std::pair<std::string, int32_t>(std::string(c.first.begin(), c.first.end()), c.second);
+}
+
+inline std::vector<std::vector<char>> VectorStringToChar(const std::vector<std::string> &s) {
+  std::vector<std::vector<char>> ret;
+  std::transform(s.begin(), s.end(), std::back_inserter(ret),
+                 [](auto str) { return std::vector<char>(str.begin(), str.end()); });
+  return ret;
+}
+
+inline std::vector<std::string> VectorCharToString(const std::vector<std::vector<char>> &c) {
+  std::vector<std::string> ret;
+  std::transform(c.begin(), c.end(), std::back_inserter(ret),
+                 [](auto ch) { return std::string(ch.begin(), ch.end()); });
+  return ret;
+}
+
+inline std::set<std::vector<char>> SetStringToChar(const std::set<std::string> &s) {
+  std::set<std::vector<char>> ret;
+  std::transform(s.begin(), s.end(), std::inserter(ret, ret.begin()),
+                 [](auto str) { return std::vector<char>(str.begin(), str.end()); });
+  return ret;
+}
+
+inline std::set<std::string> SetCharToString(const std::set<std::vector<char>> &c) {
+  std::set<std::string> ret;
+  std::transform(c.begin(), c.end(), std::inserter(ret, ret.begin()),
+                 [](auto ch) { return std::string(ch.begin(), ch.end()); });
+  return ret;
+}
+
+inline std::map<std::vector<char>, int32_t> MapStringToChar(const std::map<std::string, int32_t> &s) {
+  std::map<std::vector<char>, int32_t> ret;
+  std::transform(s.begin(), s.end(), std::inserter(ret, ret.begin()), [](auto str) {
+    return std::pair<std::vector<char>, int32_t>(std::vector<char>(str.first.begin(), str.first.end()), str.second);
+  });
+  return ret;
+}
+
+inline std::map<std::string, int32_t> MapCharToString(const std::map<std::vector<char>, int32_t> &c) {
+  std::map<std::string, int32_t> ret;
+  std::transform(c.begin(), c.end(), std::inserter(ret, ret.begin()), [](auto ch) {
+    return std::pair<std::string, int32_t>(std::string(ch.first.begin(), ch.first.end()), ch.second);
+  });
+  return ret;
+}
+
+inline std::map<std::vector<char>, std::vector<char>> UnorderedMapStringToChar(
+  const std::unordered_map<std::string, std::string> &s) {
+  std::map<std::vector<char>, std::vector<char>> ret;
+  std::transform(s.begin(), s.end(), std::inserter(ret, ret.begin()), [](auto str) {
+    return std::pair<std::vector<char>, std::vector<char>>(std::vector<char>(str.first.begin(), str.first.end()),
+                                                           std::vector<char>(str.second.begin(), str.second.end()));
+  });
+  return ret;
+}
+
+inline std::unordered_map<std::string, std::string> UnorderedMapCharToString(
+  const std::map<std::vector<char>, std::vector<char>> &c) {
+  std::unordered_map<std::string, std::string> ret;
+  std::transform(c.begin(), c.end(), std::inserter(ret, ret.begin()), [](auto ch) {
+    return std::pair<std::string, std::string>(std::string(ch.first.begin(), ch.first.end()),
+                                               std::string(ch.second.begin(), ch.second.end()));
+  });
+  return ret;
+}
+
+inline std::vector<std::pair<std::vector<char>, std::vector<int32_t>>> ClassIndexStringToChar(
+  const std::vector<std::pair<std::string, std::vector<int32_t>>> &s) {
+  std::vector<std::pair<std::vector<char>, std::vector<int32_t>>> ret;
+  std::transform(s.begin(), s.end(), std::back_inserter(ret), [](auto str) {
+    return std::pair<std::vector<char>, std::vector<int32_t>>(std::vector<char>(str.first.begin(), str.first.end()),
+                                                              str.second);
+  });
+  return ret;
+}
+
+inline std::vector<std::pair<std::string, std::vector<int32_t>>> ClassIndexCharToString(
+  const std::vector<std::pair<std::vector<char>, std::vector<int32_t>>> &c) {
+  std::vector<std::pair<std::string, std::vector<int32_t>>> ret;
+  std::transform(c.begin(), c.end(), std::back_inserter(ret), [](auto ch) {
+    return std::pair<std::string, std::vector<int32_t>>(std::string(ch.first.begin(), ch.first.end()), ch.second);
+  });
+  return ret;
+}
+
+template <class T>
+inline std::map<std::vector<char>, T> PadInfoStringToChar(const std::map<std::string, T> &s_pad_info) {
+  std::map<std::vector<char>, T> ret;
+  std::transform(s_pad_info.begin(), s_pad_info.end(), std::inserter(ret, ret.begin()), [](auto str) {
+    return std::pair<std::vector<char>, T>(std::vector<char>(str.first.begin(), str.first.end()), str.second);
+  });
+  return ret;
+}
+
+template <class T>
+inline std::map<std::string, T> PadInfoCharToString(const std::map<std::vector<char>, T> &c_pad_info) {
+  std::map<std::string, T> ret;
+  std::transform(c_pad_info.begin(), c_pad_info.end(), std::inserter(ret, ret.begin()), [](auto ch) {
+    return std::pair<std::string, T>(std::string(ch.first.begin(), ch.first.end()), ch.second);
+  });
+  return ret;
+}
+
+template <class T>
+inline void TensorMapCharToString(const std::map<std::vector<char>, T> *c, std::unordered_map<std::string, T> *s) {
+  for (auto ch : *c) {
+    auto key = std::string(ch.first.begin(), ch.first.end());
+    auto val = ch.second;
+    s->insert(std::pair<std::string, T>(key, val));
+  }
+}
 }  // namespace mindspore
 #endif  // MINDSPORE_INCLUDE_API_DUAL_ABI_HELPER_H_
diff --git a/mindspore/ccsrc/minddata/dataset/api/config.cc b/mindspore/ccsrc/minddata/dataset/api/config.cc
index 0f5bfe9217e..ad636a09ec7 100644
--- a/mindspore/ccsrc/minddata/dataset/api/config.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/config.cc
@@ -94,8 +94,8 @@ bool set_callback_timeback(int32_t timeout) {
 int32_t get_callback_timeout() { return _config->callback_timeout(); }
 
 // Function to load configurations from a file
-bool load(std::string file) {
-  Status rc = _config->LoadFile(file);
+bool load(const std::vector<char> &file) {
+  Status rc = _config->LoadFile(CharToString(file));
   if (rc.IsError()) {
     MS_LOG(ERROR) << rc << file;
     return false;
diff --git a/mindspore/ccsrc/minddata/dataset/api/datasets.cc b/mindspore/ccsrc/minddata/dataset/api/datasets.cc
index ef3c8b8b07d..fb6a03bdda7 100644
--- a/mindspore/ccsrc/minddata/dataset/api/datasets.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/datasets.cc
@@ -102,15 +102,15 @@ namespace mindspore {
 namespace dataset {
 
 // Function to create the iterator, which will build and launch the execution tree.
-std::shared_ptr<Iterator> Dataset::CreateIterator(std::vector<std::string> columns, int32_t num_epochs) {
+std::shared_ptr<Iterator> Dataset::CreateIteratorCharIF(std::vector<std::vector<char>> columns, int32_t num_epochs) {
   std::shared_ptr<Iterator> iter;
   try {
     auto ds = shared_from_this();
 
     // The specified columns will be selected from the dataset and passed down the pipeline
     // in the order specified, other columns will be discarded.
-    if (!columns.empty()) {
-      ds = ds->Project(columns);
+    if (!VectorCharToString(columns).empty()) {
+      ds = ds->Project(VectorCharToString(columns));
     }
 
     iter = std::make_shared<Iterator>();
@@ -131,8 +131,9 @@ std::shared_ptr<Iterator> Dataset::CreateIterator(std::vector<std::string> colum
 
 #ifndef ENABLE_ANDROID
 // Function to return a transferred Node that transfers data through a device.
-bool Dataset::DeviceQueue(std::string queue_name, std::string device_type, int32_t num_epochs, bool send_epoch_end, - int32_t total_batches, bool create_data_info_queue) { +bool Dataset::DeviceQueueCharIF(const std::vector &queue_name, const std::vector &device_type, + int32_t num_epochs, bool send_epoch_end, int32_t total_batches, + bool create_data_info_queue) { Status rc; // Build and launch tree @@ -144,8 +145,9 @@ bool Dataset::DeviceQueue(std::string queue_name, std::string device_type, int32 } // Add TransferNode IR on top of dataset - auto ds = std::make_shared(shared_from_this()->IRNode(), queue_name, device_type, send_epoch_end, - total_batches, create_data_info_queue); + auto ds = + std::make_shared(shared_from_this()->IRNode(), CharToString(queue_name), CharToString(device_type), + send_epoch_end, total_batches, create_data_info_queue); // Get ToDevice consumer auto consumer = std::make_unique(num_epochs); @@ -168,7 +170,8 @@ bool Dataset::DeviceQueue(std::string queue_name, std::string device_type, int32 } // Function to create the saver, which will build and launch the execution tree and save data -bool Dataset::Save(std::string dataset_path, int32_t num_files, std::string dataset_type) { +bool Dataset::SaveCharIF(const std::vector &dataset_path, int32_t num_files, + const std::vector &dataset_type) { Status rc; // Build and launch tree auto ds = shared_from_this(); @@ -180,7 +183,7 @@ bool Dataset::Save(std::string dataset_path, int32_t num_files, std::string data } // Get SaveToDisk consumer - auto consumer = std::make_unique(dataset_path, num_files, dataset_type); + auto consumer = std::make_unique(CharToString(dataset_path), num_files, CharToString(dataset_type)); rc = consumer->ValidateParams(); if (rc.IsError()) { MS_LOG(ERROR) << "CreateSaver failed." << rc; @@ -252,365 +255,32 @@ int64_t Dataset::GetNumClasses() { return num_classes; } -std::vector Dataset::GetColumnNames() { +std::vector> Dataset::GetColumnNamesCharIF() { std::vector col_names; std::unique_ptr runtime_context = std::make_unique(); RETURN_SECOND_IF_ERROR(runtime_context->Init(), {}); RETURN_SECOND_IF_ERROR(tree_getters_->Init(this->IRNode()), {}); RETURN_SECOND_IF_ERROR(tree_getters_->GetColumnNames(&col_names), {}); - return col_names; + return VectorStringToChar(col_names); } -std::vector>> Dataset::GetClassIndexing() { +std::vector, std::vector>> Dataset::GetClassIndexingCharIF() { std::vector>> output_class_indexing; std::unique_ptr runtime_context = std::make_unique(); RETURN_SECOND_IF_ERROR(runtime_context->Init(), {}); RETURN_SECOND_IF_ERROR(tree_getters_->Init(this->IRNode()), {}); RETURN_SECOND_IF_ERROR(tree_getters_->GetClassIndexing(&output_class_indexing), {}); - return output_class_indexing; + return ClassIndexStringToChar(output_class_indexing); } /// \brief Function to create a SchemaObj /// \param[in] schema_file Path of schema file /// \return Shared pointer to the current schema -std::shared_ptr Schema(const std::string &schema_file) { - auto schema = std::make_shared(schema_file); - +std::shared_ptr SchemaCharIF(const std::vector &schema_file) { + auto schema = std::make_shared(CharToString(schema_file)); return schema->Init() ? schema : nullptr; } -// FUNCTIONS TO CREATE DATASETS FOR LEAF CLASSES -// (In alphabetical order) - -// Function to create a AlbumDataset. 
-std::shared_ptr Album(const std::string &dataset_dir, const std::string &data_schema, - const std::vector &column_names, bool decode, - const std::shared_ptr &sampler, - const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_dir, data_schema, column_names, decode, sampler, cache); - - return ds; -} -// Function to create a AlbumDataset. -std::shared_ptr Album(const std::string &dataset_dir, const std::string &data_schema, - const std::vector &column_names, bool decode, Sampler *sampler, - const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_dir, data_schema, column_names, decode, sampler, cache); - - return ds; -} -// Function to create a AlbumDataset. -std::shared_ptr Album(const std::string &dataset_dir, const std::string &data_schema, - const std::vector &column_names, bool decode, - const std::reference_wrapper sampler, - const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_dir, data_schema, column_names, decode, sampler, cache); - - return ds; -} - -#ifndef ENABLE_ANDROID -// Function to create a CelebADataset. -std::shared_ptr CelebA(const std::string &dataset_dir, const std::string &usage, - const std::shared_ptr &sampler, bool decode, - const std::set &extensions, - const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_dir, usage, sampler, decode, extensions, cache); - - return ds; -} -// Function to create a CelebADataset. -std::shared_ptr CelebA(const std::string &dataset_dir, const std::string &usage, Sampler *sampler, - bool decode, const std::set &extensions, - const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_dir, usage, sampler, decode, extensions, cache); - - return ds; -} -// Function to create a CelebADataset. -std::shared_ptr CelebA(const std::string &dataset_dir, const std::string &usage, - const std::reference_wrapper sampler, bool decode, - const std::set &extensions, - const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_dir, usage, sampler, decode, extensions, cache); - - return ds; -} - -// Function to create a Cifar10Dataset. -std::shared_ptr Cifar10(const std::string &dataset_dir, const std::string &usage, - const std::shared_ptr &sampler, - const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_dir, usage, sampler, cache); - - return ds; -} -// Function to create a Cifar10Dataset. -std::shared_ptr Cifar10(const std::string &dataset_dir, const std::string &usage, Sampler *sampler, - const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_dir, usage, sampler, cache); - - return ds; -} -// Function to create a Cifar10Dataset. -std::shared_ptr Cifar10(const std::string &dataset_dir, const std::string &usage, - const std::reference_wrapper sampler, - const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_dir, usage, sampler, cache); - - return ds; -} - -// Function to create a Cifar100Dataset. -std::shared_ptr Cifar100(const std::string &dataset_dir, const std::string &usage, - const std::shared_ptr &sampler, - const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_dir, usage, sampler, cache); - - return ds; -} -// Function to create a Cifar100Dataset. -std::shared_ptr Cifar100(const std::string &dataset_dir, const std::string &usage, Sampler *sampler, - const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_dir, usage, sampler, cache); - - return ds; -} -// Function to create a Cifar100Dataset. 
-std::shared_ptr Cifar100(const std::string &dataset_dir, const std::string &usage, - const std::reference_wrapper sampler, - const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_dir, usage, sampler, cache); - - return ds; -} - -// Function to create a CLUEDataset. -std::shared_ptr CLUE(const std::vector &clue_files, const std::string &task, - const std::string &usage, int64_t num_samples, ShuffleMode shuffle, - int32_t num_shards, int32_t shard_id, const std::shared_ptr &cache) { - auto ds = std::make_shared(clue_files, task, usage, num_samples, shuffle, num_shards, shard_id, cache); - - return ds; -} - -// Function to create a CocoDataset. -std::shared_ptr Coco(const std::string &dataset_dir, const std::string &annotation_file, - const std::string &task, const bool &decode, const std::shared_ptr &sampler, - const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_dir, annotation_file, task, decode, sampler, cache); - - return ds; -} -// Function to create a CocoDataset. -std::shared_ptr Coco(const std::string &dataset_dir, const std::string &annotation_file, - const std::string &task, const bool &decode, Sampler *sampler, - const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_dir, annotation_file, task, decode, sampler, cache); - - return ds; -} -// Function to create a CocoDataset. -std::shared_ptr Coco(const std::string &dataset_dir, const std::string &annotation_file, - const std::string &task, const bool &decode, - const std::reference_wrapper sampler, - const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_dir, annotation_file, task, decode, sampler, cache); - - return ds; -} - -// Function to create a CSVDataset. -std::shared_ptr CSV(const std::vector &dataset_files, char field_delim, - const std::vector> &column_defaults, - const std::vector &column_names, int64_t num_samples, ShuffleMode shuffle, - int32_t num_shards, int32_t shard_id, const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_files, field_delim, column_defaults, column_names, num_samples, - shuffle, num_shards, shard_id, cache); - - return ds; -} - -// Function to create a ImageFolderDataset. -std::shared_ptr ImageFolder(const std::string &dataset_dir, bool decode, - const std::shared_ptr &sampler, - const std::set &extensions, - const std::map &class_indexing, - const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_dir, decode, sampler, extensions, class_indexing, cache); - - return ds; -} -// Function to create a ImageFolderDataset. -std::shared_ptr ImageFolder(const std::string &dataset_dir, bool decode, Sampler *sampler, - const std::set &extensions, - const std::map &class_indexing, - const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_dir, decode, sampler, extensions, class_indexing, cache); - - return ds; -} -// Function to create a ImageFolderDataset. -std::shared_ptr ImageFolder(const std::string &dataset_dir, bool decode, - const std::reference_wrapper sampler, - const std::set &extensions, - const std::map &class_indexing, - const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_dir, decode, sampler, extensions, class_indexing, cache); - - return ds; -} - -// Function to create a ManifestDataset. 
-std::shared_ptr Manifest(const std::string &dataset_file, const std::string &usage, - const std::shared_ptr &sampler, - const std::map &class_indexing, bool decode, - const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_file, usage, sampler, class_indexing, decode, cache); - - return ds; -} -// Function to create a ManifestDataset. -std::shared_ptr Manifest(const std::string &dataset_file, const std::string &usage, Sampler *sampler, - const std::map &class_indexing, bool decode, - const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_file, usage, sampler, class_indexing, decode, cache); - - return ds; -} -// Function to create a ManifestDataset. -std::shared_ptr Manifest(const std::string &dataset_file, const std::string &usage, - const std::reference_wrapper sampler, - const std::map &class_indexing, bool decode, - const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_file, usage, sampler, class_indexing, decode, cache); - - return ds; -} - -// Function to create a MindDataDataset. -std::shared_ptr MindData(const std::string &dataset_file, const std::vector &columns_list, - const std::shared_ptr &sampler, nlohmann::json padded_sample, - int64_t num_padded) { - auto ds = std::make_shared(dataset_file, columns_list, sampler, padded_sample, num_padded); - - return ds; -} -// Function to create a MindDataDataset. -std::shared_ptr MindData(const std::string &dataset_file, const std::vector &columns_list, - Sampler *sampler, nlohmann::json padded_sample, int64_t num_padded) { - auto ds = std::make_shared(dataset_file, columns_list, sampler, padded_sample, num_padded); - - return ds; -} -// Function to create a MindDataDataset. -std::shared_ptr MindData(const std::string &dataset_file, const std::vector &columns_list, - const std::reference_wrapper sampler, nlohmann::json padded_sample, - int64_t num_padded) { - auto ds = std::make_shared(dataset_file, columns_list, sampler, padded_sample, num_padded); - - return ds; -} - -// Function to create a MindDataDataset. -std::shared_ptr MindData(const std::vector &dataset_files, - const std::vector &columns_list, - const std::shared_ptr &sampler, nlohmann::json padded_sample, - int64_t num_padded) { - auto ds = std::make_shared(dataset_files, columns_list, sampler, padded_sample, num_padded); - - return ds; -} -// Function to create a MindDataDataset. -std::shared_ptr MindData(const std::vector &dataset_files, - const std::vector &columns_list, Sampler *sampler, - nlohmann::json padded_sample, int64_t num_padded) { - auto ds = std::make_shared(dataset_files, columns_list, sampler, padded_sample, num_padded); - - return ds; -} -// Function to create a MindDataDataset. -std::shared_ptr MindData(const std::vector &dataset_files, - const std::vector &columns_list, - const std::reference_wrapper sampler, nlohmann::json padded_sample, - int64_t num_padded) { - auto ds = std::make_shared(dataset_files, columns_list, sampler, padded_sample, num_padded); - - return ds; -} -#endif -// Function to create a MnistDataset. -std::shared_ptr Mnist(const std::string &dataset_dir, const std::string &usage, - const std::shared_ptr &sampler, - const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_dir, usage, sampler, cache); - - return ds; -} -// Function to create a MnistDataset. 
-std::shared_ptr Mnist(const std::string &dataset_dir, const std::string &usage, Sampler *sampler, - const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_dir, usage, sampler, cache); - - return ds; -} -// Function to create a MnistDataset. -std::shared_ptr Mnist(const std::string &dataset_dir, const std::string &usage, - const std::reference_wrapper sampler, - const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_dir, usage, sampler, cache); - - return ds; -} - -#ifndef ENABLE_ANDROID -// Function to overload "+" operator to concat two datasets -std::shared_ptr operator+(const std::shared_ptr &datasets1, - const std::shared_ptr &datasets2) { - return std::make_shared(std::vector({datasets1, datasets2})); -} - -// Function to create a TextFileDataset. -std::shared_ptr TextFile(const std::vector &dataset_files, int64_t num_samples, - ShuffleMode shuffle, int32_t num_shards, int32_t shard_id, - const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_files, num_samples, shuffle, num_shards, shard_id, cache); - - return ds; -} - -// Function to create a VOCDataset. -std::shared_ptr VOC(const std::string &dataset_dir, const std::string &task, const std::string &usage, - const std::map &class_indexing, bool decode, - const std::shared_ptr &sampler, const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_dir, task, usage, class_indexing, decode, sampler, cache); - - return ds; -} -// Function to create a VOCDataset. -std::shared_ptr VOC(const std::string &dataset_dir, const std::string &task, const std::string &usage, - const std::map &class_indexing, bool decode, Sampler *sampler, - const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_dir, task, usage, class_indexing, decode, sampler, cache); - - return ds; -} -// Function to create a VOCDataset. -std::shared_ptr VOC(const std::string &dataset_dir, const std::string &task, const std::string &usage, - const std::map &class_indexing, bool decode, - const std::reference_wrapper sampler, - const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_dir, task, usage, class_indexing, decode, sampler, cache); - - return ds; -} - -// Function to create a ZipDatset. 
-std::shared_ptr Zip(const std::vector> &datasets) { - auto ds = std::make_shared(datasets); - return ds; -} -#endif // FUNCTIONS TO CREATE DATASETS FOR DATASET OPS // (In alphabetical order) @@ -624,18 +294,18 @@ BatchDataset::BatchDataset(std::shared_ptr input, int32_t batch_size, b #ifndef ENABLE_ANDROID // Function to create a BucketBatchByLength dataset BucketBatchByLengthDataset::BucketBatchByLengthDataset( - std::shared_ptr input, const std::vector &column_names, + std::shared_ptr input, const std::vector> &column_names, const std::vector &bucket_boundaries, const std::vector &bucket_batch_sizes, std::function element_length_function, - const std::map>> &pad_info, bool pad_to_bucket_boundary, - bool drop_remainder) { + const std::map, std::pair>> &pad_info, + bool pad_to_bucket_boundary, bool drop_remainder) { std::shared_ptr c_func = nullptr; if (element_length_function != nullptr) { c_func = std::make_shared(element_length_function); } - auto ds = - std::make_shared(input->IRNode(), column_names, bucket_boundaries, bucket_batch_sizes, - c_func, pad_info, pad_to_bucket_boundary, drop_remainder); + auto ds = std::make_shared( + input->IRNode(), VectorCharToString(column_names), bucket_boundaries, bucket_batch_sizes, c_func, + PadInfoCharToString(pad_info), pad_to_bucket_boundary, drop_remainder); ir_node_ = std::static_pointer_cast(ds); } @@ -653,34 +323,37 @@ ConcatDataset::ConcatDataset(const std::vector> &datase } FilterDataset::FilterDataset(std::shared_ptr input, std::function predicate, - const std::vector &input_columns) { + const std::vector> &input_columns) { std::shared_ptr c_func = nullptr; if (predicate) c_func = std::make_shared(predicate); - auto ds = std::make_shared(input->IRNode(), c_func, input_columns); + auto ds = std::make_shared(input->IRNode(), c_func, VectorCharToString(input_columns)); ir_node_ = std::static_pointer_cast(ds); } #endif MapDataset::MapDataset(std::shared_ptr input, std::vector> operations, - const std::vector &input_columns, const std::vector &output_columns, - const std::vector &project_columns, const std::shared_ptr &cache, - std::vector> callbacks) { - auto ds = std::make_shared(input->IRNode(), operations, input_columns, output_columns, project_columns, - cache, callbacks); + const std::vector> &input_columns, + const std::vector> &output_columns, + const std::vector> &project_columns, + const std::shared_ptr &cache, std::vector> callbacks) { + auto ds = std::make_shared(input->IRNode(), operations, VectorCharToString(input_columns), + VectorCharToString(output_columns), VectorCharToString(project_columns), cache, + callbacks); ir_node_ = std::static_pointer_cast(ds); } -ProjectDataset::ProjectDataset(std::shared_ptr input, const std::vector &columns) { - auto ds = std::make_shared(input->IRNode(), columns); +ProjectDataset::ProjectDataset(std::shared_ptr input, const std::vector> &columns) { + auto ds = std::make_shared(input->IRNode(), VectorCharToString(columns)); ir_node_ = std::static_pointer_cast(ds); } #ifndef ENABLE_ANDROID -RenameDataset::RenameDataset(std::shared_ptr input, const std::vector &input_columns, - const std::vector &output_columns) { - auto ds = std::make_shared(input->IRNode(), input_columns, output_columns); +RenameDataset::RenameDataset(std::shared_ptr input, const std::vector> &input_columns, + const std::vector> &output_columns) { + auto ds = std::make_shared(input->IRNode(), VectorCharToString(input_columns), + VectorCharToString(output_columns)); ir_node_ = std::static_pointer_cast(ds); } @@ -749,12 +422,12 @@ 
std::shared_ptr Dataset::SetNumWorkers(int32_t num_workers) { } #ifndef ENABLE_ANDROID -std::shared_ptr Dataset::BuildSentencePieceVocab( - const std::vector &col_names, int32_t vocab_size, float character_coverage, - SentencePieceModel model_type, const std::unordered_map ¶ms) { +std::shared_ptr Dataset::BuildSentencePieceVocabCharIF( + const std::vector> &col_names, int32_t vocab_size, float character_coverage, + SentencePieceModel model_type, const std::map, std::vector> ¶ms) { auto vocab = std::make_shared(); - auto ds = std::make_shared(IRNode(), vocab, col_names, vocab_size, character_coverage, - model_type, params); + auto ds = std::make_shared(IRNode(), vocab, VectorCharToString(col_names), vocab_size, + character_coverage, model_type, UnorderedMapCharToString(params)); std::unique_ptr runtime_context = std::make_unique(); Status rc = runtime_context->Init(); @@ -781,12 +454,13 @@ std::shared_ptr Dataset::BuildSentencePieceVocab( return vocab; } -std::shared_ptr Dataset::BuildVocab(const std::vector &columns, - const std::pair &freq_range, int64_t top_k, - const std::vector &special_tokens, bool special_first) { +std::shared_ptr Dataset::BuildVocabCharIF(const std::vector> &columns, + const std::pair &freq_range, int64_t top_k, + const std::vector> &special_tokens, + bool special_first) { auto vocab = std::make_shared(); - auto ds = - std::make_shared(IRNode(), vocab, columns, freq_range, top_k, special_tokens, special_first); + auto ds = std::make_shared(IRNode(), vocab, VectorCharToString(columns), freq_range, top_k, + VectorCharToString(special_tokens), special_first); std::unique_ptr runtime_context = std::make_unique(); Status rc = runtime_context->Init(); @@ -818,18 +492,29 @@ std::shared_ptr Dataset::Batch(int32_t batch_size, bool drop_remai return std::make_shared(shared_from_this(), batch_size, drop_remainder); } -SchemaObj::SchemaObj(const std::string &schema_file) : schema_file_(schema_file), num_rows_(0), dataset_type_("") {} +struct SchemaObj::Data { + int32_t num_rows_; + std::string dataset_type_; + std::string schema_file_; + nlohmann::json columns_; +}; + +SchemaObj::SchemaObj(const std::vector &schema_file) : data_(std::make_shared()) { + data_->schema_file_ = CharToString(schema_file); + data_->dataset_type_ = ""; + data_->num_rows_ = 0; +} // SchemaObj Init function Status SchemaObj::Init() { - if (!schema_file_.empty()) { - Path schema_file(schema_file_); + if (!data_->schema_file_.empty()) { + Path schema_file(data_->schema_file_); CHECK_FAIL_RETURN_UNEXPECTED(schema_file.Exists(), - "The file " + schema_file_ + " does not exist or permission denied!"); + "The file " + data_->schema_file_ + " does not exist or permission denied!"); nlohmann::json js; try { - std::ifstream in(schema_file_); + std::ifstream in(data_->schema_file_); in >> js; CHECK_FAIL_RETURN_UNEXPECTED(js.find("columns") != js.end(), "\"columns\" node is required in the schema json file."); @@ -843,65 +528,73 @@ Status SchemaObj::Init() { } // Function to add a column to schema with a mstype de_type and known shape -Status SchemaObj::add_column(const std::string &name, TypeId de_type, const std::vector &shape) { +Status SchemaObj::add_column_char(const std::vector &name, TypeId de_type, const std::vector &shape) { DataType data_type = dataset::MSTypeToDEType(de_type); - return add_column(name, data_type.ToString(), shape); + return add_column_char(name, StringToChar(data_type.ToString()), shape); } // Function to add a column to schema with a string de_type and known shape -Status 
SchemaObj::add_column(const std::string &name, const std::string &de_type, const std::vector &shape) { - DataType data_type(de_type); +Status SchemaObj::add_column_char(const std::vector &name, const std::vector &de_type, + const std::vector &shape) { + DataType data_type(CharToString(de_type)); CHECK_FAIL_RETURN_UNEXPECTED(data_type != DataType::DE_UNKNOWN, "Type is unknown."); nlohmann::json new_column; - new_column["name"] = name; + new_column["name"] = CharToString(name); new_column["type"] = data_type.ToString(); new_column["shape"] = shape; new_column["rank"] = shape.size(); - columns_.push_back(new_column); + data_->columns_.push_back(new_column); return Status::OK(); } // Function to add a column to schema with a mstype de_type and without shape -Status SchemaObj::add_column(const std::string &name, TypeId de_type) { +Status SchemaObj::add_column_char(const std::vector &name, TypeId de_type) { DataType data_type = dataset::MSTypeToDEType(de_type); - return add_column(name, data_type.ToString()); + return add_column_char(name, StringToChar(data_type.ToString())); } // Function to add a column to schema with a string de_type and without shape -Status SchemaObj::add_column(const std::string &name, const std::string &de_type) { - DataType data_type(de_type); +Status SchemaObj::add_column_char(const std::vector &name, const std::vector &de_type) { + DataType data_type(CharToString(de_type)); CHECK_FAIL_RETURN_UNEXPECTED(data_type != DataType::DE_UNKNOWN, "Type is unknown."); nlohmann::json new_column; - new_column["name"] = name; + new_column["name"] = CharToString(name); new_column["type"] = data_type.ToString(); new_column["rank"] = 1; - columns_.push_back(new_column); + data_->columns_.push_back(new_column); return Status::OK(); } -std::string SchemaObj::to_json() { +const std::vector SchemaObj::to_json_char() { nlohmann::json json_file; - json_file["columns"] = columns_; - if (dataset_type_ != "") { - json_file["datasetType"] = dataset_type_; + json_file["columns"] = data_->columns_; + std::string str_dataset_type_(data_->dataset_type_); + if (str_dataset_type_ != "") { + json_file["datasetType"] = str_dataset_type_; } - if (num_rows_ > 0) { - json_file["numRows"] = num_rows_; + if (data_->num_rows_ > 0) { + json_file["numRows"] = data_->num_rows_; } - return json_file.dump(2); + return StringToChar(json_file.dump(2)); } +void SchemaObj::set_dataset_type(std::string dataset_type) { data_->dataset_type_ = dataset_type.data(); } + +void SchemaObj::set_num_rows(int32_t num_rows) { data_->num_rows_ = num_rows; } + +int32_t SchemaObj::get_num_rows() const { return data_->num_rows_; } + Status SchemaObj::parse_column(nlohmann::json columns) { std::string name, de_type; std::vector shape; - columns_.clear(); + data_->columns_.clear(); if (columns.type() == nlohmann::json::value_t::array) { // reference to python list for (auto column : columns) { @@ -950,28 +643,29 @@ Status SchemaObj::parse_column(nlohmann::json columns) { Status SchemaObj::from_json(nlohmann::json json_obj) { for (const auto &it_child : json_obj.items()) { if (it_child.key() == "datasetType") { - dataset_type_ = it_child.value(); + std::string str_dataset_type_ = it_child.value(); + data_->dataset_type_ = str_dataset_type_.data(); } else if (it_child.key() == "numRows") { - num_rows_ = it_child.value(); + data_->num_rows_ = it_child.value(); } else if (it_child.key() == "columns") { RETURN_IF_NOT_OK(parse_column(it_child.value())); } else { RETURN_STATUS_SYNTAX_ERROR("Unknown field " + it_child.key()); } } - if 
(columns_.empty()) { + if (data_->columns_.empty()) { RETURN_STATUS_SYNTAX_ERROR("Columns are missing."); } - if (num_rows_ < 0) { + if (data_->num_rows_ < 0) { RETURN_STATUS_SYNTAX_ERROR("numRows must be greater than or equal to 0"); } return Status::OK(); } -Status SchemaObj::FromJSONString(const std::string &json_string) { +Status SchemaObj::FromJSONStringCharIF(const std::vector &json_string) { try { - nlohmann::json js = nlohmann::json::parse(json_string); + nlohmann::json js = nlohmann::json::parse(CharToString(json_string)); CHECK_FAIL_RETURN_UNEXPECTED(js.find("columns") != js.end(), "\"columns\" node is required in the schema json JSON."); RETURN_IF_NOT_OK(from_json(js)); @@ -983,9 +677,9 @@ Status SchemaObj::FromJSONString(const std::string &json_string) { return Status::OK(); } -Status SchemaObj::ParseColumnString(const std::string &json_string) { +Status SchemaObj::ParseColumnStringCharIF(const std::vector &json_string) { try { - nlohmann::json js = nlohmann::json::parse(json_string); + nlohmann::json js = nlohmann::json::parse(CharToString(json_string)); RETURN_IF_NOT_OK(parse_column(js)); } catch (const std::exception &err) { std::string err_msg = "ParseColumnString: JSON string failed to parse: "; @@ -999,322 +693,363 @@ Status SchemaObj::ParseColumnString(const std::string &json_string) { #ifndef ENABLE_ANDROID -std::shared_ptr CreateDatasetCache(session_id_type id, uint64_t mem_sz, bool spill, - std::optional hostname, std::optional port, - std::optional num_connections, - std::optional prefetch_sz) { +std::shared_ptr CreateDatasetCacheCharIF(session_id_type id, uint64_t mem_sz, bool spill, + std::optional> hostname, + std::optional port, + std::optional num_connections, + std::optional prefetch_sz) { auto cache = std::make_shared(id, mem_sz, spill, hostname, port, num_connections, prefetch_sz); return cache; } #endif -AlbumDataset::AlbumDataset(const std::string &dataset_dir, const std::string &data_schema, - const std::vector &column_names, bool decode, +AlbumDataset::AlbumDataset(const std::vector &dataset_dir, const std::vector &data_schema, + const std::vector> &column_names, bool decode, const std::shared_ptr &sampler, const std::shared_ptr &cache) { auto sampler_obj = sampler ? sampler->Parse() : nullptr; - auto ds = std::make_shared(dataset_dir, data_schema, column_names, decode, sampler_obj, cache); + auto ds = std::make_shared(CharToString(dataset_dir), CharToString(data_schema), + VectorCharToString(column_names), decode, sampler_obj, cache); ir_node_ = std::static_pointer_cast(ds); } -AlbumDataset::AlbumDataset(const std::string &dataset_dir, const std::string &data_schema, - const std::vector &column_names, bool decode, Sampler *sampler, +AlbumDataset::AlbumDataset(const std::vector &dataset_dir, const std::vector &data_schema, + const std::vector> &column_names, bool decode, Sampler *sampler, const std::shared_ptr &cache) { auto sampler_obj = sampler ? 
sampler->Parse() : nullptr; - auto ds = std::make_shared(dataset_dir, data_schema, column_names, decode, sampler_obj, cache); + auto ds = std::make_shared(CharToString(dataset_dir), CharToString(data_schema), + VectorCharToString(column_names), decode, sampler_obj, cache); ir_node_ = std::static_pointer_cast(ds); } -AlbumDataset::AlbumDataset(const std::string &dataset_dir, const std::string &data_schema, - const std::vector &column_names, bool decode, +AlbumDataset::AlbumDataset(const std::vector &dataset_dir, const std::vector &data_schema, + const std::vector> &column_names, bool decode, const std::reference_wrapper sampler, const std::shared_ptr &cache) { auto sampler_obj = sampler.get().Parse(); - auto ds = std::make_shared(dataset_dir, data_schema, column_names, decode, sampler_obj, cache); + auto ds = std::make_shared(CharToString(dataset_dir), CharToString(data_schema), + VectorCharToString(column_names), decode, sampler_obj, cache); ir_node_ = std::static_pointer_cast(ds); } #ifndef ENABLE_ANDROID -CelebADataset::CelebADataset(const std::string &dataset_dir, const std::string &usage, +CelebADataset::CelebADataset(const std::vector &dataset_dir, const std::vector &usage, const std::shared_ptr &sampler, bool decode, - const std::set &extensions, const std::shared_ptr &cache) { + const std::set> &extensions, + const std::shared_ptr &cache) { auto sampler_obj = sampler ? sampler->Parse() : nullptr; - auto ds = std::make_shared(dataset_dir, usage, sampler_obj, decode, extensions, cache); + auto ds = std::make_shared(CharToString(dataset_dir), CharToString(usage), sampler_obj, decode, + SetCharToString(extensions), cache); ir_node_ = std::static_pointer_cast(ds); } -CelebADataset::CelebADataset(const std::string &dataset_dir, const std::string &usage, Sampler *sampler, bool decode, - const std::set &extensions, const std::shared_ptr &cache) { +CelebADataset::CelebADataset(const std::vector &dataset_dir, const std::vector &usage, Sampler *sampler, + bool decode, const std::set> &extensions, + const std::shared_ptr &cache) { auto sampler_obj = sampler ? sampler->Parse() : nullptr; - auto ds = std::make_shared(dataset_dir, usage, sampler_obj, decode, extensions, cache); + auto ds = std::make_shared(CharToString(dataset_dir), CharToString(usage), sampler_obj, decode, + SetCharToString(extensions), cache); ir_node_ = std::static_pointer_cast(ds); } -CelebADataset::CelebADataset(const std::string &dataset_dir, const std::string &usage, +CelebADataset::CelebADataset(const std::vector &dataset_dir, const std::vector &usage, const std::reference_wrapper sampler, bool decode, - const std::set &extensions, const std::shared_ptr &cache) { + const std::set> &extensions, + const std::shared_ptr &cache) { auto sampler_obj = sampler.get().Parse(); - auto ds = std::make_shared(dataset_dir, usage, sampler_obj, decode, extensions, cache); + auto ds = std::make_shared(CharToString(dataset_dir), CharToString(usage), sampler_obj, decode, + SetCharToString(extensions), cache); ir_node_ = std::static_pointer_cast(ds); } -Cifar10Dataset::Cifar10Dataset(const std::string &dataset_dir, const std::string &usage, +Cifar10Dataset::Cifar10Dataset(const std::vector &dataset_dir, const std::vector &usage, const std::shared_ptr &sampler, const std::shared_ptr &cache) { auto sampler_obj = sampler ? 
sampler->Parse() : nullptr; - auto ds = std::make_shared(dataset_dir, usage, sampler_obj, cache); + auto ds = std::make_shared(CharToString(dataset_dir), CharToString(usage), sampler_obj, cache); ir_node_ = std::static_pointer_cast(ds); } -Cifar10Dataset::Cifar10Dataset(const std::string &dataset_dir, const std::string &usage, Sampler *sampler, +Cifar10Dataset::Cifar10Dataset(const std::vector &dataset_dir, const std::vector &usage, Sampler *sampler, const std::shared_ptr &cache) { auto sampler_obj = sampler ? sampler->Parse() : nullptr; - auto ds = std::make_shared(dataset_dir, usage, sampler_obj, cache); + auto ds = std::make_shared(CharToString(dataset_dir), CharToString(usage), sampler_obj, cache); ir_node_ = std::static_pointer_cast(ds); } -Cifar10Dataset::Cifar10Dataset(const std::string &dataset_dir, const std::string &usage, +Cifar10Dataset::Cifar10Dataset(const std::vector &dataset_dir, const std::vector &usage, const std::reference_wrapper sampler, const std::shared_ptr &cache) { auto sampler_obj = sampler.get().Parse(); - auto ds = std::make_shared(dataset_dir, usage, sampler_obj, cache); + auto ds = std::make_shared(CharToString(dataset_dir), CharToString(usage), sampler_obj, cache); ir_node_ = std::static_pointer_cast(ds); } -Cifar100Dataset::Cifar100Dataset(const std::string &dataset_dir, const std::string &usage, +Cifar100Dataset::Cifar100Dataset(const std::vector &dataset_dir, const std::vector &usage, const std::shared_ptr &sampler, const std::shared_ptr &cache) { auto sampler_obj = sampler ? sampler->Parse() : nullptr; - auto ds = std::make_shared(dataset_dir, usage, sampler_obj, cache); + auto ds = std::make_shared(CharToString(dataset_dir), CharToString(usage), sampler_obj, cache); ir_node_ = std::static_pointer_cast(ds); } -Cifar100Dataset::Cifar100Dataset(const std::string &dataset_dir, const std::string &usage, Sampler *sampler, +Cifar100Dataset::Cifar100Dataset(const std::vector &dataset_dir, const std::vector &usage, Sampler *sampler, const std::shared_ptr &cache) { auto sampler_obj = sampler ? 
sampler->Parse() : nullptr; - auto ds = std::make_shared(dataset_dir, usage, sampler_obj, cache); + auto ds = std::make_shared(CharToString(dataset_dir), CharToString(usage), sampler_obj, cache); ir_node_ = std::static_pointer_cast(ds); } -Cifar100Dataset::Cifar100Dataset(const std::string &dataset_dir, const std::string &usage, +Cifar100Dataset::Cifar100Dataset(const std::vector &dataset_dir, const std::vector &usage, const std::reference_wrapper sampler, const std::shared_ptr &cache) { auto sampler_obj = sampler.get().Parse(); - auto ds = std::make_shared(dataset_dir, usage, sampler_obj, cache); + auto ds = std::make_shared(CharToString(dataset_dir), CharToString(usage), sampler_obj, cache); ir_node_ = std::static_pointer_cast(ds); } -CLUEDataset::CLUEDataset(const std::vector &dataset_files, const std::string &task, - const std::string &usage, int64_t num_samples, ShuffleMode shuffle, int32_t num_shards, +CLUEDataset::CLUEDataset(const std::vector> &dataset_files, const std::vector &task, + const std::vector &usage, int64_t num_samples, ShuffleMode shuffle, int32_t num_shards, int32_t shard_id, const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_files, task, usage, num_samples, shuffle, num_shards, shard_id, cache); + auto ds = std::make_shared(VectorCharToString(dataset_files), CharToString(task), CharToString(usage), + num_samples, shuffle, num_shards, shard_id, cache); ir_node_ = std::static_pointer_cast(ds); } -CocoDataset::CocoDataset(const std::string &dataset_dir, const std::string &annotation_file, const std::string &task, - const bool &decode, const std::shared_ptr &sampler, +CocoDataset::CocoDataset(const std::vector &dataset_dir, const std::vector &annotation_file, + const std::vector &task, const bool &decode, const std::shared_ptr &sampler, const std::shared_ptr &cache) { auto sampler_obj = sampler ? sampler->Parse() : nullptr; - auto ds = std::make_shared(dataset_dir, annotation_file, task, decode, sampler_obj, cache); + auto ds = std::make_shared(CharToString(dataset_dir), CharToString(annotation_file), CharToString(task), + decode, sampler_obj, cache); ir_node_ = std::static_pointer_cast(ds); } -CocoDataset::CocoDataset(const std::string &dataset_dir, const std::string &annotation_file, const std::string &task, - const bool &decode, Sampler *sampler, const std::shared_ptr &cache) { - auto sampler_obj = sampler ? sampler->Parse() : nullptr; - auto ds = std::make_shared(dataset_dir, annotation_file, task, decode, sampler_obj, cache); - ir_node_ = std::static_pointer_cast(ds); -} -CocoDataset::CocoDataset(const std::string &dataset_dir, const std::string &annotation_file, const std::string &task, - const bool &decode, const std::reference_wrapper sampler, +CocoDataset::CocoDataset(const std::vector &dataset_dir, const std::vector &annotation_file, + const std::vector &task, const bool &decode, Sampler *sampler, const std::shared_ptr &cache) { + auto sampler_obj = sampler ? 
sampler->Parse() : nullptr; + auto ds = std::make_shared(CharToString(dataset_dir), CharToString(annotation_file), CharToString(task), + decode, sampler_obj, cache); + ir_node_ = std::static_pointer_cast(ds); +} +CocoDataset::CocoDataset(const std::vector &dataset_dir, const std::vector &annotation_file, + const std::vector &task, const bool &decode, + const std::reference_wrapper sampler, const std::shared_ptr &cache) { auto sampler_obj = sampler.get().Parse(); - auto ds = std::make_shared(dataset_dir, annotation_file, task, decode, sampler_obj, cache); + auto ds = std::make_shared(CharToString(dataset_dir), CharToString(annotation_file), CharToString(task), + decode, sampler_obj, cache); ir_node_ = std::static_pointer_cast(ds); } -CSVDataset::CSVDataset(const std::vector &dataset_files, char field_delim, +CSVDataset::CSVDataset(const std::vector> &dataset_files, char field_delim, const std::vector> &column_defaults, - const std::vector &column_names, int64_t num_samples, ShuffleMode shuffle, + const std::vector> &column_names, int64_t num_samples, ShuffleMode shuffle, int32_t num_shards, int32_t shard_id, const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_files, field_delim, column_defaults, column_names, num_samples, shuffle, - num_shards, shard_id, cache); + auto ds = + std::make_shared(VectorCharToString(dataset_files), field_delim, column_defaults, + VectorCharToString(column_names), num_samples, shuffle, num_shards, shard_id, cache); ir_node_ = std::static_pointer_cast(ds); } -ImageFolderDataset::ImageFolderDataset(const std::string &dataset_dir, bool decode, - const std::shared_ptr &sampler, const std::set &extensions, - const std::map &class_indexing, +ImageFolderDataset::ImageFolderDataset(const std::vector &dataset_dir, bool decode, + const std::shared_ptr &sampler, + const std::set> &extensions, + const std::map, int32_t> &class_indexing, const std::shared_ptr &cache) { // This arg exists in ImageFolderOp, but not externalized (in Python API). The default value is false. bool recursive = false; // Create logical representation of ImageFolderDataset. auto sampler_obj = sampler ? sampler->Parse() : nullptr; - auto ds = - std::make_shared(dataset_dir, decode, sampler_obj, recursive, extensions, class_indexing, cache); + auto ds = std::make_shared(CharToString(dataset_dir), decode, sampler_obj, recursive, + SetCharToString(extensions), MapCharToString(class_indexing), cache); ir_node_ = std::static_pointer_cast(ds); } -ImageFolderDataset::ImageFolderDataset(const std::string &dataset_dir, bool decode, Sampler *sampler, - const std::set &extensions, - const std::map &class_indexing, + +ImageFolderDataset::ImageFolderDataset(const std::vector &dataset_dir, bool decode, Sampler *sampler, + const std::set> &extensions, + const std::map, int32_t> &class_indexing, const std::shared_ptr &cache) { // This arg exists in ImageFolderOp, but not externalized (in Python API). The default value is false. bool recursive = false; // Create logical representation of ImageFolderDataset. auto sampler_obj = sampler ? 
sampler->Parse() : nullptr; - auto ds = - std::make_shared(dataset_dir, decode, sampler_obj, recursive, extensions, class_indexing, cache); + auto ds = std::make_shared(CharToString(dataset_dir), decode, sampler_obj, recursive, + SetCharToString(extensions), MapCharToString(class_indexing), cache); ir_node_ = std::static_pointer_cast(ds); } -ImageFolderDataset::ImageFolderDataset(const std::string &dataset_dir, bool decode, + +ImageFolderDataset::ImageFolderDataset(const std::vector &dataset_dir, bool decode, const std::reference_wrapper sampler, - const std::set &extensions, - const std::map &class_indexing, + const std::set> &extensions, + const std::map, int32_t> &class_indexing, const std::shared_ptr &cache) { // This arg exists in ImageFolderOp, but not externalized (in Python API). The default value is false. bool recursive = false; // Create logical representation of ImageFolderDataset. auto sampler_obj = sampler.get().Parse(); - auto ds = - std::make_shared(dataset_dir, decode, sampler_obj, recursive, extensions, class_indexing, cache); + auto ds = std::make_shared(CharToString(dataset_dir), decode, sampler_obj, recursive, + SetCharToString(extensions), MapCharToString(class_indexing), cache); ir_node_ = std::static_pointer_cast(ds); } -ManifestDataset::ManifestDataset(const std::string &dataset_file, const std::string &usage, +ManifestDataset::ManifestDataset(const std::vector &dataset_file, const std::vector &usage, const std::shared_ptr &sampler, - const std::map &class_indexing, bool decode, + const std::map, int32_t> &class_indexing, bool decode, const std::shared_ptr &cache) { auto sampler_obj = sampler ? sampler->Parse() : nullptr; - auto ds = std::make_shared(dataset_file, usage, sampler_obj, class_indexing, decode, cache); + auto ds = std::make_shared(CharToString(dataset_file), CharToString(usage), sampler_obj, + MapCharToString(class_indexing), decode, cache); ir_node_ = std::static_pointer_cast(ds); } -ManifestDataset::ManifestDataset(const std::string &dataset_file, const std::string &usage, Sampler *sampler, - const std::map &class_indexing, bool decode, - const std::shared_ptr &cache) { +ManifestDataset::ManifestDataset(const std::vector &dataset_file, const std::vector &usage, + Sampler *sampler, const std::map, int32_t> &class_indexing, + bool decode, const std::shared_ptr &cache) { auto sampler_obj = sampler ? 
sampler->Parse() : nullptr; - auto ds = std::make_shared(dataset_file, usage, sampler_obj, class_indexing, decode, cache); + auto ds = std::make_shared(CharToString(dataset_file), CharToString(usage), sampler_obj, + MapCharToString(class_indexing), decode, cache); ir_node_ = std::static_pointer_cast(ds); } -ManifestDataset::ManifestDataset(const std::string &dataset_file, const std::string &usage, +ManifestDataset::ManifestDataset(const std::vector &dataset_file, const std::vector &usage, const std::reference_wrapper sampler, - const std::map &class_indexing, bool decode, + const std::map, int32_t> &class_indexing, bool decode, const std::shared_ptr &cache) { auto sampler_obj = sampler.get().Parse(); - auto ds = std::make_shared(dataset_file, usage, sampler_obj, class_indexing, decode, cache); + auto ds = std::make_shared(CharToString(dataset_file), CharToString(usage), sampler_obj, + MapCharToString(class_indexing), decode, cache); ir_node_ = std::static_pointer_cast(ds); } -MindDataDataset::MindDataDataset(const std::string &dataset_file, const std::vector &columns_list, +MindDataDataset::MindDataDataset(const std::vector &dataset_file, + const std::vector> &columns_list, const std::shared_ptr &sampler, nlohmann::json padded_sample, int64_t num_padded) { auto sampler_obj = sampler ? sampler->Parse() : nullptr; - auto ds = std::make_shared(dataset_file, columns_list, sampler_obj, padded_sample, num_padded); + auto ds = std::make_shared(CharToString(dataset_file), VectorCharToString(columns_list), sampler_obj, + padded_sample, num_padded); ir_node_ = std::static_pointer_cast(ds); } -MindDataDataset::MindDataDataset(const std::string &dataset_file, const std::vector &columns_list, - Sampler *sampler, nlohmann::json padded_sample, int64_t num_padded) { +MindDataDataset::MindDataDataset(const std::vector &dataset_file, + const std::vector> &columns_list, Sampler *sampler, + nlohmann::json padded_sample, int64_t num_padded) { auto sampler_obj = sampler ? sampler->Parse() : nullptr; - auto ds = std::make_shared(dataset_file, columns_list, sampler_obj, padded_sample, num_padded); + auto ds = std::make_shared(CharToString(dataset_file), VectorCharToString(columns_list), sampler_obj, + padded_sample, num_padded); ir_node_ = std::static_pointer_cast(ds); } -MindDataDataset::MindDataDataset(const std::string &dataset_file, const std::vector &columns_list, +MindDataDataset::MindDataDataset(const std::vector &dataset_file, + const std::vector> &columns_list, const std::reference_wrapper sampler, nlohmann::json padded_sample, int64_t num_padded) { auto sampler_obj = sampler.get().Parse(); - auto ds = std::make_shared(dataset_file, columns_list, sampler_obj, padded_sample, num_padded); + auto ds = std::make_shared(CharToString(dataset_file), VectorCharToString(columns_list), sampler_obj, + padded_sample, num_padded); ir_node_ = std::static_pointer_cast(ds); } -MindDataDataset::MindDataDataset(const std::vector &dataset_files, - const std::vector &columns_list, const std::shared_ptr &sampler, +MindDataDataset::MindDataDataset(const std::vector> &dataset_files, + const std::vector> &columns_list, + const std::shared_ptr &sampler, nlohmann::json padded_sample, + int64_t num_padded) { + auto sampler_obj = sampler ? 
sampler->Parse() : nullptr; + auto ds = std::make_shared(VectorCharToString(dataset_files), VectorCharToString(columns_list), + sampler_obj, padded_sample, num_padded); + ir_node_ = std::static_pointer_cast(ds); +} +MindDataDataset::MindDataDataset(const std::vector> &dataset_files, + const std::vector> &columns_list, Sampler *sampler, nlohmann::json padded_sample, int64_t num_padded) { auto sampler_obj = sampler ? sampler->Parse() : nullptr; - auto ds = std::make_shared(dataset_files, columns_list, sampler_obj, padded_sample, num_padded); + auto ds = std::make_shared(VectorCharToString(dataset_files), VectorCharToString(columns_list), + sampler_obj, padded_sample, num_padded); ir_node_ = std::static_pointer_cast(ds); } -MindDataDataset::MindDataDataset(const std::vector &dataset_files, - const std::vector &columns_list, Sampler *sampler, - nlohmann::json padded_sample, int64_t num_padded) { - auto sampler_obj = sampler ? sampler->Parse() : nullptr; - auto ds = std::make_shared(dataset_files, columns_list, sampler_obj, padded_sample, num_padded); - ir_node_ = std::static_pointer_cast(ds); -} -MindDataDataset::MindDataDataset(const std::vector &dataset_files, - const std::vector &columns_list, +MindDataDataset::MindDataDataset(const std::vector> &dataset_files, + const std::vector> &columns_list, const std::reference_wrapper sampler, nlohmann::json padded_sample, int64_t num_padded) { auto sampler_obj = sampler.get().Parse(); - auto ds = std::make_shared(dataset_files, columns_list, sampler_obj, padded_sample, num_padded); + auto ds = std::make_shared(VectorCharToString(dataset_files), VectorCharToString(columns_list), + sampler_obj, padded_sample, num_padded); ir_node_ = std::static_pointer_cast(ds); } #endif -MnistDataset::MnistDataset(const std::string &dataset_dir, const std::string &usage, +MnistDataset::MnistDataset(const std::vector &dataset_dir, const std::vector &usage, const std::shared_ptr &sampler, const std::shared_ptr &cache) { auto sampler_obj = sampler ? sampler->Parse() : nullptr; - auto ds = std::make_shared(dataset_dir, usage, sampler_obj, cache); + auto ds = std::make_shared(CharToString(dataset_dir), CharToString(usage), sampler_obj, cache); ir_node_ = std::static_pointer_cast(ds); } -MnistDataset::MnistDataset(const std::string &dataset_dir, const std::string &usage, Sampler *sampler, +MnistDataset::MnistDataset(const std::vector &dataset_dir, const std::vector &usage, Sampler *sampler, const std::shared_ptr &cache) { auto sampler_obj = sampler ? 
sampler->Parse() : nullptr; - auto ds = std::make_shared(dataset_dir, usage, sampler_obj, cache); + auto ds = std::make_shared(CharToString(dataset_dir), CharToString(usage), sampler_obj, cache); ir_node_ = std::static_pointer_cast(ds); } -MnistDataset::MnistDataset(const std::string &dataset_dir, const std::string &usage, +MnistDataset::MnistDataset(const std::vector &dataset_dir, const std::vector &usage, const std::reference_wrapper sampler, const std::shared_ptr &cache) { auto sampler_obj = sampler.get().Parse(); - auto ds = std::make_shared(dataset_dir, usage, sampler_obj, cache); + auto ds = std::make_shared(CharToString(dataset_dir), CharToString(usage), sampler_obj, cache); ir_node_ = std::static_pointer_cast(ds); } #ifndef ENABLE_ANDROID -TextFileDataset::TextFileDataset(const std::vector &dataset_files, int64_t num_samples, +TextFileDataset::TextFileDataset(const std::vector> &dataset_files, int64_t num_samples, ShuffleMode shuffle, int32_t num_shards, int32_t shard_id, const std::shared_ptr &cache) { - auto ds = std::make_shared(dataset_files, num_samples, shuffle, num_shards, shard_id, cache); + auto ds = std::make_shared(VectorCharToString(dataset_files), num_samples, shuffle, num_shards, + shard_id, cache); ir_node_ = std::static_pointer_cast(ds); } -VOCDataset::VOCDataset(const std::string &dataset_dir, const std::string &task, const std::string &usage, - const std::map &class_indexing, bool decode, - const std::shared_ptr &sampler, const std::shared_ptr &cache) { - auto sampler_obj = sampler ? sampler->Parse() : nullptr; - auto ds = std::make_shared(dataset_dir, task, usage, class_indexing, decode, sampler_obj, cache); - ir_node_ = std::static_pointer_cast(ds); -} -VOCDataset::VOCDataset(const std::string &dataset_dir, const std::string &task, const std::string &usage, - const std::map &class_indexing, bool decode, Sampler *sampler, +VOCDataset::VOCDataset(const std::vector &dataset_dir, const std::vector &task, + const std::vector &usage, const std::map, int32_t> &class_indexing, + bool decode, const std::shared_ptr &sampler, const std::shared_ptr &cache) { auto sampler_obj = sampler ? sampler->Parse() : nullptr; - auto ds = std::make_shared(dataset_dir, task, usage, class_indexing, decode, sampler_obj, cache); + auto ds = std::make_shared(CharToString(dataset_dir), CharToString(task), CharToString(usage), + MapCharToString(class_indexing), decode, sampler_obj, cache); ir_node_ = std::static_pointer_cast(ds); } -VOCDataset::VOCDataset(const std::string &dataset_dir, const std::string &task, const std::string &usage, - const std::map &class_indexing, bool decode, - const std::reference_wrapper sampler, const std::shared_ptr &cache) { +VOCDataset::VOCDataset(const std::vector &dataset_dir, const std::vector &task, + const std::vector &usage, const std::map, int32_t> &class_indexing, + bool decode, Sampler *sampler, const std::shared_ptr &cache) { + auto sampler_obj = sampler ? 
sampler->Parse() : nullptr; + auto ds = std::make_shared(CharToString(dataset_dir), CharToString(task), CharToString(usage), + MapCharToString(class_indexing), decode, sampler_obj, cache); + ir_node_ = std::static_pointer_cast(ds); +} +VOCDataset::VOCDataset(const std::vector &dataset_dir, const std::vector &task, + const std::vector &usage, const std::map, int32_t> &class_indexing, + bool decode, const std::reference_wrapper sampler, + const std::shared_ptr &cache) { auto sampler_obj = sampler.get().Parse(); - auto ds = std::make_shared(dataset_dir, task, usage, class_indexing, decode, sampler_obj, cache); + auto ds = std::make_shared(CharToString(dataset_dir), CharToString(task), CharToString(usage), + MapCharToString(class_indexing), decode, sampler_obj, cache); ir_node_ = std::static_pointer_cast(ds); -} +} // namespace dataset RandomDataDataset::RandomDataDataset(const int32_t &total_rows, std::shared_ptr schema, - const std::vector &columns_list, + const std::vector> &columns_list, std::shared_ptr cache) { - auto ds = std::make_shared(total_rows, std::move(schema), std::move(columns_list), cache); + auto ds = std::make_shared(total_rows, std::move(schema), VectorCharToString(columns_list), cache); ir_node_ = std::static_pointer_cast(ds); } -RandomDataDataset::RandomDataDataset(const int32_t &total_rows, std::string schema_path, - const std::vector &columns_list, +RandomDataDataset::RandomDataDataset(const int32_t &total_rows, const std::vector &schema_path, + const std::vector> &columns_list, std::shared_ptr cache) { - auto ds = std::make_shared(total_rows, std::move(schema_path), std::move(columns_list), cache); + auto ds = + std::make_shared(total_rows, CharToString(schema_path), VectorCharToString(columns_list), cache); ir_node_ = std::static_pointer_cast(ds); } -TFRecordDataset::TFRecordDataset(const std::vector &dataset_files, std::string schema, - const std::vector &columns_list, int64_t num_samples, ShuffleMode shuffle, - int32_t num_shards, int32_t shard_id, bool shard_equal_rows, +TFRecordDataset::TFRecordDataset(const std::vector> &dataset_files, const std::vector &schema, + const std::vector> &columns_list, int64_t num_samples, + ShuffleMode shuffle, int32_t num_shards, int32_t shard_id, bool shard_equal_rows, std::shared_ptr cache) { - auto ds = std::make_shared(dataset_files, schema, columns_list, num_samples, shuffle, num_shards, - shard_id, shard_equal_rows, cache); + auto ds = std::make_shared(VectorCharToString(dataset_files), CharToString(schema), + VectorCharToString(columns_list), num_samples, shuffle, num_shards, shard_id, + shard_equal_rows, cache); ir_node_ = std::static_pointer_cast(ds); } -TFRecordDataset::TFRecordDataset(const std::vector &dataset_files, std::shared_ptr schema, - const std::vector &columns_list, int64_t num_samples, ShuffleMode shuffle, - int32_t num_shards, int32_t shard_id, bool shard_equal_rows, +TFRecordDataset::TFRecordDataset(const std::vector> &dataset_files, std::shared_ptr schema, + const std::vector> &columns_list, int64_t num_samples, + ShuffleMode shuffle, int32_t num_shards, int32_t shard_id, bool shard_equal_rows, std::shared_ptr cache) { - auto ds = std::make_shared(dataset_files, schema, columns_list, num_samples, shuffle, num_shards, - shard_id, shard_equal_rows, cache); + // std::cout << "SchemaObj.to_string2 " << schema->to_json() << std::endl; + auto ds = std::make_shared(VectorCharToString(dataset_files), schema, VectorCharToString(columns_list), + num_samples, shuffle, num_shards, shard_id, shard_equal_rows, cache); 
ir_node_ = std::static_pointer_cast(ds); } diff --git a/mindspore/ccsrc/minddata/dataset/api/iterator.cc b/mindspore/ccsrc/minddata/dataset/api/iterator.cc index 44cf9ea9311..5ba8e83f449 100644 --- a/mindspore/ccsrc/minddata/dataset/api/iterator.cc +++ b/mindspore/ccsrc/minddata/dataset/api/iterator.cc @@ -26,7 +26,7 @@ Iterator::Iterator() : consumer_(nullptr) {} Iterator::~Iterator() { Stop(); } // Get the next row from the data pipeline. -Status Iterator::GetNextRow(MSTensorMap *row) { +Status Iterator::GetNextRowCharIF(MSTensorMapChar *row) { // Clean data buffer row->clear(); std::unordered_map> md_map; @@ -38,7 +38,8 @@ Status Iterator::GetNextRow(MSTensorMap *row) { } for (auto de_tensor : md_map) { CHECK_FAIL_RETURN_UNEXPECTED(de_tensor.second->HasData(), "Apply transform failed, output tensor has no data"); - row->insert(std::make_pair(de_tensor.first, mindspore::MSTensor(std::make_shared(de_tensor.second)))); + std::vector col_name(de_tensor.first.begin(), de_tensor.first.end()); + row->insert(std::make_pair(col_name, mindspore::MSTensor(std::make_shared(de_tensor.second)))); } return Status::OK(); diff --git a/mindspore/ccsrc/minddata/dataset/api/text.cc b/mindspore/ccsrc/minddata/dataset/api/text.cc index 1ccf177fd5b..1918b79513f 100644 --- a/mindspore/ccsrc/minddata/dataset/api/text.cc +++ b/mindspore/ccsrc/minddata/dataset/api/text.cc @@ -31,38 +31,66 @@ namespace text { #ifndef _WIN32 // BasicTokenizer +struct BasicTokenizer::Data { + Data(bool lower_case, bool keep_whitespace, const NormalizeForm normalize_form, bool preserve_unused_token, + bool with_offsets) + : lower_case_(lower_case), + keep_whitespace_(keep_whitespace), + normalize_form_(normalize_form), + preserve_unused_token_(preserve_unused_token), + with_offsets_(with_offsets) {} + bool lower_case_; + bool keep_whitespace_; + NormalizeForm normalize_form_; + bool preserve_unused_token_; + bool with_offsets_; +}; + BasicTokenizer::BasicTokenizer(bool lower_case, bool keep_whitespace, const NormalizeForm normalize_form, bool preserve_unused_token, bool with_offsets) - : lower_case_(lower_case), - keep_whitespace_(keep_whitespace), - normalize_form_(normalize_form), - preserve_unused_token_(preserve_unused_token), - with_offsets_(with_offsets) {} + : data_(std::make_shared(lower_case, keep_whitespace, normalize_form, preserve_unused_token, with_offsets)) {} std::shared_ptr BasicTokenizer::Parse() { - return std::make_shared(lower_case_, keep_whitespace_, normalize_form_, - preserve_unused_token_, with_offsets_); + return std::make_shared(data_->lower_case_, data_->keep_whitespace_, data_->normalize_form_, + data_->preserve_unused_token_, data_->with_offsets_); } // BertTokenizer -BertTokenizer::BertTokenizer(const std::shared_ptr &vocab, const std::string &suffix_indicator, - int32_t max_bytes_per_token, const std::string &unknown_token, bool lower_case, +struct BertTokenizer::Data { + Data(const std::shared_ptr &vocab, const std::vector &suffix_indicator, int32_t max_bytes_per_token, + const std::vector &unknown_token, bool lower_case, bool keep_whitespace, + const NormalizeForm normalize_form, bool preserve_unused_token, bool with_offsets) + : vocab_(vocab), + suffix_indicator_(CharToString(suffix_indicator)), + max_bytes_per_token_(max_bytes_per_token), + unknown_token_(CharToString(unknown_token)), + lower_case_(lower_case), + keep_whitespace_(keep_whitespace), + normalize_form_(normalize_form), + preserve_unused_token_(preserve_unused_token), + with_offsets_(with_offsets) {} + std::shared_ptr vocab_; + 
std::string suffix_indicator_; + int32_t max_bytes_per_token_; + std::string unknown_token_; + bool lower_case_; + bool keep_whitespace_; + NormalizeForm normalize_form_; + bool preserve_unused_token_; + bool with_offsets_; +}; + +BertTokenizer::BertTokenizer(const std::shared_ptr &vocab, const std::vector &suffix_indicator, + int32_t max_bytes_per_token, const std::vector &unknown_token, bool lower_case, bool keep_whitespace, const NormalizeForm normalize_form, bool preserve_unused_token, bool with_offsets) - : vocab_(vocab), - suffix_indicator_(suffix_indicator), - max_bytes_per_token_(max_bytes_per_token), - unknown_token_(unknown_token), - lower_case_(lower_case), - keep_whitespace_(keep_whitespace), - normalize_form_(normalize_form), - preserve_unused_token_(preserve_unused_token), - with_offsets_(with_offsets) {} + : data_(std::make_shared(vocab, suffix_indicator, max_bytes_per_token, unknown_token, lower_case, + keep_whitespace, normalize_form, preserve_unused_token, with_offsets)) {} std::shared_ptr BertTokenizer::Parse() { - return std::make_shared(vocab_, suffix_indicator_, max_bytes_per_token_, unknown_token_, - lower_case_, keep_whitespace_, normalize_form_, - preserve_unused_token_, with_offsets_); + return std::make_shared( + data_->vocab_, data_->suffix_indicator_, data_->max_bytes_per_token_, data_->unknown_token_, data_->lower_case_, + data_->keep_whitespace_, data_->normalize_form_, data_->preserve_unused_token_, data_->with_offsets_); } // CaseFold @@ -72,14 +100,28 @@ std::shared_ptr CaseFold::Parse() { return std::make_shared &hmm_path, const std::vector &mp_path, const JiebaMode &mode, bool with_offsets) + : hmm_path_(CharToString(hmm_path)), + mp_path_(CharToString(mp_path)), + mode_(mode), + with_offsets_(with_offsets), + words_list_({}) {} + std::string hmm_path_; + std::string mp_path_; + JiebaMode mode_; + bool with_offsets_; + std::vector> words_list_; +}; + +JiebaTokenizer::JiebaTokenizer(const std::vector &hmm_path, const std::vector &mp_path, + const JiebaMode &mode, bool with_offsets) + : data_(std::make_shared(hmm_path, mp_path, mode, with_offsets)) {} std::shared_ptr JiebaTokenizer::Parse() { std::shared_ptr jieba_tokenizer = - std::make_shared(hmm_path_, mp_path_, mode_, with_offsets_); - for (auto &word : words_list_) { + std::make_shared(data_->hmm_path_, data_->mp_path_, data_->mode_, data_->with_offsets_); + for (auto &word : data_->words_list_) { Status rc = jieba_tokenizer->AddWord(word.first, word.second); if (rc.IsError()) { MS_LOG(ERROR) << rc; @@ -100,109 +142,199 @@ Status JiebaTokenizer::AddWord(const std::string &word, int64_t freq) { MS_LOG(ERROR) << err_msg; RETURN_STATUS_SYNTAX_ERROR(err_msg); } - words_list_.emplace_back(word, freq); + data_->words_list_.emplace_back(word, freq); return Status::OK(); } // Lookup -Lookup::Lookup(const std::shared_ptr &vocab, const std::optional &unknown_token, - const std::string &data_type) - : vocab_(vocab), unknown_token_(unknown_token), data_type_(data_type) {} +struct Lookup::Data { + Data(const std::shared_ptr &vocab, const std::optional> &unknown_token, + const std::vector &data_type) + : vocab_(vocab), unknown_token_(OptionalCharToString(unknown_token)), data_type_(CharToString(data_type)) {} + std::shared_ptr vocab_; + std::optional unknown_token_; + std::string data_type_; +}; + +Lookup::Lookup(const std::shared_ptr &vocab, const std::optional> &unknown_token, + const std::vector &data_type) + : data_(std::make_shared(vocab, unknown_token, data_type)) {} std::shared_ptr Lookup::Parse() { - return 
std::make_shared(vocab_, unknown_token_, data_type_); + return std::make_shared(data_->vocab_, data_->unknown_token_, data_->data_type_); } // Ngram -Ngram::Ngram(const std::vector &ngrams, const std::pair &left_pad, - const std::pair &right_pad, const std::string &separator) - : ngrams_(ngrams), left_pad_(left_pad), right_pad_(right_pad), separator_(separator) {} +struct Ngram::Data { + Data(const std::vector &ngrams, const std::pair, int32_t> &left_pad, + const std::pair, int32_t> &right_pad, const std::vector &separator) + : ngrams_(ngrams), + left_pad_(PairCharToString(left_pad)), + right_pad_(PairCharToString(right_pad)), + separator_(CharToString(separator)) {} + std::vector ngrams_; + std::pair left_pad_; + std::pair right_pad_; + std::string separator_; +}; + +Ngram::Ngram(const std::vector &ngrams, const std::pair, int32_t> &left_pad, + const std::pair, int32_t> &right_pad, const std::vector &separator) + : data_(std::make_shared(ngrams, left_pad, right_pad, separator)) {} std::shared_ptr Ngram::Parse() { - return std::make_shared(ngrams_, left_pad_, right_pad_, separator_); + return std::make_shared(data_->ngrams_, data_->left_pad_, data_->right_pad_, data_->separator_); } #ifndef _WIN32 // NormalizeUTF8 -NormalizeUTF8::NormalizeUTF8(NormalizeForm normalize_form) : normalize_form_(normalize_form) {} +struct NormalizeUTF8::Data { + explicit Data(NormalizeForm normalize_form) : normalize_form_(normalize_form) {} + NormalizeForm normalize_form_; +}; + +NormalizeUTF8::NormalizeUTF8(NormalizeForm normalize_form) : data_(std::make_shared(normalize_form)) {} std::shared_ptr NormalizeUTF8::Parse() { - return std::make_shared(normalize_form_); + return std::make_shared(data_->normalize_form_); } // RegexReplace -RegexReplace::RegexReplace(std::string pattern, std::string replace, bool replace_all) - : pattern_(pattern), replace_(replace), replace_all_(replace_all) {} +struct RegexReplace::Data { + Data(const std::vector &pattern, const std::vector &replace, bool replace_all) + : pattern_(CharToString(pattern)), replace_(CharToString(replace)), replace_all_(replace_all) {} + std::string pattern_; + std::string replace_; + bool replace_all_; +}; + +RegexReplace::RegexReplace(const std::vector &pattern, const std::vector &replace, bool replace_all) + : data_(std::make_shared(pattern, replace, replace_all)) {} std::shared_ptr RegexReplace::Parse() { - return std::make_shared(pattern_, replace_, replace_all_); + return std::make_shared(data_->pattern_, data_->replace_, data_->replace_all_); } // RegexTokenizer -RegexTokenizer::RegexTokenizer(std::string delim_pattern, std::string keep_delim_pattern, bool with_offsets) - : delim_pattern_(delim_pattern), keep_delim_pattern_(keep_delim_pattern), with_offsets_(with_offsets) {} +struct RegexTokenizer::Data { + Data(const std::vector &delim_pattern, const std::vector &keep_delim_pattern, bool with_offsets) + : delim_pattern_(CharToString(delim_pattern)), + keep_delim_pattern_(CharToString(keep_delim_pattern)), + with_offsets_(with_offsets) {} + std::string delim_pattern_; + std::string keep_delim_pattern_; + bool with_offsets_; +}; + +RegexTokenizer::RegexTokenizer(const std::vector &delim_pattern, const std::vector &keep_delim_pattern, + bool with_offsets) + : data_(std::make_shared(delim_pattern, keep_delim_pattern, with_offsets)) {} std::shared_ptr RegexTokenizer::Parse() { - return std::make_shared(delim_pattern_, keep_delim_pattern_, with_offsets_); + return std::make_shared(data_->delim_pattern_, data_->keep_delim_pattern_, + 
data_->with_offsets_); } #endif // SentencePieceTokenizer +struct SentencePieceTokenizer::Data { + Data(const std::shared_ptr &vocab, SPieceTokenizerOutType out_type) + : vocab_(vocab), out_type_(out_type) {} + Data(const std::vector &vocab_path, SPieceTokenizerOutType out_type) + : vocab_path_(CharToString(vocab_path)), out_type_(out_type) {} + std::shared_ptr vocab_; + std::string vocab_path_; + SPieceTokenizerLoadType load_type_; + SPieceTokenizerOutType out_type_; +}; + SentencePieceTokenizer::SentencePieceTokenizer(const std::shared_ptr &vocab, SPieceTokenizerOutType out_type) - : vocab_(vocab), out_type_(out_type) {} + : data_(std::make_shared(vocab, out_type)) {} -SentencePieceTokenizer::SentencePieceTokenizer(const std::string &vocab_path, SPieceTokenizerOutType out_type) - : vocab_path_(vocab_path), out_type_(out_type) {} +SentencePieceTokenizer::SentencePieceTokenizer(const std::vector &vocab_path, SPieceTokenizerOutType out_type) + : data_(std::make_shared(vocab_path, out_type)) {} std::shared_ptr SentencePieceTokenizer::Parse() { - if (vocab_ != nullptr) { - return std::make_shared(vocab_, out_type_); + if (data_->vocab_ != nullptr) { + return std::make_shared(data_->vocab_, data_->out_type_); } else { - return std::make_shared(vocab_path_, out_type_); + return std::make_shared(data_->vocab_path_, data_->out_type_); } } // SlidingWindow -SlidingWindow::SlidingWindow(const int32_t width, const int32_t axis) : width_(width), axis_(axis) {} +struct SlidingWindow::Data { + Data(const int32_t width, const int32_t axis) : width_(width), axis_(axis) {} + int32_t width_; + int32_t axis_; +}; + +SlidingWindow::SlidingWindow(const int32_t width, const int32_t axis) : data_(std::make_shared(width, axis)) {} std::shared_ptr SlidingWindow::Parse() { - return std::make_shared(width_, axis_); + return std::make_shared(data_->width_, data_->axis_); } // ToNumber -ToNumber::ToNumber(const std::string &data_type) : data_type_(data_type) {} +struct ToNumber::Data { + explicit Data(const std::vector &data_type) : data_type_(CharToString(data_type)) {} + std::string data_type_; +}; -std::shared_ptr ToNumber::Parse() { return std::make_shared(data_type_); } +ToNumber::ToNumber(const std::vector &data_type) : data_(std::make_shared(data_type)) {} + +std::shared_ptr ToNumber::Parse() { return std::make_shared(data_->data_type_); } // TruncateSequencePair -TruncateSequencePair::TruncateSequencePair(int32_t max_length) : max_length_(max_length) {} +struct TruncateSequencePair::Data { + explicit Data(int32_t max_length) : max_length_(max_length) {} + int32_t max_length_; +}; + +TruncateSequencePair::TruncateSequencePair(int32_t max_length) : data_(std::make_shared(max_length)) {} std::shared_ptr TruncateSequencePair::Parse() { - return std::make_shared(max_length_); + return std::make_shared(data_->max_length_); } // UnicodeCharTokenizer -UnicodeCharTokenizer::UnicodeCharTokenizer(bool with_offsets) : with_offsets_(with_offsets) {} +struct UnicodeCharTokenizer::Data { + explicit Data(bool with_offsets) : with_offsets_(with_offsets) {} + bool with_offsets_; +}; + +UnicodeCharTokenizer::UnicodeCharTokenizer(bool with_offsets) : data_(std::make_shared(with_offsets)) {} std::shared_ptr UnicodeCharTokenizer::Parse() { - return std::make_shared(with_offsets_); + return std::make_shared(data_->with_offsets_); } #ifndef _WIN32 // UnicodeScriptTokenizer +struct UnicodeScriptTokenizer::Data { + Data(bool keep_whitespace, bool with_offsets) : keep_whitespace_(keep_whitespace), with_offsets_(with_offsets) {} + bool 
keep_whitespace_; + bool with_offsets_; +}; + UnicodeScriptTokenizer::UnicodeScriptTokenizer(bool keep_whitespace, bool with_offsets) - : keep_whitespace_(keep_whitespace), with_offsets_(with_offsets) {} + : data_(std::make_shared(keep_whitespace, with_offsets)) {} std::shared_ptr UnicodeScriptTokenizer::Parse() { - return std::make_shared(keep_whitespace_, with_offsets_); + return std::make_shared(data_->keep_whitespace_, data_->with_offsets_); } // WhitespaceTokenizer -WhitespaceTokenizer::WhitespaceTokenizer(bool with_offsets) : with_offsets_(with_offsets) {} +struct WhitespaceTokenizer::Data { + explicit Data(bool with_offsets) : with_offsets_(with_offsets) {} + bool with_offsets_; +}; + +WhitespaceTokenizer::WhitespaceTokenizer(bool with_offsets) : data_(std::make_shared(with_offsets)) {} std::shared_ptr WhitespaceTokenizer::Parse() { - return std::make_shared(with_offsets_); + return std::make_shared(data_->with_offsets_); } #endif } // namespace text diff --git a/mindspore/ccsrc/minddata/dataset/api/transforms.cc b/mindspore/ccsrc/minddata/dataset/api/transforms.cc index 269d209ecc1..39638350ceb 100644 --- a/mindspore/ccsrc/minddata/dataset/api/transforms.cc +++ b/mindspore/ccsrc/minddata/dataset/api/transforms.cc @@ -30,25 +30,30 @@ namespace transforms { // (In alphabetical order) // Constructor to Compose. -Compose::Compose(const std::vector &transforms) { +struct Compose::Data { + std::vector> transforms_; +}; + +Compose::Compose(const std::vector &transforms) : data_(std::make_shared()) { (void)std::transform( - transforms.begin(), transforms.end(), std::back_inserter(transforms_), + transforms.begin(), transforms.end(), std::back_inserter(data_->transforms_), [](TensorTransform *op) -> std::shared_ptr { return op != nullptr ? op->Parse() : nullptr; }); } -Compose::Compose(const std::vector> &transforms) { - (void)std::transform(transforms.begin(), transforms.end(), std::back_inserter(transforms_), +Compose::Compose(const std::vector> &transforms) : data_(std::make_shared()) { + (void)std::transform(transforms.begin(), transforms.end(), std::back_inserter(data_->transforms_), [](std::shared_ptr op) -> std::shared_ptr { return op != nullptr ? op->Parse() : nullptr; }); } -Compose::Compose(const std::vector> &transforms) { - (void)std::transform(transforms.begin(), transforms.end(), std::back_inserter(transforms_), +Compose::Compose(const std::vector> &transforms) + : data_(std::make_shared()) { + (void)std::transform(transforms.begin(), transforms.end(), std::back_inserter(data_->transforms_), [](TensorTransform &op) -> std::shared_ptr { return op.Parse(); }); } -std::shared_ptr Compose::Parse() { return std::make_shared(transforms_); } +std::shared_ptr Compose::Parse() { return std::make_shared(data_->transforms_); } // Constructor to Duplicate Duplicate::Duplicate() {} @@ -56,59 +61,87 @@ Duplicate::Duplicate() {} std::shared_ptr Duplicate::Parse() { return std::make_shared(); } // Constructor to OneHot -OneHot::OneHot(int32_t num_classes) : num_classes_(num_classes) {} +struct OneHot::Data { + explicit Data(int32_t num_classes) : num_classes_(num_classes) {} + float num_classes_; +}; -std::shared_ptr OneHot::Parse() { return std::make_shared(num_classes_); } +OneHot::OneHot(int32_t num_classes) : data_(std::make_shared(num_classes)) {} + +std::shared_ptr OneHot::Parse() { return std::make_shared(data_->num_classes_); } // Constructor to RandomApply. 
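Each of these constructors — in text.cc above and continuing through transforms.cc and vision.cc — is migrated the same way: the old data members move out of the exported class into a nested Data struct reached through a std::shared_ptr, which keeps std::string and other ABI-sensitive members out of the exported object's layout. A minimal, self-contained sketch of the idiom; ToUpper and its locale parameter are invented for illustration and are not part of this patch:

#include <memory>
#include <string>
#include <vector>

// Hypothetical operation showing the nested-Data idiom used throughout this patch.
class ToUpper {
 public:
  // The exported constructor takes the ABI-neutral std::vector<char> form.
  explicit ToUpper(const std::vector<char> &locale);

 private:
  struct Data;                  // defined out of line; layout hidden from callers
  std::shared_ptr<Data> data_;  // the only data member the exported class carries
};

// Lives in the .cc file, so std::string never appears in the exported layout.
struct ToUpper::Data {
  explicit Data(const std::vector<char> &locale) : locale_(locale.begin(), locale.end()) {}
  std::string locale_;
};

ToUpper::ToUpper(const std::vector<char> &locale) : data_(std::make_shared<Data>(locale)) {}

The real classes do the same conversion through CharToString and the other helpers rather than raw iterator ranges, and Parse() then reads everything back out of data_.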
-RandomApply::RandomApply(const std::vector &transforms, double prob) : prob_(prob) { +struct RandomApply::Data { + std::vector> transforms_; + double prob_; +}; + +RandomApply::RandomApply(const std::vector &transforms, double prob) + : data_(std::make_shared()) { (void)std::transform( - transforms.begin(), transforms.end(), std::back_inserter(transforms_), + transforms.begin(), transforms.end(), std::back_inserter(data_->transforms_), [](TensorTransform *op) -> std::shared_ptr { return op != nullptr ? op->Parse() : nullptr; }); + data_->prob_ = prob; } -RandomApply::RandomApply(const std::vector> &transforms, double prob) : prob_(prob) { - (void)std::transform(transforms.begin(), transforms.end(), std::back_inserter(transforms_), +RandomApply::RandomApply(const std::vector> &transforms, double prob) + : data_(std::make_shared()) { + (void)std::transform(transforms.begin(), transforms.end(), std::back_inserter(data_->transforms_), [](std::shared_ptr op) -> std::shared_ptr { return op != nullptr ? op->Parse() : nullptr; }); + data_->prob_ = prob; } RandomApply::RandomApply(const std::vector> &transforms, double prob) - : prob_(prob) { - (void)std::transform(transforms.begin(), transforms.end(), std::back_inserter(transforms_), + : data_(std::make_shared()) { + (void)std::transform(transforms.begin(), transforms.end(), std::back_inserter(data_->transforms_), [](TensorTransform &op) -> std::shared_ptr { return op.Parse(); }); + data_->prob_ = prob; } std::shared_ptr RandomApply::Parse() { - return std::make_shared(transforms_, prob_); + return std::make_shared(data_->transforms_, data_->prob_); } // Constructor to RandomChoice. -RandomChoice::RandomChoice(const std::vector &transforms) { +struct RandomChoice::Data { + std::vector> transforms_; +}; + +RandomChoice::RandomChoice(const std::vector &transforms) : data_(std::make_shared()) { (void)std::transform( - transforms.begin(), transforms.end(), std::back_inserter(transforms_), + transforms.begin(), transforms.end(), std::back_inserter(data_->transforms_), [](TensorTransform *op) -> std::shared_ptr { return op != nullptr ? op->Parse() : nullptr; }); } -RandomChoice::RandomChoice(const std::vector> &transforms) { - (void)std::transform(transforms.begin(), transforms.end(), std::back_inserter(transforms_), +RandomChoice::RandomChoice(const std::vector> &transforms) + : data_(std::make_shared()) { + (void)std::transform(transforms.begin(), transforms.end(), std::back_inserter(data_->transforms_), [](std::shared_ptr op) -> std::shared_ptr { return op != nullptr ? 
op->Parse() : nullptr; }); } -RandomChoice::RandomChoice(const std::vector> &transforms) { - (void)std::transform(transforms.begin(), transforms.end(), std::back_inserter(transforms_), +RandomChoice::RandomChoice(const std::vector> &transforms) + : data_(std::make_shared()) { + (void)std::transform(transforms.begin(), transforms.end(), std::back_inserter(data_->transforms_), [](TensorTransform &op) -> std::shared_ptr { return op.Parse(); }); } -std::shared_ptr RandomChoice::Parse() { return std::make_shared(transforms_); } +std::shared_ptr RandomChoice::Parse() { + return std::make_shared(data_->transforms_); +} // Constructor to TypeCast -TypeCast::TypeCast(std::string data_type) : data_type_(data_type) {} +struct TypeCast::Data { + explicit Data(const std::vector &data_type) : data_type_(CharToString(data_type)) {} + std::string data_type_; +}; -std::shared_ptr TypeCast::Parse() { return std::make_shared(data_type_); } +TypeCast::TypeCast(const std::vector &data_type) : data_(std::make_shared(data_type)) {} + +std::shared_ptr TypeCast::Parse() { return std::make_shared(data_->data_type_); } // Constructor to Unique Unique::Unique() {} diff --git a/mindspore/ccsrc/minddata/dataset/api/vision.cc b/mindspore/ccsrc/minddata/dataset/api/vision.cc index 5ae28268f03..7c1d6b82a75 100644 --- a/mindspore/ccsrc/minddata/dataset/api/vision.cc +++ b/mindspore/ccsrc/minddata/dataset/api/vision.cc @@ -42,85 +42,153 @@ namespace vision { // CONSTRUCTORS FOR API CLASSES TO CREATE VISION TENSOR TRANSFORM OPERATIONS // (In alphabetical order) +// Affine Transform Operation. +struct Affine::Data { + Data(float_t degrees, const std::vector &translation, float scale, const std::vector &shear, + InterpolationMode interpolation, const std::vector &fill_value) + : degrees_(degrees), + translation_(translation), + scale_(scale), + shear_(shear), + interpolation_(interpolation), + fill_value_(fill_value) {} + float degrees_; + std::vector translation_; + float scale_; + std::vector shear_; + InterpolationMode interpolation_; + std::vector fill_value_; +}; + Affine::Affine(float_t degrees, const std::vector &translation, float scale, const std::vector &shear, InterpolationMode interpolation, const std::vector &fill_value) - : degrees_(degrees), - translation_(translation), - scale_(scale), - shear_(shear), - interpolation_(interpolation), - fill_value_(fill_value) {} + : data_(std::make_shared(degrees, translation, scale, shear, interpolation, fill_value)) {} std::shared_ptr Affine::Parse() { - return std::make_shared(degrees_, translation_, scale_, shear_, interpolation_, fill_value_); + return std::make_shared(data_->degrees_, data_->translation_, data_->scale_, data_->shear_, + data_->interpolation_, data_->fill_value_); } // AutoContrast Transform Operation. -AutoContrast::AutoContrast(float cutoff, std::vector ignore) : cutoff_(cutoff), ignore_(ignore) {} +struct AutoContrast::Data { + Data(float cutoff, const std::vector &ignore) : cutoff_(cutoff), ignore_(ignore) {} + float cutoff_; + std::vector ignore_; +}; + +AutoContrast::AutoContrast(float cutoff, std::vector ignore) + : data_(std::make_shared(cutoff, ignore)) {} std::shared_ptr AutoContrast::Parse() { - return std::make_shared(cutoff_, ignore_); + return std::make_shared(data_->cutoff_, data_->ignore_); } // BoundingBoxAugment Transform Operation. -BoundingBoxAugment::BoundingBoxAugment(TensorTransform *transform, float ratio) : ratio_(ratio) { - transform_ = transform ? 
transform->Parse() : nullptr; +struct BoundingBoxAugment::Data { + std::shared_ptr transform_; + float ratio_; +}; + +BoundingBoxAugment::BoundingBoxAugment(TensorTransform *transform, float ratio) : data_(std::make_shared()) { + data_->transform_ = transform ? transform->Parse() : nullptr; + data_->ratio_ = ratio; } -BoundingBoxAugment::BoundingBoxAugment(const std::shared_ptr &transform, float ratio) : ratio_(ratio) { - transform_ = transform ? transform->Parse() : nullptr; +BoundingBoxAugment::BoundingBoxAugment(const std::shared_ptr &transform, float ratio) + : data_(std::make_shared()) { + data_->transform_ = transform ? transform->Parse() : nullptr; + data_->ratio_ = ratio; } BoundingBoxAugment::BoundingBoxAugment(const std::reference_wrapper transform, float ratio) - : ratio_(ratio) { - transform_ = transform.get().Parse(); + : data_(std::make_shared()) { + data_->transform_ = transform.get().Parse(); + data_->ratio_ = ratio; } std::shared_ptr BoundingBoxAugment::Parse() { - return std::make_shared(transform_, ratio_); + return std::make_shared(data_->transform_, data_->ratio_); } #endif // not ENABLE_ANDROID // CenterCrop Transform Operation. -CenterCrop::CenterCrop(std::vector size) : size_(size) {} +struct CenterCrop::Data { + explicit Data(const std::vector &size) : size_(size) {} + std::vector size_; +}; -std::shared_ptr CenterCrop::Parse() { return std::make_shared(size_); } +CenterCrop::CenterCrop(std::vector size) : data_(std::make_shared(size)) {} + +std::shared_ptr CenterCrop::Parse() { return std::make_shared(data_->size_); } std::shared_ptr CenterCrop::Parse(const MapTargetDevice &env) { if (env == MapTargetDevice::kAscend310) { #ifdef ENABLE_ACL std::vector usize_; - usize_.reserve(size_.size()); - std::transform(size_.begin(), size_.end(), std::back_inserter(usize_), [](int32_t i) { return (uint32_t)i; }); + usize_.reserve(data_->size_.size()); + std::transform(data_->size_.begin(), data_->size_.end(), std::back_inserter(usize_), + [](int32_t i) { return (uint32_t)i; }); return std::make_shared(usize_); #endif // ENABLE_ACL } - return std::make_shared(size_); + return std::make_shared(data_->size_); } // Crop Transform Operation. -Crop::Crop(std::vector coordinates, std::vector size) : coordinates_(coordinates), size_(size) {} +struct Crop::Data { + Data(const std::vector &coordinates, const std::vector &size) + : coordinates_(coordinates), size_(size) {} + std::vector coordinates_; + std::vector size_; +}; -std::shared_ptr Crop::Parse() { return std::make_shared(coordinates_, size_); } +Crop::Crop(std::vector coordinates, std::vector size) + : data_(std::make_shared(coordinates, size)) {} + +std::shared_ptr Crop::Parse() { + return std::make_shared(data_->coordinates_, data_->size_); +} #ifndef ENABLE_ANDROID // CutMixBatch Transform Operation. +struct CutMixBatch::Data { + Data(ImageBatchFormat image_batch_format, float alpha, float prob) + : image_batch_format_(image_batch_format), alpha_(alpha), prob_(prob) {} + float alpha_; + float prob_; + ImageBatchFormat image_batch_format_; +}; + CutMixBatch::CutMixBatch(ImageBatchFormat image_batch_format, float alpha, float prob) - : image_batch_format_(image_batch_format), alpha_(alpha), prob_(prob) {} + : data_(std::make_shared(image_batch_format, alpha, prob)) {} std::shared_ptr CutMixBatch::Parse() { - return std::make_shared(image_batch_format_, alpha_, prob_); + return std::make_shared(data_->image_batch_format_, data_->alpha_, data_->prob_); } // CutOutOp. 
-CutOut::CutOut(int32_t length, int32_t num_patches) : length_(length), num_patches_(num_patches) {} +struct CutOut::Data { + Data(int32_t length, int32_t num_patches) : length_(length), num_patches_(num_patches) {} + int32_t length_; + int32_t num_patches_; +}; -std::shared_ptr CutOut::Parse() { return std::make_shared(length_, num_patches_); } +CutOut::CutOut(int32_t length, int32_t num_patches) : data_(std::make_shared(length, num_patches)) {} + +std::shared_ptr CutOut::Parse() { + return std::make_shared(data_->length_, data_->num_patches_); +} #endif // not ENABLE_ANDROID // Decode Transform Operation. -Decode::Decode(bool rgb) : rgb_(rgb) {} -std::shared_ptr Decode::Parse() { return std::make_shared(rgb_); } +struct Decode::Data { + explicit Data(bool rgb) : rgb_(rgb) {} + bool rgb_; +}; + +Decode::Decode(bool rgb) : data_(std::make_shared(rgb)) {} + +std::shared_ptr Decode::Parse() { return std::make_shared(data_->rgb_); } std::shared_ptr Decode::Parse(const MapTargetDevice &env) { if (env == MapTargetDevice::kAscend310) { @@ -128,31 +196,42 @@ std::shared_ptr Decode::Parse(const MapTargetDevice &env) { return std::make_shared(); #endif // ENABLE_ACL } - return std::make_shared(rgb_); + return std::make_shared(data_->rgb_); } #ifdef ENABLE_ACL // DvppDecodeResize Transform Operation. -DvppDecodeResizeJpeg::DvppDecodeResizeJpeg(std::vector resize) : resize_(resize) {} +struct DvppDecodeResizeJpeg::Data { + explicit Data(const std::vector &resize) : resize_(resize) {} + std::vector resize_; +}; + +DvppDecodeResizeJpeg::DvppDecodeResizeJpeg(std::vector resize) : data_(std::make_shared(resize)) {} std::shared_ptr DvppDecodeResizeJpeg::Parse() { - return std::make_shared(resize_); + return std::make_shared(data_->resize_); } std::shared_ptr DvppDecodeResizeJpeg::Parse(const MapTargetDevice &env) { - return std::make_shared(resize_); + return std::make_shared(data_->resize_); } // DvppDecodeResizeCrop Transform Operation. +struct DvppDecodeResizeCropJpeg::Data { + Data(const std::vector &crop, const std::vector &resize) : crop_(crop), resize_(resize) {} + std::vector crop_; + std::vector resize_; +}; + DvppDecodeResizeCropJpeg::DvppDecodeResizeCropJpeg(std::vector crop, std::vector resize) - : crop_(crop), resize_(resize) {} + : data_(std::make_shared(crop, resize)) {} std::shared_ptr DvppDecodeResizeCropJpeg::Parse() { - return std::make_shared(crop_, resize_); + return std::make_shared(data_->crop_, data_->resize_); } std::shared_ptr DvppDecodeResizeCropJpeg::Parse(const MapTargetDevice &env) { - return std::make_shared(crop_, resize_); + return std::make_shared(data_->crop_, data_->resize_); } // DvppDecodePng Transform Operation. @@ -181,174 +260,339 @@ Invert::Invert() {} std::shared_ptr Invert::Parse() { return std::make_shared(); } // MixUpBatch Transform Operation. -MixUpBatch::MixUpBatch(float alpha) : alpha_(alpha) {} +struct MixUpBatch::Data { + explicit Data(float alpha) : alpha_(alpha) {} + float alpha_; +}; -std::shared_ptr MixUpBatch::Parse() { return std::make_shared(alpha_); } +MixUpBatch::MixUpBatch(float alpha) : data_(std::make_shared(alpha)) {} + +std::shared_ptr MixUpBatch::Parse() { return std::make_shared(data_->alpha_); } #endif // not ENABLE_ANDROID // Normalize Transform Operation. 
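CenterCrop and Decode above — and Normalize and Resize below — also route their device-aware Parse(const MapTargetDevice &) overloads through data_: Ascend 310 builds get a Dvpp-specific operation under ENABLE_ACL, everything else falls back to the generic IR node. A self-contained sketch of that dispatch shape; Foo, FooOperation, DvppFooOperation and the stand-in MapTargetDevice enum are placeholders, not MindSpore definitions:

#include <cstdint>
#include <memory>
#include <vector>

// Stand-ins for the real types; only the kAscend310 value and the Dvpp/generic split matter here.
enum class MapTargetDevice { kCpu, kAscend310 };

struct TensorOperation {
  virtual ~TensorOperation() = default;
};

struct FooOperation : TensorOperation {
  explicit FooOperation(std::vector<int32_t> size) : size_(std::move(size)) {}
  std::vector<int32_t> size_;
};

#ifdef ENABLE_ACL
struct DvppFooOperation : TensorOperation {
  explicit DvppFooOperation(std::vector<uint32_t> size) : size_(std::move(size)) {}
  std::vector<uint32_t> size_;
};
#endif  // ENABLE_ACL

class Foo {
 public:
  explicit Foo(std::vector<int32_t> size) : size_(std::move(size)) {}

  // Host pipeline: always build the generic IR node.
  std::shared_ptr<TensorOperation> Parse() { return std::make_shared<FooOperation>(size_); }

  // Device-aware overload: return the Dvpp node only for Ascend 310 builds with ACL enabled.
  std::shared_ptr<TensorOperation> Parse(const MapTargetDevice &env) {
    if (env == MapTargetDevice::kAscend310) {
#ifdef ENABLE_ACL
      // Same int32_t -> uint32_t widening that CenterCrop and Resize perform for their Dvpp path.
      std::vector<uint32_t> usize(size_.begin(), size_.end());
      return std::make_shared<DvppFooOperation>(usize);
#endif  // ENABLE_ACL
    }
    return std::make_shared<FooOperation>(size_);
  }

 private:
  std::vector<int32_t> size_;  // nested-Data indirection omitted here to keep the focus on dispatch
};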
-Normalize::Normalize(std::vector mean, std::vector std) : mean_(mean), std_(std) {} +struct Normalize::Data { + Data(const std::vector &mean, const std::vector &std) : mean_(mean), std_(std) {} + std::vector mean_; + std::vector std_; +}; -std::shared_ptr Normalize::Parse() { return std::make_shared(mean_, std_); } +Normalize::Normalize(std::vector mean, std::vector std) : data_(std::make_shared(mean, std)) {} + +std::shared_ptr Normalize::Parse() { + return std::make_shared(data_->mean_, data_->std_); +} std::shared_ptr Normalize::Parse(const MapTargetDevice &env) { if (env == MapTargetDevice::kAscend310) { #ifdef ENABLE_ACL - return std::make_shared(mean_, std_); + return std::make_shared(data_->mean_, data_->std_); #endif } - return std::make_shared(mean_, std_); + return std::make_shared(data_->mean_, data_->std_); } #ifndef ENABLE_ANDROID // NormalizePad Transform Operation. -NormalizePad::NormalizePad(const std::vector &mean, const std::vector &std, const std::string &dtype) - : mean_(mean), std_(std), dtype_(dtype) {} +struct NormalizePad::Data { + Data(const std::vector &mean, const std::vector &std, const std::string &dtype) + : mean_(mean), std_(std), dtype_(dtype) {} + std::vector mean_; + std::vector std_; + std::string dtype_; +}; + +NormalizePad::NormalizePad(const std::vector &mean, const std::vector &std, + const std::vector &dtype) + : data_(std::make_shared(mean, std, CharToString(dtype))) {} std::shared_ptr NormalizePad::Parse() { - return std::make_shared(mean_, std_, dtype_); + return std::make_shared(data_->mean_, data_->std_, data_->dtype_); } // Pad Transform Operation. +struct Pad::Data { + Data(const std::vector &padding, const std::vector &fill_value, BorderType padding_mode) + : padding_(padding), fill_value_(fill_value), padding_mode_(padding_mode) {} + std::vector padding_; + std::vector fill_value_; + BorderType padding_mode_; +}; + Pad::Pad(std::vector padding, std::vector fill_value, BorderType padding_mode) - : padding_(padding), fill_value_(fill_value), padding_mode_(padding_mode) {} + : data_(std::make_shared(padding, fill_value, padding_mode)) {} std::shared_ptr Pad::Parse() { - return std::make_shared(padding_, fill_value_, padding_mode_); + return std::make_shared(data_->padding_, data_->fill_value_, data_->padding_mode_); } // RandomAffine Transform Operation. 
+struct RandomAffine::Data { + Data(const std::vector °rees, const std::vector &translate_range, + const std::vector &scale_range, const std::vector &shear_ranges, + InterpolationMode interpolation, const std::vector &fill_value) + : degrees_(degrees), + translate_range_(translate_range), + scale_range_(scale_range), + shear_ranges_(shear_ranges), + interpolation_(interpolation), + fill_value_(fill_value) {} + std::vector degrees_; // min_degree, max_degree + std::vector translate_range_; // maximum x translation percentage, maximum y translation percentage + std::vector scale_range_; // min_scale, max_scale + std::vector shear_ranges_; // min_x_shear, max_x_shear, min_y_shear, max_y_shear + InterpolationMode interpolation_; + std::vector fill_value_; +}; + RandomAffine::RandomAffine(const std::vector °rees, const std::vector &translate_range, const std::vector &scale_range, const std::vector &shear_ranges, InterpolationMode interpolation, const std::vector &fill_value) - : degrees_(degrees), - translate_range_(translate_range), - scale_range_(scale_range), - shear_ranges_(shear_ranges), - interpolation_(interpolation), - fill_value_(fill_value) {} + : data_(std::make_shared(degrees, translate_range, scale_range, shear_ranges, interpolation, fill_value)) {} std::shared_ptr RandomAffine::Parse() { - return std::make_shared(degrees_, translate_range_, scale_range_, shear_ranges_, - interpolation_, fill_value_); + return std::make_shared(data_->degrees_, data_->translate_range_, data_->scale_range_, + data_->shear_ranges_, data_->interpolation_, data_->fill_value_); } // RandomColor Transform Operation. -RandomColor::RandomColor(float t_lb, float t_ub) : t_lb_(t_lb), t_ub_(t_ub) {} +struct RandomColor::Data { + Data(float t_lb, float t_ub) : t_lb_(t_lb), t_ub_(t_ub) {} + float t_lb_; + float t_ub_; +}; -std::shared_ptr RandomColor::Parse() { return std::make_shared(t_lb_, t_ub_); } +RandomColor::RandomColor(float t_lb, float t_ub) : data_(std::make_shared(t_lb, t_ub)) {} + +std::shared_ptr RandomColor::Parse() { + return std::make_shared(data_->t_lb_, data_->t_ub_); +} // RandomColorAdjust Transform Operation. +struct RandomColorAdjust::Data { + Data(const std::vector &brightness, const std::vector &contrast, const std::vector &saturation, + const std::vector &hue) + : brightness_(brightness), contrast_(contrast), saturation_(saturation), hue_(hue) {} + std::vector brightness_; + std::vector contrast_; + std::vector saturation_; + std::vector hue_; +}; + RandomColorAdjust::RandomColorAdjust(std::vector brightness, std::vector contrast, std::vector saturation, std::vector hue) - : brightness_(brightness), contrast_(contrast), saturation_(saturation), hue_(hue) {} + : data_(std::make_shared(brightness, contrast, saturation, hue)) {} + std::shared_ptr RandomColorAdjust::Parse() { - return std::make_shared(brightness_, contrast_, saturation_, hue_); + return std::make_shared(data_->brightness_, data_->contrast_, data_->saturation_, + data_->hue_); } // RandomCrop Transform Operation. 
+struct RandomCrop::Data { + Data(const std::vector &size, const std::vector &padding, bool pad_if_needed, + const std::vector &fill_value, BorderType padding_mode) + : size_(size), + padding_(padding), + pad_if_needed_(pad_if_needed), + fill_value_(fill_value), + padding_mode_(padding_mode) {} + std::vector size_; + std::vector padding_; + bool pad_if_needed_; + std::vector fill_value_; + BorderType padding_mode_; +}; + RandomCrop::RandomCrop(std::vector size, std::vector padding, bool pad_if_needed, std::vector fill_value, BorderType padding_mode) - : size_(size), - padding_(padding), - pad_if_needed_(pad_if_needed), - fill_value_(fill_value), - padding_mode_(padding_mode) {} + : data_(std::make_shared(size, padding, pad_if_needed, fill_value, padding_mode)) {} std::shared_ptr RandomCrop::Parse() { - return std::make_shared(size_, padding_, pad_if_needed_, fill_value_, padding_mode_); + return std::make_shared(data_->size_, data_->padding_, data_->pad_if_needed_, data_->fill_value_, + data_->padding_mode_); } // RandomCropDecodeResize Transform Operation. +struct RandomCropDecodeResize::Data { + Data(const std::vector &size, const std::vector &scale, const std::vector &ratio, + InterpolationMode interpolation, int32_t max_attempts) + : size_(size), scale_(scale), ratio_(ratio), interpolation_(interpolation), max_attempts_(max_attempts) {} + std::vector size_; + std::vector scale_; + std::vector ratio_; + InterpolationMode interpolation_; + int32_t max_attempts_; +}; + RandomCropDecodeResize::RandomCropDecodeResize(std::vector size, std::vector scale, std::vector ratio, InterpolationMode interpolation, int32_t max_attempts) - : size_(size), scale_(scale), ratio_(ratio), interpolation_(interpolation), max_attempts_(max_attempts) {} + : data_(std::make_shared(size, scale, ratio, interpolation, max_attempts)) {} std::shared_ptr RandomCropDecodeResize::Parse() { - return std::make_shared(size_, scale_, ratio_, interpolation_, max_attempts_); + return std::make_shared(data_->size_, data_->scale_, data_->ratio_, + data_->interpolation_, data_->max_attempts_); } // RandomCropWithBBox Transform Operation. +struct RandomCropWithBBox::Data { + Data(const std::vector &size, const std::vector &padding, bool pad_if_needed, + const std::vector &fill_value, BorderType padding_mode) + : size_(size), + padding_(padding), + pad_if_needed_(pad_if_needed), + fill_value_(fill_value), + padding_mode_(padding_mode) {} + std::vector size_; + std::vector padding_; + bool pad_if_needed_; + std::vector fill_value_; + BorderType padding_mode_; +}; + RandomCropWithBBox::RandomCropWithBBox(std::vector size, std::vector padding, bool pad_if_needed, std::vector fill_value, BorderType padding_mode) - : size_(size), - padding_(padding), - pad_if_needed_(pad_if_needed), - fill_value_(fill_value), - padding_mode_(padding_mode) {} + : data_(std::make_shared(size, padding, pad_if_needed, fill_value, padding_mode)) {} std::shared_ptr RandomCropWithBBox::Parse() { - return std::make_shared(size_, padding_, pad_if_needed_, fill_value_, padding_mode_); + return std::make_shared(data_->size_, data_->padding_, data_->pad_if_needed_, + data_->fill_value_, data_->padding_mode_); } // RandomHorizontalFlip. 
-RandomHorizontalFlip::RandomHorizontalFlip(float prob) : probability_(prob) {} +struct RandomHorizontalFlip::Data { + explicit Data(float prob) : probability_(prob) {} + float probability_; +}; + +RandomHorizontalFlip::RandomHorizontalFlip(float prob) : data_(std::make_shared(prob)) {} std::shared_ptr RandomHorizontalFlip::Parse() { - return std::make_shared(probability_); + return std::make_shared(data_->probability_); } // RandomHorizontalFlipWithBBox -RandomHorizontalFlipWithBBox::RandomHorizontalFlipWithBBox(float prob) : probability_(prob) {} +struct RandomHorizontalFlipWithBBox::Data { + explicit Data(float prob) : probability_(prob) {} + float probability_; +}; + +RandomHorizontalFlipWithBBox::RandomHorizontalFlipWithBBox(float prob) : data_(std::make_shared(prob)) {} std::shared_ptr RandomHorizontalFlipWithBBox::Parse() { - return std::make_shared(probability_); + return std::make_shared(data_->probability_); } // RandomPosterize Transform Operation. -RandomPosterize::RandomPosterize(const std::vector &bit_range) : bit_range_(bit_range) {} +struct RandomPosterize::Data { + explicit Data(const std::vector &bit_range) : bit_range_(bit_range) {} + std::vector bit_range_; +}; + +RandomPosterize::RandomPosterize(const std::vector &bit_range) : data_(std::make_shared(bit_range)) {} std::shared_ptr RandomPosterize::Parse() { - return std::make_shared(bit_range_); + return std::make_shared(data_->bit_range_); } // RandomResize Transform Operation. -RandomResize::RandomResize(std::vector size) : size_(size) {} +struct RandomResize::Data { + explicit Data(const std::vector &size) : size_(size) {} + std::vector size_; +}; -std::shared_ptr RandomResize::Parse() { return std::make_shared(size_); } +RandomResize::RandomResize(std::vector size) : data_(std::make_shared(size)) {} + +std::shared_ptr RandomResize::Parse() { return std::make_shared(data_->size_); } // RandomResizeWithBBox Transform Operation. -RandomResizeWithBBox::RandomResizeWithBBox(std::vector size) : size_(size) {} +struct RandomResizeWithBBox::Data { + explicit Data(const std::vector &size) : size_(size) {} + std::vector size_; +}; + +RandomResizeWithBBox::RandomResizeWithBBox(std::vector size) : data_(std::make_shared(size)) {} std::shared_ptr RandomResizeWithBBox::Parse() { - return std::make_shared(size_); + return std::make_shared(data_->size_); } // RandomResizedCrop Transform Operation. +struct RandomResizedCrop::Data { + Data(const std::vector &size, const std::vector &scale, const std::vector &ratio, + InterpolationMode interpolation, int32_t max_attempts) + : size_(size), scale_(scale), ratio_(ratio), interpolation_(interpolation), max_attempts_(max_attempts) {} + std::vector size_; + std::vector scale_; + std::vector ratio_; + InterpolationMode interpolation_; + int32_t max_attempts_; +}; + RandomResizedCrop::RandomResizedCrop(std::vector size, std::vector scale, std::vector ratio, InterpolationMode interpolation, int32_t max_attempts) - : size_(size), scale_(scale), ratio_(ratio), interpolation_(interpolation), max_attempts_(max_attempts) {} + : data_(std::make_shared(size, scale, ratio, interpolation, max_attempts)) {} std::shared_ptr RandomResizedCrop::Parse() { - return std::make_shared(size_, scale_, ratio_, interpolation_, max_attempts_); + return std::make_shared(data_->size_, data_->scale_, data_->ratio_, data_->interpolation_, + data_->max_attempts_); } // RandomResizedCrop Transform Operation. 
+struct RandomResizedCropWithBBox::Data { + Data(const std::vector &size, const std::vector &scale, const std::vector &ratio, + InterpolationMode interpolation, int32_t max_attempts) + : size_(size), scale_(scale), ratio_(ratio), interpolation_(interpolation), max_attempts_(max_attempts) {} + std::vector size_; + std::vector scale_; + std::vector ratio_; + InterpolationMode interpolation_; + int32_t max_attempts_; +}; + RandomResizedCropWithBBox::RandomResizedCropWithBBox(std::vector size, std::vector scale, std::vector ratio, InterpolationMode interpolation, int32_t max_attempts) - : size_(size), scale_(scale), ratio_(ratio), interpolation_(interpolation), max_attempts_(max_attempts) {} + : data_(std::make_shared(size, scale, ratio, interpolation, max_attempts)) {} std::shared_ptr RandomResizedCropWithBBox::Parse() { - return std::make_shared(size_, scale_, ratio_, interpolation_, max_attempts_); + return std::make_shared(data_->size_, data_->scale_, data_->ratio_, + data_->interpolation_, data_->max_attempts_); } // RandomRotation Transform Operation. +struct RandomRotation::Data { + Data(const std::vector °rees, InterpolationMode interpolation_mode, bool expand, + const std::vector ¢er, const std::vector &fill_value) + : degrees_(degrees), + interpolation_mode_(interpolation_mode), + expand_(expand), + center_(center), + fill_value_(fill_value) {} + std::vector degrees_; + InterpolationMode interpolation_mode_; + std::vector center_; + bool expand_; + std::vector fill_value_; +}; + RandomRotation::RandomRotation(std::vector degrees, InterpolationMode interpolation_mode, bool expand, std::vector center, std::vector fill_value) - : degrees_(degrees), - interpolation_mode_(interpolation_mode), - expand_(expand), - center_(center), - fill_value_(fill_value) {} + : data_(std::make_shared(degrees, interpolation_mode, expand, center, fill_value)) {} std::shared_ptr RandomRotation::Parse() { - return std::make_shared(degrees_, interpolation_mode_, expand_, center_, fill_value_); + return std::make_shared(data_->degrees_, data_->interpolation_mode_, data_->expand_, + data_->center_, data_->fill_value_); } // RandomSelectSubpolicy Transform Operation. 
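RandomSelectSubpolicy, in the hunks that follow, is the most involved of these migrations: each overload walks a nested policy (a vector of subpolicies, each a list of transform/probability pairs) and stores the parsed result on data_->policy_ as (TensorOperation, double) pairs. A self-contained sketch of the per-subpolicy normalization, using placeholder TensorTransform/TensorOperation stand-ins and the same null guard BoundingBoxAugment and the transforms.cc constructors apply:

#include <memory>
#include <utility>
#include <vector>

// Placeholder stand-ins for the real MindSpore types.
struct TensorOperation {};
struct TensorTransform {
  virtual ~TensorTransform() = default;
  virtual std::shared_ptr<TensorOperation> Parse() { return std::make_shared<TensorOperation>(); }
};

// Turn one subpolicy given as raw pointers into parsed (operation, probability) pairs;
// a null transform stays a null operation, mirroring the guarded op ? op->Parse() : nullptr
// calls elsewhere in this patch.
inline std::vector<std::pair<std::shared_ptr<TensorOperation>, double>> ParseSubpolicy(
    const std::vector<std::pair<TensorTransform *, double>> &subpolicy) {
  std::vector<std::pair<std::shared_ptr<TensorOperation>, double>> parsed;
  parsed.reserve(subpolicy.size());
  for (const auto &entry : subpolicy) {
    std::shared_ptr<TensorOperation> op = entry.first != nullptr ? entry.first->Parse() : nullptr;
    parsed.emplace_back(op, entry.second);
  }
  return parsed;
}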
-RandomSelectSubpolicy::RandomSelectSubpolicy(std::vector>> policy) { +struct RandomSelectSubpolicy::Data { + std::vector, double>>> policy_; +}; + +RandomSelectSubpolicy::RandomSelectSubpolicy(std::vector>> policy) + : data_(std::make_shared()) { for (int32_t i = 0; i < policy.size(); i++) { std::vector, double>> subpolicy; @@ -358,12 +602,13 @@ RandomSelectSubpolicy::RandomSelectSubpolicy(std::vectorpolicy_.emplace_back(subpolicy); } } RandomSelectSubpolicy::RandomSelectSubpolicy( - std::vector, double>>> policy) { + std::vector, double>>> policy) + : data_(std::make_shared()) { for (int32_t i = 0; i < policy.size(); i++) { std::vector, double>> subpolicy; @@ -373,12 +618,13 @@ RandomSelectSubpolicy::RandomSelectSubpolicy( double prob = policy[i][j].second; subpolicy.emplace_back(std::move(std::make_pair(operation, prob))); } - policy_.emplace_back(subpolicy); + data_->policy_.emplace_back(subpolicy); } } RandomSelectSubpolicy::RandomSelectSubpolicy( - std::vector, double>>> policy) { + std::vector, double>>> policy) + : data_(std::make_shared()) { for (int32_t i = 0; i < policy.size(); i++) { std::vector, double>> subpolicy; @@ -388,64 +634,102 @@ RandomSelectSubpolicy::RandomSelectSubpolicy( double prob = policy[i][j].second; subpolicy.emplace_back(std::move(std::make_pair(operation, prob))); } - policy_.emplace_back(subpolicy); + data_->policy_.emplace_back(subpolicy); } } std::shared_ptr RandomSelectSubpolicy::Parse() { - return std::make_shared(policy_); + return std::make_shared(data_->policy_); } // RandomSharpness Transform Operation. -RandomSharpness::RandomSharpness(std::vector degrees) : degrees_(degrees) {} +struct RandomSharpness::Data { + explicit Data(const std::vector °rees) : degrees_(degrees) {} + std::vector degrees_; +}; + +RandomSharpness::RandomSharpness(std::vector degrees) : data_(std::make_shared(degrees)) {} std::shared_ptr RandomSharpness::Parse() { - return std::make_shared(degrees_); + return std::make_shared(data_->degrees_); } // RandomSolarize Transform Operation. -RandomSolarize::RandomSolarize(std::vector threshold) : threshold_(threshold) {} +struct RandomSolarize::Data { + explicit Data(const std::vector &threshold) : threshold_(threshold) {} + std::vector threshold_; +}; + +RandomSolarize::RandomSolarize(std::vector threshold) : data_(std::make_shared(threshold)) {} std::shared_ptr RandomSolarize::Parse() { - return std::make_shared(threshold_); + return std::make_shared(data_->threshold_); } // RandomVerticalFlip Transform Operation. -RandomVerticalFlip::RandomVerticalFlip(float prob) : probability_(prob) {} +struct RandomVerticalFlip::Data { + explicit Data(float prob) : probability_(prob) {} + float probability_; +}; + +RandomVerticalFlip::RandomVerticalFlip(float prob) : data_(std::make_shared(prob)) {} std::shared_ptr RandomVerticalFlip::Parse() { - return std::make_shared(probability_); + return std::make_shared(data_->probability_); } // RandomVerticalFlipWithBBox Transform Operation. -RandomVerticalFlipWithBBox::RandomVerticalFlipWithBBox(float prob) : probability_(prob) {} +struct RandomVerticalFlipWithBBox::Data { + explicit Data(float prob) : probability_(prob) {} + float probability_; +}; + +RandomVerticalFlipWithBBox::RandomVerticalFlipWithBBox(float prob) : data_(std::make_shared(prob)) {} std::shared_ptr RandomVerticalFlipWithBBox::Parse() { - return std::make_shared(probability_); + return std::make_shared(data_->probability_); } // Rescale Transform Operation. 
-Rescale::Rescale(float rescale, float shift) : rescale_(rescale), shift_(shift) {} +struct Rescale::Data { + Data(float rescale, float shift) : rescale_(rescale), shift_(shift) {} + float rescale_; + float shift_; +}; -std::shared_ptr Rescale::Parse() { return std::make_shared(rescale_, shift_); } +Rescale::Rescale(float rescale, float shift) : data_(std::make_shared(rescale, shift)) {} + +std::shared_ptr Rescale::Parse() { + return std::make_shared(data_->rescale_, data_->shift_); +} #endif // not ENABLE_ANDROID // Resize Transform Operation. -Resize::Resize(std::vector size, InterpolationMode interpolation) - : size_(size), interpolation_(interpolation) {} +struct Resize::Data { + Data(const std::vector &size, InterpolationMode interpolation) + : size_(size), interpolation_(interpolation) {} + std::vector size_; + InterpolationMode interpolation_; +}; -std::shared_ptr Resize::Parse() { return std::make_shared(size_, interpolation_); } +Resize::Resize(std::vector size, InterpolationMode interpolation) + : data_(std::make_shared(size, interpolation)) {} + +std::shared_ptr Resize::Parse() { + return std::make_shared(data_->size_, data_->interpolation_); +} std::shared_ptr Resize::Parse(const MapTargetDevice &env) { if (env == MapTargetDevice::kAscend310) { #ifdef ENABLE_ACL std::vector usize_; - usize_.reserve(size_.size()); - std::transform(size_.begin(), size_.end(), std::back_inserter(usize_), [](int32_t i) { return (uint32_t)i; }); + usize_.reserve(data_->size_.size()); + std::transform(data_->size_.begin(), data_->size_.end(), std::back_inserter(usize_), + [](int32_t i) { return (uint32_t)i; }); return std::make_shared(usize_); #endif // ENABLE_ACL } - return std::make_shared(size_, interpolation_); + return std::make_shared(data_->size_, data_->interpolation_); } #ifdef ENABLE_ANDROID @@ -457,11 +741,18 @@ std::shared_ptr Rotate::Parse() { return std::make_shared &size, InterpolationMode interpolation) + : size_(size), interpolation_(interpolation) {} + std::vector size_; + InterpolationMode interpolation_; +}; + ResizeWithBBox::ResizeWithBBox(std::vector size, InterpolationMode interpolation) - : size_(size), interpolation_(interpolation) {} + : data_(std::make_shared(size, interpolation)) {} std::shared_ptr ResizeWithBBox::Parse() { - return std::make_shared(size_, interpolation_); + return std::make_shared(data_->size_, data_->interpolation_); } // RgbaToBgr Transform Operation. @@ -475,19 +766,36 @@ RGBA2RGB::RGBA2RGB() {} std::shared_ptr RGBA2RGB::Parse() { return std::make_shared(); } // SoftDvppDecodeRandomCropResizeJpeg Transform Operation. +struct SoftDvppDecodeRandomCropResizeJpeg::Data { + Data(const std::vector &size, const std::vector &scale, const std::vector &ratio, + int32_t max_attempts) + : size_(size), scale_(scale), ratio_(ratio), max_attempts_(max_attempts) {} + std::vector size_; + std::vector scale_; + std::vector ratio_; + int32_t max_attempts_; +}; + SoftDvppDecodeRandomCropResizeJpeg::SoftDvppDecodeRandomCropResizeJpeg(std::vector size, std::vector scale, std::vector ratio, int32_t max_attempts) - : size_(size), scale_(scale), ratio_(ratio), max_attempts_(max_attempts) {} + : data_(std::make_shared(size, scale, ratio, max_attempts)) {} + std::shared_ptr SoftDvppDecodeRandomCropResizeJpeg::Parse() { - return std::make_shared(size_, scale_, ratio_, max_attempts_); + return std::make_shared(data_->size_, data_->scale_, data_->ratio_, + data_->max_attempts_); } // SoftDvppDecodeResizeJpeg Transform Operation. 
-SoftDvppDecodeResizeJpeg::SoftDvppDecodeResizeJpeg(std::vector size) : size_(size) {} +struct SoftDvppDecodeResizeJpeg::Data { + explicit Data(const std::vector &size) : size_(size) {} + std::vector size_; +}; + +SoftDvppDecodeResizeJpeg::SoftDvppDecodeResizeJpeg(std::vector size) : data_(std::make_shared(size)) {} std::shared_ptr SoftDvppDecodeResizeJpeg::Parse() { - return std::make_shared(size_); + return std::make_shared(data_->size_); } // SwapRedBlue Transform Operation. @@ -496,27 +804,36 @@ SwapRedBlue::SwapRedBlue() {} std::shared_ptr SwapRedBlue::Parse() { return std::make_shared(); } // UniformAug Transform Operation. -UniformAugment::UniformAugment(const std::vector &transforms, int32_t num_ops) : num_ops_(num_ops) { +struct UniformAugment::Data { + std::vector> transforms_; + int32_t num_ops_; +}; + +UniformAugment::UniformAugment(const std::vector &transforms, int32_t num_ops) + : data_(std::make_shared()) { (void)std::transform( - transforms.begin(), transforms.end(), std::back_inserter(transforms_), + transforms.begin(), transforms.end(), std::back_inserter(data_->transforms_), [](TensorTransform *op) -> std::shared_ptr { return op ? op->Parse() : nullptr; }); + data_->num_ops_ = num_ops; } UniformAugment::UniformAugment(const std::vector> &transforms, int32_t num_ops) - : num_ops_(num_ops) { + : data_(std::make_shared()) { (void)std::transform( - transforms.begin(), transforms.end(), std::back_inserter(transforms_), + transforms.begin(), transforms.end(), std::back_inserter(data_->transforms_), [](std::shared_ptr op) -> std::shared_ptr { return op ? op->Parse() : nullptr; }); + data_->num_ops_ = num_ops; } UniformAugment::UniformAugment(const std::vector> &transforms, int32_t num_ops) - : num_ops_(num_ops) { - (void)std::transform(transforms.begin(), transforms.end(), std::back_inserter(transforms_), + : data_(std::make_shared()) { + (void)std::transform(transforms.begin(), transforms.end(), std::back_inserter(data_->transforms_), [](TensorTransform &op) -> std::shared_ptr { return op.Parse(); }); + data_->num_ops_ = num_ops; } std::shared_ptr UniformAugment::Parse() { - return std::make_shared(transforms_, num_ops_); + return std::make_shared(data_->transforms_, data_->num_ops_); } #endif // not ENABLE_ANDROID diff --git a/mindspore/ccsrc/minddata/dataset/engine/ir/cache/dataset_cache_impl.h b/mindspore/ccsrc/minddata/dataset/engine/ir/cache/dataset_cache_impl.h index f1a0e0b768f..6843eb3183e 100644 --- a/mindspore/ccsrc/minddata/dataset/engine/ir/cache/dataset_cache_impl.h +++ b/mindspore/ccsrc/minddata/dataset/engine/ir/cache/dataset_cache_impl.h @@ -20,6 +20,8 @@ #include #include #include +#include +#include "include/api/dual_abi_helper.h" #include "minddata/dataset/engine/cache/cache_client.h" #include "minddata/dataset/engine/datasetops/cache_op.h" #include "minddata/dataset/engine/ir/cache/dataset_cache.h" @@ -39,13 +41,13 @@ class DatasetCacheImpl : public DatasetCache { /// \param port optional port (default=50052). /// \param num_connections optional number of connections (default=12). /// \param prefetch_sz optional prefetch size (default=20). 
- DatasetCacheImpl(session_id_type id, uint64_t mem_sz, bool spill, std::optional hostname, + DatasetCacheImpl(session_id_type id, uint64_t mem_sz, bool spill, std::optional> hostname, std::optional port, std::optional num_connections, std::optional prefetch_sz) : session_id_(id), cache_mem_sz_(mem_sz), spill_(spill), - hostname_(std::move(hostname)), + hostname_(OptionalCharToString(hostname)), port_(std::move(port)), num_connections_(std::move(num_connections)), prefetch_sz_(std::move(prefetch_sz)) {} diff --git a/mindspore/ccsrc/minddata/dataset/include/config.h b/mindspore/ccsrc/minddata/dataset/include/config.h index 36b6860f695..7c0c35ad993 100644 --- a/mindspore/ccsrc/minddata/dataset/include/config.h +++ b/mindspore/ccsrc/minddata/dataset/include/config.h @@ -19,6 +19,8 @@ #include #include +#include +#include "include/api/dual_abi_helper.h" namespace mindspore { namespace dataset { @@ -70,7 +72,12 @@ int32_t get_callback_timeout(); /// \brief Function to load configuration from a file. /// \param[in] file path of the configuration file to be loaded. -bool load(std::string file); +/// \note This API exists because std::string is constrained by the ABI compile macro, while char is not. +bool load(const std::vector &file); + +/// \brief Function to load configuration from a file. +/// \param[in] file path of the configuration file to be loaded. +inline bool load(std::string file) { return load(StringToChar(file)); } } // namespace config } // namespace dataset diff --git a/mindspore/ccsrc/minddata/dataset/include/datasets.h b/mindspore/ccsrc/minddata/dataset/include/datasets.h index 4cfa0086abf..9907f32be30 100644 --- a/mindspore/ccsrc/minddata/dataset/include/datasets.h +++ b/mindspore/ccsrc/minddata/dataset/include/datasets.h @@ -30,6 +30,7 @@ #include #include +#include "include/api/dual_abi_helper.h" #include "minddata/dataset/include/iterator.h" #include "minddata/dataset/include/samplers.h" #include "minddata/dataset/include/tensor.h" @@ -130,11 +131,13 @@ class Dataset : public std::enable_shared_from_this { /// \brief Gets the column names /// \return Names of the columns. If failed, return an empty vector - std::vector GetColumnNames(); + std::vector GetColumnNames() { return VectorCharToString(GetColumnNamesCharIF()); } /// \brief Gets the class indexing /// \return a map of ClassIndexing. If failed, return an empty map - std::vector>> GetClassIndexing(); + std::vector>> GetClassIndexing() { + return ClassIndexCharToString(GetClassIndexingCharIF()); + } /// \brief Setter function for runtime number of workers /// \param[in] num_workers The number of threads in this operator @@ -146,7 +149,9 @@ class Dataset : public std::enable_shared_from_this { /// \param[in] num_epochs Number of epochs to run through the pipeline, default -1 which means infinite epochs. /// An empty row is returned at the end of each epoch /// \return Shared pointer to the Iterator - std::shared_ptr CreateIterator(std::vector columns = {}, int32_t num_epochs = -1); + std::shared_ptr CreateIterator(std::vector columns = {}, int32_t num_epochs = -1) { + return CreateIteratorCharIF(VectorStringToChar(columns), num_epochs); + } #ifndef ENABLE_ANDROID /// \brief Function to transfer data through a device. @@ -161,7 +166,10 @@ class Dataset : public std::enable_shared_from_this { /// of data or not (default=false). /// \return Returns true if no error encountered else false.
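The config.h hunk above is the template for the whole refactor: the function that actually crosses the library boundary takes std::vector<char>, and a header-only inline overload preserves the original std::string signature by converting at the call site, presumably to stay immune to standard-library string ABI switches such as libstdc++'s _GLIBCXX_USE_CXX11_ABI (my reading of the \note; the patch only says "ABI compile macro"). A self-contained sketch of that shape with a hypothetical LoadConfig function:

// Hypothetical sketch of the wrapper shape used for config::load above.
#include <iostream>
#include <string>
#include <vector>

// The only symbol the shared library would export: no std::string in the signature,
// so the caller's string ABI setting does not matter.
bool LoadConfig(const std::vector<char> &file) {
  std::string path(file.begin(), file.end());  // rebuild the string inside the library
  std::cout << "loading " << path << std::endl;
  return !path.empty();
}

// Header-only convenience overload: keeps the old std::string API and is compiled
// in the caller's translation unit, so it always matches the caller's ABI.
inline bool LoadConfig(const std::string &file) {
  return LoadConfig(std::vector<char>(file.begin(), file.end()));
}

int main() {
  bool ok = LoadConfig("dataset_config.json");  // resolves to the std::string overload
  return ok ? 0 : 1;
}

Callers keep writing the string form; only the char-based overload needs a stable exported symbol.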
bool DeviceQueue(std::string queue_name = "", std::string device_type = "", int32_t num_epochs = -1, - bool send_epoch_end = true, int32_t total_batches = 0, bool create_data_info_queue = false); + bool send_epoch_end = true, int32_t total_batches = 0, bool create_data_info_queue = false) { + return DeviceQueueCharIF(StringToChar(queue_name), StringToChar(device_type), num_epochs, send_epoch_end, + total_batches, create_data_info_queue); + } /// \brief Function to create a Saver to save the dynamic data processed by the dataset pipeline /// \note Usage restrictions: @@ -175,7 +183,9 @@ class Dataset : public std::enable_shared_from_this { /// \param[in] num_files Number of dataset files (default=1) /// \param[in] file_type Dataset format (default="mindrecord") /// \return Returns true if no error encountered else false - bool Save(std::string dataset_path, int32_t num_files = 1, std::string dataset_type = "mindrecord"); + bool Save(std::string dataset_path, int32_t num_files = 1, std::string dataset_type = "mindrecord") { + return SaveCharIF(StringToChar(dataset_path), num_files, StringToChar(dataset_type)); + } #endif /// \brief Function to create a BatchDataset @@ -221,9 +231,9 @@ class Dataset : public std::enable_shared_from_this { std::function element_length_function = nullptr, const std::map>> &pad_info = {}, bool pad_to_bucket_boundary = false, bool drop_remainder = false) { - return std::make_shared(shared_from_this(), column_names, bucket_boundaries, - bucket_batch_sizes, element_length_function, pad_info, - pad_to_bucket_boundary, drop_remainder); + return std::make_shared( + shared_from_this(), VectorStringToChar(column_names), bucket_boundaries, bucket_batch_sizes, + element_length_function, PadInfoStringToChar(pad_info), pad_to_bucket_boundary, drop_remainder); } /// \brief Function to create a SentencePieceVocab from source dataset @@ -238,7 +248,10 @@ class Dataset : public std::enable_shared_from_this { /// \param[in] params A vector contains more option parameters of sentencepiece library std::shared_ptr BuildSentencePieceVocab( const std::vector &col_names, int32_t vocab_size, float character_coverage, - SentencePieceModel model_type, const std::unordered_map ¶ms); + SentencePieceModel model_type, const std::unordered_map ¶ms) { + return BuildSentencePieceVocabCharIF(VectorStringToChar(col_names), vocab_size, character_coverage, model_type, + UnorderedMapStringToChar(params)); + } /// \brief Function to create a Vocab from source dataset /// \notes Build a vocab from a dataset. 
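The BucketBatchByLength wrapper above re-keys pad_info with PadInfoStringToChar while leaving the mapped pad shape/value untouched, and BuildSentencePieceVocab does the analogous conversion for its params map. Below is a generic sketch of that key-only conversion; KeyToChar and the sample pad_info contents are hypothetical, chosen only to show that the mapped type T passes through unchanged.

// Illustrative sketch (KeyToChar is not part of the patch): re-key a map from
// std::string to std::vector<char> while leaving the mapped type T untouched.
#include <algorithm>
#include <iostream>
#include <iterator>
#include <map>
#include <string>
#include <utility>
#include <vector>

template <typename T>
std::map<std::vector<char>, T> KeyToChar(const std::map<std::string, T> &in) {
  std::map<std::vector<char>, T> out;
  std::transform(in.begin(), in.end(), std::inserter(out, out.begin()), [](const auto &kv) {
    return std::pair<std::vector<char>, T>(std::vector<char>(kv.first.begin(), kv.first.end()), kv.second);
  });
  return out;
}

int main() {
  // column name -> (pad rank, pad value), a stand-in for the pad_info argument
  std::map<std::string, std::pair<int, double>> pad_info = {{"image", {2, 0.0}}, {"label", {0, -1.0}}};
  auto converted = KeyToChar(pad_info);
  std::cout << converted.size() << std::endl;  // prints 2, values carried over unchanged
  return 0;
}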
This would collect all the unique words in a dataset and return a vocab @@ -256,7 +269,10 @@ class Dataset : public std::enable_shared_from_this { std::shared_ptr BuildVocab(const std::vector &columns = {}, const std::pair &freq_range = {0, kDeMaxFreq}, int64_t top_k = kDeMaxTopk, const std::vector &special_tokens = {}, - bool special_first = true); + bool special_first = true) { + return BuildVocabCharIF(VectorStringToChar(columns), freq_range, top_k, VectorStringToChar(special_tokens), + special_first); + } /// \brief Function to create a ConcatDataset /// \notes Concat the datasets in the input @@ -275,7 +291,7 @@ class Dataset : public std::enable_shared_from_this { /// \return Shared pointer to the current FilterNode std::shared_ptr Filter(std::function predicate, const std::vector &input_columns = {}) { - return std::make_shared(shared_from_this(), predicate, input_columns); + return std::make_shared(shared_from_this(), predicate, VectorStringToChar(input_columns)); } #endif @@ -305,8 +321,9 @@ class Dataset : public std::enable_shared_from_this { (void)std::transform( operations.begin(), operations.end(), std::back_inserter(transform_ops), [](TensorTransform *op) -> std::shared_ptr { return op != nullptr ? op->Parse() : nullptr; }); - return std::make_shared(shared_from_this(), transform_ops, input_columns, output_columns, - project_columns, cache, callbacks); + return std::make_shared(shared_from_this(), transform_ops, VectorStringToChar(input_columns), + VectorStringToChar(output_columns), VectorStringToChar(project_columns), cache, + callbacks); } std::shared_ptr Map(std::vector> operations, @@ -320,8 +337,9 @@ class Dataset : public std::enable_shared_from_this { [](std::shared_ptr op) -> std::shared_ptr { return op != nullptr ? op->Parse() : nullptr; }); - return std::make_shared(shared_from_this(), transform_ops, input_columns, output_columns, - project_columns, cache, callbacks); + return std::make_shared(shared_from_this(), transform_ops, VectorStringToChar(input_columns), + VectorStringToChar(output_columns), VectorStringToChar(project_columns), cache, + callbacks); } std::shared_ptr Map(const std::vector> operations, @@ -333,8 +351,9 @@ class Dataset : public std::enable_shared_from_this { std::vector> transform_ops; (void)std::transform(operations.begin(), operations.end(), std::back_inserter(transform_ops), [](TensorTransform &op) -> std::shared_ptr { return op.Parse(); }); - return std::make_shared(shared_from_this(), transform_ops, input_columns, output_columns, - project_columns, cache, callbacks); + return std::make_shared(shared_from_this(), transform_ops, VectorStringToChar(input_columns), + VectorStringToChar(output_columns), VectorStringToChar(project_columns), cache, + callbacks); } /// \brief Function to create a Project Dataset @@ -342,7 +361,7 @@ class Dataset : public std::enable_shared_from_this { /// \param[in] columns The name of columns to project /// \return Shared pointer to the current Dataset std::shared_ptr Project(const std::vector &columns) { - return std::make_shared(shared_from_this(), columns); + return std::make_shared(shared_from_this(), VectorStringToChar(columns)); } #ifndef ENABLE_ANDROID @@ -353,7 +372,8 @@ class Dataset : public std::enable_shared_from_this { /// \return Shared pointer to the current Dataset std::shared_ptr Rename(const std::vector &input_columns, const std::vector &output_columns) { - return std::make_shared(shared_from_this(), input_columns, output_columns); + return std::make_shared(shared_from_this(), 
VectorStringToChar(input_columns), + VectorStringToChar(output_columns)); } #endif /// \brief Function to create a RepeatDataset @@ -404,12 +424,39 @@ class Dataset : public std::enable_shared_from_this { protected: std::shared_ptr tree_getters_; std::shared_ptr ir_node_; + + private: + // Char interface(CharIF) of GetColumnNames + std::vector> GetColumnNamesCharIF(); + + // Char interface(CharIF) of GetClassIndexing + std::vector, std::vector>> GetClassIndexingCharIF(); + + // Char interface(CharIF) of CreateIterator + std::shared_ptr CreateIteratorCharIF(std::vector> columns, int32_t num_epochs); + + // Char interface(CharIF) of DeviceQueue + bool DeviceQueueCharIF(const std::vector &queue_name, const std::vector &device_type, int32_t num_epochs, + bool send_epoch_end, int32_t total_batches, bool create_data_info_queue); + + // Char interface(CharIF) of Save + bool SaveCharIF(const std::vector &dataset_path, int32_t num_files, const std::vector &dataset_type); + + // Char interface(CharIF) of BuildSentencePieceVocab + std::shared_ptr BuildSentencePieceVocabCharIF( + const std::vector> &col_names, int32_t vocab_size, float character_coverage, + SentencePieceModel model_type, const std::map, std::vector> ¶ms); + + // Char interface(CharIF) of BuildVocab + std::shared_ptr BuildVocabCharIF(const std::vector> &columns, + const std::pair &freq_range, int64_t top_k, + const std::vector> &special_tokens, bool special_first); }; class SchemaObj { public: /// \brief Constructor - explicit SchemaObj(const std::string &schema_file = ""); + explicit SchemaObj(const std::string &schema_file = "") : SchemaObj(StringToChar(schema_file)) {} /// \brief Destructor ~SchemaObj() = default; @@ -422,54 +469,62 @@ class SchemaObj { /// \param[in] name Name of the column. /// \param[in] de_type Data type of the column(TypeId). /// \return Status code - Status add_column(const std::string &name, TypeId de_type); + Status add_column(const std::string &name, TypeId de_type) { return add_column_char(StringToChar(name), de_type); } /// \brief Add new column to the schema with unknown shape of rank 1 /// \param[in] name Name of the column. /// \param[in] de_type Data type of the column(std::string). /// \param[in] shape Shape of the column. /// \return Status code - Status add_column(const std::string &name, const std::string &de_type); + Status add_column(const std::string &name, const std::string &de_type) { + return add_column_char(StringToChar(name), StringToChar(de_type)); + } /// \brief Add new column to the schema /// \param[in] name Name of the column. /// \param[in] de_type Data type of the column(TypeId). /// \param[in] shape Shape of the column. /// \return Status code - Status add_column(const std::string &name, TypeId de_type, const std::vector &shape); + Status add_column(const std::string &name, TypeId de_type, const std::vector &shape) { + return add_column_char(StringToChar(name), de_type, shape); + } /// \brief Add new column to the schema /// \param[in] name Name of the column. /// \param[in] de_type Data type of the column(std::string). /// \param[in] shape Shape of the column. 
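The private block added above gives Dataset one *CharIF method per public std::string method, and the public methods shown earlier become header-inline forwarders around them. A minimal sketch of that class-level split, with a hypothetical Table class standing in for Dataset:

// Minimal sketch (hypothetical Table/ColumnNames names) of the public-inline /
// private-CharIF split used for Dataset::GetColumnNames and friends above.
#include <iostream>
#include <string>
#include <vector>

class Table {
 public:
  // Public API stays string-based and lives entirely in the header.
  std::vector<std::string> ColumnNames() {
    std::vector<std::string> out;
    for (const auto &c : ColumnNamesCharIF()) out.emplace_back(c.begin(), c.end());
    return out;
  }

 private:
  // The only function that would be compiled into the library: char-based signature.
  std::vector<std::vector<char>> ColumnNamesCharIF() {
    return {{'i', 'm', 'a', 'g', 'e'}, {'l', 'a', 'b', 'e', 'l'}};
  }
};

int main() {
  Table t;
  for (const auto &name : t.ColumnNames()) std::cout << name << std::endl;  // image, label
  return 0;
}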
/// \return Status code - Status add_column(const std::string &name, const std::string &de_type, const std::vector &shape); + Status add_column(const std::string &name, const std::string &de_type, const std::vector &shape) { + return add_column_char(StringToChar(name), StringToChar(de_type), shape); + } /// \brief Get a JSON string of the schema /// \return JSON string of the schema - std::string to_json(); + std::string to_json() { return CharToString(to_json_char()); } /// \brief Get a JSON string of the schema std::string to_string() { return to_json(); } /// \brief Set a new value to dataset_type - inline void set_dataset_type(std::string dataset_type) { dataset_type_ = std::move(dataset_type); } + void set_dataset_type(std::string dataset_type); /// \brief Set a new value to num_rows - inline void set_num_rows(int32_t num_rows) { num_rows_ = num_rows; } + void set_num_rows(int32_t num_rows); /// \brief Get the current num_rows - inline int32_t get_num_rows() const { return num_rows_; } + int32_t get_num_rows() const; /// \brief Get schema file from JSON file /// \param[in] json_string Name of JSON file to be parsed. /// \return Status code - Status FromJSONString(const std::string &json_string); + Status FromJSONString(const std::string &json_string) { return FromJSONStringCharIF(StringToChar(json_string)); } /// \brief Parse and add column information /// \param[in] json_string Name of JSON string for column dataset attribute information, decoded from schema file. /// \return Status code - Status ParseColumnString(const std::string &json_string); + Status ParseColumnString(const std::string &json_string) { + return ParseColumnStringCharIF(StringToChar(json_string)); + } private: /// \brief Parse the columns and add them to columns @@ -483,10 +538,30 @@ class SchemaObj { /// \return Status code Status from_json(nlohmann::json json_obj); - int32_t num_rows_; - std::string dataset_type_; - std::string schema_file_; - nlohmann::json columns_; + // Char constructor of SchemaObj + explicit SchemaObj(const std::vector &schema_file); + + // Char interface of add_column + Status add_column_char(const std::vector &name, TypeId de_type); + + Status add_column_char(const std::vector &name, const std::vector &de_type); + + Status add_column_char(const std::vector &name, TypeId de_type, const std::vector &shape); + + Status add_column_char(const std::vector &name, const std::vector &de_type, + const std::vector &shape); + + // Char interface of to_json + const std::vector to_json_char(); + + // Char interface of FromJSONString + Status FromJSONStringCharIF(const std::vector &json_string); + + // Char interface of ParseColumnString + Status ParseColumnStringCharIF(const std::vector &json_string); + + struct Data; + std::shared_ptr data_; }; class BatchDataset : public Dataset { @@ -499,10 +574,10 @@ class BatchDataset : public Dataset { class BucketBatchByLengthDataset : public Dataset { public: BucketBatchByLengthDataset( - std::shared_ptr input, const std::vector &column_names, + std::shared_ptr input, const std::vector> &column_names, const std::vector &bucket_boundaries, const std::vector &bucket_batch_sizes, std::function element_length_function = nullptr, - const std::map>> &pad_info = {}, + const std::map, std::pair>> &pad_info = {}, bool pad_to_bucket_boundary = false, bool drop_remainder = false); ~BucketBatchByLengthDataset() = default; }; @@ -516,7 +591,7 @@ class ConcatDataset : public Dataset { class FilterDataset : public Dataset { public: FilterDataset(std::shared_ptr input, 
std::function predicate, - const std::vector &input_columns); + const std::vector> &input_columns); ~FilterDataset() = default; }; #endif @@ -524,23 +599,23 @@ class FilterDataset : public Dataset { class MapDataset : public Dataset { public: MapDataset(std::shared_ptr input, std::vector> operations, - const std::vector &input_columns, const std::vector &output_columns, - const std::vector &project_columns, const std::shared_ptr &cache, + const std::vector> &input_columns, const std::vector> &output_columns, + const std::vector> &project_columns, const std::shared_ptr &cache, std::vector> callbacks); ~MapDataset() = default; }; class ProjectDataset : public Dataset { public: - ProjectDataset(std::shared_ptr input, const std::vector &columns); + ProjectDataset(std::shared_ptr input, const std::vector> &columns); ~ProjectDataset() = default; }; #ifndef ENABLE_ANDROID class RenameDataset : public Dataset { public: - RenameDataset(std::shared_ptr input, const std::vector &input_columns, - const std::vector &output_columns); + RenameDataset(std::shared_ptr input, const std::vector> &input_columns, + const std::vector> &output_columns); ~RenameDataset() = default; }; #endif @@ -576,23 +651,31 @@ class ZipDataset : public Dataset { ~ZipDataset() = default; }; #endif + +/// \brief Function to create a SchemaObj +/// \param[in] schema_file Path of schema file +/// \note This API exists because std::string is constrained by the ABI compile macro, while char is not. +/// \return Shared pointer to the current schema +std::shared_ptr SchemaCharIF(const std::vector &schema_file); + /// \brief Function to create a SchemaObj /// \param[in] schema_file Path of schema file /// \return Shared pointer to the current schema -std::shared_ptr Schema(const std::string &schema_file = ""); +inline std::shared_ptr Schema(const std::string &schema_file = "") { + return SchemaCharIF(StringToChar(schema_file)); +} class AlbumDataset : public Dataset { public: - AlbumDataset(const std::string &dataset_dir, const std::string &data_schema, - const std::vector &column_names = {}, bool decode = false, - const std::shared_ptr &sampler = std::make_shared(), - const std::shared_ptr &cache = nullptr); - AlbumDataset(const std::string &dataset_dir, const std::string &data_schema, - const std::vector &column_names, bool decode, Sampler *sampler, - const std::shared_ptr &cache = nullptr); - AlbumDataset(const std::string &dataset_dir, const std::string &data_schema, - const std::vector &column_names, bool decode, const std::reference_wrapper sampler, - const std::shared_ptr &cache = nullptr); + AlbumDataset(const std::vector &dataset_dir, const std::vector &data_schema, + const std::vector> &column_names, bool decode, const std::shared_ptr &sampler, + const std::shared_ptr &cache); + AlbumDataset(const std::vector &dataset_dir, const std::vector &data_schema, + const std::vector> &column_names, bool decode, Sampler *sampler, + const std::shared_ptr &cache); + AlbumDataset(const std::vector &dataset_dir, const std::vector &data_schema, + const std::vector> &column_names, bool decode, + const std::reference_wrapper sampler, const std::shared_ptr &cache); ~AlbumDataset() = default; }; @@ -608,10 +691,13 @@ class AlbumDataset : public Dataset { /// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used).
/// \return Shared pointer to the current Dataset -std::shared_ptr Album(const std::string &dataset_dir, const std::string &data_schema, - const std::vector &column_names = {}, bool decode = false, - const std::shared_ptr &sampler = std::make_shared(), - const std::shared_ptr &cache = nullptr); +inline std::shared_ptr Album(const std::string &dataset_dir, const std::string &data_schema, + const std::vector &column_names = {}, bool decode = false, + const std::shared_ptr &sampler = std::make_shared(), + const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_dir), StringToChar(data_schema), + VectorStringToChar(column_names), decode, sampler, cache); +} /// \brief Function to create an AlbumDataset /// \notes The generated dataset is specified through setting a schema /// \param[in] dataset_dir Path to the root directory that contains the dataset @@ -621,9 +707,12 @@ std::shared_ptr Album(const std::string &dataset_dir, const std::s /// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset. /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). /// \return Shared pointer to the current Dataset -std::shared_ptr Album(const std::string &dataset_dir, const std::string &data_schema, - const std::vector &column_names, bool decode, Sampler *sampler, - const std::shared_ptr &cache = nullptr); +inline std::shared_ptr Album(const std::string &dataset_dir, const std::string &data_schema, + const std::vector &column_names, bool decode, Sampler *sampler, + const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_dir), StringToChar(data_schema), + VectorStringToChar(column_names), decode, sampler, cache); +} /// \brief Function to create an AlbumDataset /// \notes The generated dataset is specified through setting a schema /// \param[in] dataset_dir Path to the root directory that contains the dataset @@ -633,25 +722,26 @@ std::shared_ptr Album(const std::string &dataset_dir, const std::s /// \param[in] sampler Sampler object used to choose samples from the dataset. /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). 
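Note where the default arguments went in the Album() hunks above: the AlbumDataset constructors lose them entirely, and the inline Album() overloads keep them, so user code that relied on the old defaults still compiles while the exported constructors keep a single default-free, char-based signature. A hedged sketch of that arrangement with hypothetical FolderDataset/Folder names:

// Sketch with hypothetical names: the char-based constructor has no defaults,
// the inline factory keeps the ergonomic std::string interface and the defaults.
#include <cstddef>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

class FolderDataset {
 public:
  FolderDataset(const std::vector<char> &dir, const std::vector<std::vector<char>> &columns, bool decode)
      : dir_(dir.begin(), dir.end()), num_columns_(columns.size()), decode_(decode) {}
  void Describe() const {
    std::cout << dir_ << " columns=" << num_columns_ << " decode=" << decode_ << std::endl;
  }

 private:
  std::string dir_;
  std::size_t num_columns_;
  bool decode_;
};

inline std::shared_ptr<FolderDataset> Folder(const std::string &dir,
                                             const std::vector<std::string> &columns = {},
                                             bool decode = false) {
  std::vector<std::vector<char>> cols;
  for (const auto &c : columns) cols.emplace_back(c.begin(), c.end());
  return std::make_shared<FolderDataset>(std::vector<char>(dir.begin(), dir.end()), cols, decode);
}

int main() {
  auto ds = Folder("/data/album");                    // defaults still usable at the call site
  auto ds2 = Folder("/data/album", {"image"}, true);  // explicit arguments also work
  ds->Describe();
  ds2->Describe();
  return 0;
}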
/// \return Shared pointer to the current Dataset -std::shared_ptr Album(const std::string &dataset_dir, const std::string &data_schema, - const std::vector &column_names, bool decode, - const std::reference_wrapper sampler, - const std::shared_ptr &cache = nullptr); +inline std::shared_ptr Album(const std::string &dataset_dir, const std::string &data_schema, + const std::vector &column_names, bool decode, + const std::reference_wrapper sampler, + const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_dir), StringToChar(data_schema), + VectorStringToChar(column_names), decode, sampler, cache); +} #ifndef ENABLE_ANDROID class CelebADataset : public Dataset { public: - explicit CelebADataset(const std::string &dataset_dir, const std::string &usage = "all", - const std::shared_ptr &sampler = std::make_shared(), - bool decode = false, const std::set &extensions = {}, - const std::shared_ptr &cache = nullptr); - explicit CelebADataset(const std::string &dataset_dir, const std::string &usage, Sampler *sampler, - bool decode = false, const std::set &extensions = {}, - const std::shared_ptr &cache = nullptr); - explicit CelebADataset(const std::string &dataset_dir, const std::string &usage, - const std::reference_wrapper sampler, bool decode = false, - const std::set &extensions = {}, - const std::shared_ptr &cache = nullptr); + explicit CelebADataset(const std::vector &dataset_dir, const std::vector &usage, + const std::shared_ptr &sampler, bool decode, + const std::set> &extensions, const std::shared_ptr &cache); + explicit CelebADataset(const std::vector &dataset_dir, const std::vector &usage, Sampler *sampler, + bool decode, const std::set> &extensions, + const std::shared_ptr &cache); + explicit CelebADataset(const std::vector &dataset_dir, const std::vector &usage, + const std::reference_wrapper sampler, bool decode, + const std::set> &extensions, const std::shared_ptr &cache); ~CelebADataset() = default; }; @@ -667,10 +757,14 @@ class CelebADataset : public Dataset { /// \param[in] extensions Set of file extensions to be included in the dataset (default={}). /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). /// \return Shared pointer to the current Dataset -std::shared_ptr CelebA(const std::string &dataset_dir, const std::string &usage = "all", - const std::shared_ptr &sampler = std::make_shared(), - bool decode = false, const std::set &extensions = {}, - const std::shared_ptr &cache = nullptr); +inline std::shared_ptr CelebA( + const std::string &dataset_dir, const std::string &usage = "all", + const std::shared_ptr &sampler = std::make_shared(), bool decode = false, + const std::set &extensions = {}, const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_dir), StringToChar(usage), sampler, decode, + SetStringToChar(extensions), cache); +} + /// \brief Function to create a CelebADataset /// \notes The generated dataset has two columns ['image', 'attr']. /// The type of the image tensor is uint8. The attr tensor is uint32 and one hot type. @@ -681,9 +775,13 @@ std::shared_ptr CelebA(const std::string &dataset_dir, const std: /// \param[in] extensions Set of file extensions to be included in the dataset (default={}). /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). 
/// \return Shared pointer to the current Dataset -std::shared_ptr CelebA(const std::string &dataset_dir, const std::string &usage, Sampler *sampler, - bool decode = false, const std::set &extensions = {}, - const std::shared_ptr &cache = nullptr); +inline std::shared_ptr CelebA(const std::string &dataset_dir, const std::string &usage, Sampler *sampler, + bool decode = false, const std::set &extensions = {}, + const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_dir), StringToChar(usage), sampler, decode, + SetStringToChar(extensions), cache); +} + /// \brief Function to create a CelebADataset /// \notes The generated dataset has two columns ['image', 'attr']. /// The type of the image tensor is uint8. The attr tensor is uint32 and one hot type. @@ -694,21 +792,22 @@ std::shared_ptr CelebA(const std::string &dataset_dir, const std: /// \param[in] extensions Set of file extensions to be included in the dataset (default={}). /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). /// \return Shared pointer to the current Dataset -std::shared_ptr CelebA(const std::string &dataset_dir, const std::string &usage, - const std::reference_wrapper sampler, bool decode = false, - const std::set &extensions = {}, - const std::shared_ptr &cache = nullptr); +inline std::shared_ptr CelebA(const std::string &dataset_dir, const std::string &usage, + const std::reference_wrapper sampler, bool decode = false, + const std::set &extensions = {}, + const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_dir), StringToChar(usage), sampler, decode, + SetStringToChar(extensions), cache); +} class Cifar10Dataset : public Dataset { public: - explicit Cifar10Dataset(const std::string &dataset_dir, const std::string &usage = "all", - const std::shared_ptr &sampler = std::make_shared(), - const std::shared_ptr &cache = nullptr); - explicit Cifar10Dataset(const std::string &dataset_dir, const std::string &usage, Sampler *sampler, - const std::shared_ptr &cache = nullptr); - explicit Cifar10Dataset(const std::string &dataset_dir, const std::string &usage, - const std::reference_wrapper sampler, - const std::shared_ptr &cache = nullptr); + explicit Cifar10Dataset(const std::vector &dataset_dir, const std::vector &usage, + const std::shared_ptr &sampler, const std::shared_ptr &cache); + explicit Cifar10Dataset(const std::vector &dataset_dir, const std::vector &usage, Sampler *sampler, + const std::shared_ptr &cache); + explicit Cifar10Dataset(const std::vector &dataset_dir, const std::vector &usage, + const std::reference_wrapper sampler, const std::shared_ptr &cache); ~Cifar10Dataset() = default; }; @@ -721,9 +820,13 @@ class Cifar10Dataset : public Dataset { /// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). 
/// \return Shared pointer to the current Dataset -std::shared_ptr Cifar10(const std::string &dataset_dir, const std::string &usage = "all", - const std::shared_ptr &sampler = std::make_shared(), - const std::shared_ptr &cache = nullptr); +inline std::shared_ptr Cifar10( + const std::string &dataset_dir, const std::string &usage = "all", + const std::shared_ptr &sampler = std::make_shared(), + const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_dir), StringToChar(usage), sampler, cache); +} + /// \brief Function to create a Cifar10 Dataset /// \notes The generated dataset has two columns ["image", "label"] /// \param[in] dataset_dir Path to the root directory that contains the dataset @@ -731,8 +834,11 @@ std::shared_ptr Cifar10(const std::string &dataset_dir, const st /// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset. /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). /// \return Shared pointer to the current Dataset -std::shared_ptr Cifar10(const std::string &dataset_dir, const std::string &usage, Sampler *sampler, - const std::shared_ptr &cache = nullptr); +inline std::shared_ptr Cifar10(const std::string &dataset_dir, const std::string &usage, + Sampler *sampler, const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_dir), StringToChar(usage), sampler, cache); +} + /// \brief Function to create a Cifar10 Dataset /// \notes The generated dataset has two columns ["image", "label"] /// \param[in] dataset_dir Path to the root directory that contains the dataset @@ -740,20 +846,20 @@ std::shared_ptr Cifar10(const std::string &dataset_dir, const st /// \param[in] sampler Sampler object used to choose samples from the dataset. /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). 
/// \return Shared pointer to the current Dataset -std::shared_ptr Cifar10(const std::string &dataset_dir, const std::string &usage, - const std::reference_wrapper sampler, - const std::shared_ptr &cache = nullptr); +inline std::shared_ptr Cifar10(const std::string &dataset_dir, const std::string &usage, + const std::reference_wrapper sampler, + const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_dir), StringToChar(usage), sampler, cache); +} class Cifar100Dataset : public Dataset { public: - explicit Cifar100Dataset(const std::string &dataset_dir, const std::string &usage = "all", - const std::shared_ptr &sampler = std::make_shared(), - const std::shared_ptr &cache = nullptr); - explicit Cifar100Dataset(const std::string &dataset_dir, const std::string &usage, Sampler *sampler, - const std::shared_ptr &cache = nullptr); - explicit Cifar100Dataset(const std::string &dataset_dir, const std::string &usage, - const std::reference_wrapper sampler, - const std::shared_ptr &cache = nullptr); + explicit Cifar100Dataset(const std::vector &dataset_dir, const std::vector &usage, + const std::shared_ptr &sampler, const std::shared_ptr &cache); + explicit Cifar100Dataset(const std::vector &dataset_dir, const std::vector &usage, Sampler *sampler, + const std::shared_ptr &cache); + explicit Cifar100Dataset(const std::vector &dataset_dir, const std::vector &usage, + const std::reference_wrapper sampler, const std::shared_ptr &cache); ~Cifar100Dataset() = default; }; @@ -766,9 +872,13 @@ class Cifar100Dataset : public Dataset { /// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). /// \return Shared pointer to the current Dataset -std::shared_ptr Cifar100(const std::string &dataset_dir, const std::string &usage = "all", - const std::shared_ptr &sampler = std::make_shared(), - const std::shared_ptr &cache = nullptr); +inline std::shared_ptr Cifar100( + const std::string &dataset_dir, const std::string &usage = "all", + const std::shared_ptr &sampler = std::make_shared(), + const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_dir), StringToChar(usage), sampler, cache); +} + /// \brief Function to create a Cifar100 Dataset /// \notes The generated dataset has three columns ["image", "coarse_label", "fine_label"] /// \param[in] dataset_dir Path to the root directory that contains the dataset @@ -776,8 +886,12 @@ std::shared_ptr Cifar100(const std::string &dataset_dir, const /// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset. /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). 
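Each dataset here, Cifar10 and Cifar100 included, keeps three factory forms that differ only in how the sampler is handed over: shared ownership via shared_ptr, a borrowed raw pointer, or a reference_wrapper. The standalone sketch below (Sampler and Describe are stand-ins, not the patch's API) shows how the three call sites look and why the overloads can coexist without ambiguity.

// Illustrative only: the three sampler-passing forms kept for every dataset above.
#include <functional>
#include <iostream>
#include <memory>

struct Sampler {
  virtual ~Sampler() = default;
  virtual const char *Name() const { return "SequentialSampler"; }
};

void Describe(const std::shared_ptr<Sampler> &s) { std::cout << "shared: " << s->Name() << std::endl; }
void Describe(Sampler *s) { std::cout << "raw: " << s->Name() << std::endl; }
void Describe(std::reference_wrapper<Sampler> s) { std::cout << "ref: " << s.get().Name() << std::endl; }

int main() {
  auto shared = std::make_shared<Sampler>();
  Sampler on_stack;
  Describe(shared);              // caller shares ownership
  Describe(&on_stack);           // caller keeps ownership, passes a raw pointer
  Describe(std::ref(on_stack));  // reference wrapper, no ownership transfer
  return 0;
}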
/// \return Shared pointer to the current Dataset -std::shared_ptr Cifar100(const std::string &dataset_dir, const std::string &usage, Sampler *sampler, - const std::shared_ptr &cache = nullptr); +inline std::shared_ptr Cifar100(const std::string &dataset_dir, const std::string &usage, + Sampler *sampler, + const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_dir), StringToChar(usage), sampler, cache); +} + /// \brief Function to create a Cifar100 Dataset /// \notes The generated dataset has three columns ["image", "coarse_label", "fine_label"] /// \param[in] dataset_dir Path to the root directory that contains the dataset @@ -785,16 +899,17 @@ std::shared_ptr Cifar100(const std::string &dataset_dir, const /// \param[in] sampler Sampler object used to choose samples from the dataset. /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). /// \return Shared pointer to the current Dataset -std::shared_ptr Cifar100(const std::string &dataset_dir, const std::string &usage, - const std::reference_wrapper sampler, - const std::shared_ptr &cache = nullptr); +inline std::shared_ptr Cifar100(const std::string &dataset_dir, const std::string &usage, + const std::reference_wrapper sampler, + const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_dir), StringToChar(usage), sampler, cache); +} class CLUEDataset : public Dataset { public: - explicit CLUEDataset(const std::vector &dataset_files, const std::string &task = "AFQMC", - const std::string &usage = "train", int64_t num_samples = 0, - ShuffleMode shuffle = ShuffleMode::kGlobal, int32_t num_shards = 1, int32_t shard_id = 0, - const std::shared_ptr &cache = nullptr); + explicit CLUEDataset(const std::vector> &dataset_files, const std::vector &task, + const std::vector &usage, int64_t num_samples, ShuffleMode shuffle, int32_t num_shards, + int32_t shard_id, const std::shared_ptr &cache); ~CLUEDataset() = default; }; @@ -816,21 +931,26 @@ class CLUEDataset : public Dataset { /// specified only when num_shards is also specified. (Default = 0) /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). 
/// \return Shared pointer to the current CLUEDataset -std::shared_ptr CLUE(const std::vector &dataset_files, const std::string &task = "AFQMC", - const std::string &usage = "train", int64_t num_samples = 0, - ShuffleMode shuffle = ShuffleMode::kGlobal, int32_t num_shards = 1, - int32_t shard_id = 0, const std::shared_ptr &cache = nullptr); +inline std::shared_ptr CLUE(const std::vector &dataset_files, + const std::string &task = "AFQMC", const std::string &usage = "train", + int64_t num_samples = 0, ShuffleMode shuffle = ShuffleMode::kGlobal, + int32_t num_shards = 1, int32_t shard_id = 0, + const std::shared_ptr &cache = nullptr) { + return std::make_shared(VectorStringToChar(dataset_files), StringToChar(task), StringToChar(usage), + num_samples, shuffle, num_shards, shard_id, cache); +} class CocoDataset : public Dataset { public: - CocoDataset(const std::string &dataset_dir, const std::string &annotation_file, const std::string &task = "Detection", - const bool &decode = false, const std::shared_ptr &sampler = std::make_shared(), - const std::shared_ptr &cache = nullptr); - CocoDataset(const std::string &dataset_dir, const std::string &annotation_file, const std::string &task, - const bool &decode, Sampler *sampler, const std::shared_ptr &cache = nullptr); - CocoDataset(const std::string &dataset_dir, const std::string &annotation_file, const std::string &task, - const bool &decode, const std::reference_wrapper sampler, - const std::shared_ptr &cache = nullptr); + CocoDataset(const std::vector &dataset_dir, const std::vector &annotation_file, + const std::vector &task, const bool &decode, const std::shared_ptr &sampler, + const std::shared_ptr &cache); + CocoDataset(const std::vector &dataset_dir, const std::vector &annotation_file, + const std::vector &task, const bool &decode, Sampler *sampler, + const std::shared_ptr &cache); + CocoDataset(const std::vector &dataset_dir, const std::vector &annotation_file, + const std::vector &task, const bool &decode, const std::reference_wrapper sampler, + const std::shared_ptr &cache); ~CocoDataset() = default; }; @@ -852,10 +972,14 @@ class CocoDataset : public Dataset { /// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). /// \return Shared pointer to the current Dataset -std::shared_ptr Coco(const std::string &dataset_dir, const std::string &annotation_file, - const std::string &task = "Detection", const bool &decode = false, - const std::shared_ptr &sampler = std::make_shared(), - const std::shared_ptr &cache = nullptr); +inline std::shared_ptr Coco(const std::string &dataset_dir, const std::string &annotation_file, + const std::string &task = "Detection", const bool &decode = false, + const std::shared_ptr &sampler = std::make_shared(), + const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_dir), StringToChar(annotation_file), StringToChar(task), + decode, sampler, cache); +} + /// \brief Function to create a CocoDataset /// \notes The generated dataset has multi-columns : /// - task='Detection', column: [['image', dtype=uint8], ['bbox', dtype=float32], ['category_id', dtype=uint32], @@ -872,9 +996,13 @@ std::shared_ptr Coco(const std::string &dataset_dir, const std::str /// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset. /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). 
/// \return Shared pointer to the current Dataset -std::shared_ptr Coco(const std::string &dataset_dir, const std::string &annotation_file, - const std::string &task, const bool &decode, Sampler *sampler, - const std::shared_ptr &cache = nullptr); +inline std::shared_ptr Coco(const std::string &dataset_dir, const std::string &annotation_file, + const std::string &task, const bool &decode, Sampler *sampler, + const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_dir), StringToChar(annotation_file), StringToChar(task), + decode, sampler, cache); +} + /// \brief Function to create a CocoDataset /// \notes The generated dataset has multi-columns : /// - task='Detection', column: [['image', dtype=uint8], ['bbox', dtype=float32], ['category_id', dtype=uint32], @@ -891,18 +1019,20 @@ std::shared_ptr Coco(const std::string &dataset_dir, const std::str /// \param[in] sampler Sampler object used to choose samples from the dataset. /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). /// \return Shared pointer to the current Dataset -std::shared_ptr Coco(const std::string &dataset_dir, const std::string &annotation_file, - const std::string &task, const bool &decode, - const std::reference_wrapper sampler, - const std::shared_ptr &cache = nullptr); +inline std::shared_ptr Coco(const std::string &dataset_dir, const std::string &annotation_file, + const std::string &task, const bool &decode, + const std::reference_wrapper sampler, + const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_dir), StringToChar(annotation_file), StringToChar(task), + decode, sampler, cache); +} class CSVDataset : public Dataset { public: - explicit CSVDataset(const std::vector &dataset_files, char field_delim = ',', - const std::vector> &column_defaults = {}, - const std::vector &column_names = {}, int64_t num_samples = 0, - ShuffleMode shuffle = ShuffleMode::kGlobal, int32_t num_shards = 1, int32_t shard_id = 0, - const std::shared_ptr &cache = nullptr); + explicit CSVDataset(const std::vector> &dataset_files, char field_delim, + const std::vector> &column_defaults, + const std::vector> &column_names, int64_t num_samples, ShuffleMode shuffle, + int32_t num_shards, int32_t shard_id, const std::shared_ptr &cache); ~CSVDataset() = default; }; @@ -927,28 +1057,31 @@ class CSVDataset : public Dataset { /// specified only when num_shards is also specified. (Default = 0) /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). 
/// \return Shared pointer to the current Dataset -std::shared_ptr CSV(const std::vector &dataset_files, char field_delim = ',', - const std::vector> &column_defaults = {}, - const std::vector &column_names = {}, int64_t num_samples = 0, - ShuffleMode shuffle = ShuffleMode::kGlobal, int32_t num_shards = 1, - int32_t shard_id = 0, const std::shared_ptr &cache = nullptr); +inline std::shared_ptr CSV(const std::vector &dataset_files, char field_delim = ',', + const std::vector> &column_defaults = {}, + const std::vector &column_names = {}, int64_t num_samples = 0, + ShuffleMode shuffle = ShuffleMode::kGlobal, int32_t num_shards = 1, + int32_t shard_id = 0, const std::shared_ptr &cache = nullptr) { + return std::make_shared(VectorStringToChar(dataset_files), field_delim, column_defaults, + VectorStringToChar(column_names), num_samples, shuffle, num_shards, shard_id, + cache); +} class ImageFolderDataset : public Dataset { public: - explicit ImageFolderDataset(const std::string &dataset_dir, bool decode = false, - const std::shared_ptr &sampler = std::make_shared(), - const std::set &extensions = {}, - const std::map &class_indexing = {}, - const std::shared_ptr &cache = nullptr); - explicit ImageFolderDataset(const std::string &dataset_dir, bool decode, Sampler *sampler, - const std::set &extensions = {}, - const std::map &class_indexing = {}, - const std::shared_ptr &cache = nullptr); - explicit ImageFolderDataset(const std::string &dataset_dir, bool decode, + explicit ImageFolderDataset(const std::vector &dataset_dir, bool decode, + const std::shared_ptr &sampler, const std::set> &extensions, + const std::map, int32_t> &class_indexing, + const std::shared_ptr &cache); + explicit ImageFolderDataset(const std::vector &dataset_dir, bool decode, Sampler *sampler, + const std::set> &extensions, + const std::map, int32_t> &class_indexing, + const std::shared_ptr &cache); + explicit ImageFolderDataset(const std::vector &dataset_dir, bool decode, const std::reference_wrapper sampler, - const std::set &extensions = {}, - const std::map &class_indexing = {}, - const std::shared_ptr &cache = nullptr); + const std::set> &extensions, + const std::map, int32_t> &class_indexing, + const std::shared_ptr &cache); ~ImageFolderDataset() = default; }; @@ -965,11 +1098,15 @@ class ImageFolderDataset : public Dataset { /// \param[in] class_indexing a class name to label map /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). /// \return Shared pointer to the current ImageFolderDataset -std::shared_ptr ImageFolder( +inline std::shared_ptr ImageFolder( const std::string &dataset_dir, bool decode = false, const std::shared_ptr &sampler = std::make_shared(), const std::set &extensions = {}, const std::map &class_indexing = {}, - const std::shared_ptr &cache = nullptr); + const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_dir), decode, sampler, SetStringToChar(extensions), + MapStringToChar(class_indexing), cache); +} + /// \brief Function to create an ImageFolderDataset /// \notes A source dataset that reads images from a tree of directories /// All images within one folder have the same label @@ -981,10 +1118,14 @@ std::shared_ptr ImageFolder( /// \param[in] class_indexing a class name to label map /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). 
/// \return Shared pointer to the current ImageFolderDataset -std::shared_ptr ImageFolder(const std::string &dataset_dir, bool decode, Sampler *sampler, - const std::set &extensions = {}, - const std::map &class_indexing = {}, - const std::shared_ptr &cache = nullptr); +inline std::shared_ptr ImageFolder(const std::string &dataset_dir, bool decode, Sampler *sampler, + const std::set &extensions = {}, + const std::map &class_indexing = {}, + const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_dir), decode, sampler, SetStringToChar(extensions), + MapStringToChar(class_indexing), cache); +} + /// \brief Function to create an ImageFolderDataset /// \notes A source dataset that reads images from a tree of directories /// All images within one folder have the same label @@ -996,25 +1137,28 @@ std::shared_ptr ImageFolder(const std::string &dataset_dir, /// \param[in] class_indexing a class name to label map /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). /// \return Shared pointer to the current ImageFolderDataset -std::shared_ptr ImageFolder(const std::string &dataset_dir, bool decode, - const std::reference_wrapper sampler, - const std::set &extensions = {}, - const std::map &class_indexing = {}, - const std::shared_ptr &cache = nullptr); +inline std::shared_ptr ImageFolder(const std::string &dataset_dir, bool decode, + const std::reference_wrapper sampler, + const std::set &extensions = {}, + const std::map &class_indexing = {}, + const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_dir), decode, sampler, SetStringToChar(extensions), + MapStringToChar(class_indexing), cache); +} class ManifestDataset : public Dataset { public: - explicit ManifestDataset(const std::string &dataset_file, const std::string &usage = "train", - const std::shared_ptr &sampler = std::make_shared(), - const std::map &class_indexing = {}, bool decode = false, - const std::shared_ptr &cache = nullptr); - explicit ManifestDataset(const std::string &dataset_file, const std::string &usage, Sampler *sampler, - const std::map &class_indexing = {}, bool decode = false, - const std::shared_ptr &cache = nullptr); - explicit ManifestDataset(const std::string &dataset_file, const std::string &usage, + explicit ManifestDataset(const std::vector &dataset_file, const std::vector &usage, + const std::shared_ptr &sampler, + const std::map, int32_t> &class_indexing, bool decode, + const std::shared_ptr &cache); + explicit ManifestDataset(const std::vector &dataset_file, const std::vector &usage, Sampler *sampler, + const std::map, int32_t> &class_indexing, bool decode, + const std::shared_ptr &cache); + explicit ManifestDataset(const std::vector &dataset_file, const std::vector &usage, const std::reference_wrapper sampler, - const std::map &class_indexing = {}, bool decode = false, - const std::shared_ptr &cache = nullptr); + const std::map, int32_t> &class_indexing, bool decode, + const std::shared_ptr &cache); ~ManifestDataset() = default; }; @@ -1030,10 +1174,15 @@ class ManifestDataset : public Dataset { /// \param[in] decode Decode the images after reading (default=false). /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). 
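For ImageFolder, both the extensions set and the class_indexing map are converted inside the inline wrapper, so only char-based containers reach the library; the design relies on the string-to-char conversion being a lossless round trip. A small self-contained check of that assumption, using inline conversions rather than the patch's helpers:

// Self-contained check (not MindSpore code) that string -> vector<char> -> string
// round-trips the class_indexing keys exactly.
#include <cassert>
#include <cstdint>
#include <iostream>
#include <map>
#include <set>
#include <string>
#include <vector>

int main() {
  std::map<std::string, int32_t> class_indexing = {{"cat", 0}, {"dog", 1}};
  std::set<std::string> extensions = {".jpg", ".png"};

  // Forward conversion: what the inline wrapper would hand to the library.
  std::map<std::vector<char>, int32_t> class_indexing_c;
  for (const auto &kv : class_indexing)
    class_indexing_c.emplace(std::vector<char>(kv.first.begin(), kv.first.end()), kv.second);

  // Backward conversion: what the library side would rebuild internally.
  std::map<std::string, int32_t> round_trip;
  for (const auto &kv : class_indexing_c)
    round_trip.emplace(std::string(kv.first.begin(), kv.first.end()), kv.second);

  assert(round_trip == class_indexing);  // keys and labels survive unchanged
  std::cout << "round trip ok, " << extensions.size() << " extensions handled the same way" << std::endl;
  return 0;
}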
/// \return Shared pointer to the current ManifestDataset -std::shared_ptr Manifest(const std::string &dataset_file, const std::string &usage = "train", - const std::shared_ptr &sampler = std::make_shared(), - const std::map &class_indexing = {}, - bool decode = false, const std::shared_ptr &cache = nullptr); +inline std::shared_ptr Manifest( + const std::string &dataset_file, const std::string &usage = "train", + const std::shared_ptr &sampler = std::make_shared(), + const std::map &class_indexing = {}, bool decode = false, + const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_file), StringToChar(usage), sampler, + MapStringToChar(class_indexing), decode, cache); +} + /// \brief Function to create a ManifestDataset /// \notes The generated dataset has two columns ["image", "label"] /// \param[in] dataset_file The dataset file to be read @@ -1044,9 +1193,15 @@ std::shared_ptr Manifest(const std::string &dataset_file, const /// \param[in] decode Decode the images after reading (default=false). /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). /// \return Shared pointer to the current ManifestDataset -std::shared_ptr Manifest(const std::string &dataset_file, const std::string &usage, Sampler *sampler, - const std::map &class_indexing = {}, - bool decode = false, const std::shared_ptr &cache = nullptr); +inline std::shared_ptr Manifest(const std::string &dataset_file, const std::string &usage, + Sampler *sampler, + const std::map &class_indexing = {}, + bool decode = false, + const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_file), StringToChar(usage), sampler, + MapStringToChar(class_indexing), decode, cache); +} + /// \brief Function to create a ManifestDataset /// \notes The generated dataset has two columns ["image", "label"] /// \param[in] dataset_file The dataset file to be read @@ -1057,30 +1212,34 @@ std::shared_ptr Manifest(const std::string &dataset_file, const /// \param[in] decode Decode the images after reading (default=false). /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). 
/// \return Shared pointer to the current ManifestDataset -std::shared_ptr Manifest(const std::string &dataset_file, const std::string &usage, - const std::reference_wrapper sampler, - const std::map &class_indexing = {}, - bool decode = false, const std::shared_ptr &cache = nullptr); +inline std::shared_ptr Manifest(const std::string &dataset_file, const std::string &usage, + const std::reference_wrapper sampler, + const std::map &class_indexing = {}, + bool decode = false, + const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_file), StringToChar(usage), sampler, + MapStringToChar(class_indexing), decode, cache); +} class MindDataDataset : public Dataset { public: - explicit MindDataDataset(const std::string &dataset_file, const std::vector &columns_list = {}, - const std::shared_ptr &sampler = std::make_shared(), - nlohmann::json padded_sample = nullptr, int64_t num_padded = 0); - explicit MindDataDataset(const std::string &dataset_file, const std::vector &columns_list, - Sampler *sampler, nlohmann::json padded_sample = nullptr, int64_t num_padded = 0); - explicit MindDataDataset(const std::string &dataset_file, const std::vector &columns_list, - const std::reference_wrapper sampler, nlohmann::json padded_sample = nullptr, - int64_t num_padded = 0); - explicit MindDataDataset(const std::vector &dataset_files, - const std::vector &columns_list = {}, - const std::shared_ptr &sampler = std::make_shared(), - nlohmann::json padded_sample = nullptr, int64_t num_padded = 0); - explicit MindDataDataset(const std::vector &dataset_files, const std::vector &columns_list, - Sampler *sampler, nlohmann::json padded_sample = nullptr, int64_t num_padded = 0); - explicit MindDataDataset(const std::vector &dataset_files, const std::vector &columns_list, - const std::reference_wrapper sampler, nlohmann::json padded_sample = nullptr, - int64_t num_padded = 0); + explicit MindDataDataset(const std::vector &dataset_file, const std::vector> &columns_list, + const std::shared_ptr &sampler, nlohmann::json padded_sample, int64_t num_padded); + explicit MindDataDataset(const std::vector &dataset_file, const std::vector> &columns_list, + Sampler *sampler, nlohmann::json padded_sample, int64_t num_padded); + explicit MindDataDataset(const std::vector &dataset_file, const std::vector> &columns_list, + const std::reference_wrapper sampler, nlohmann::json padded_sample, + int64_t num_padded); + explicit MindDataDataset(const std::vector> &dataset_files, + const std::vector> &columns_list, const std::shared_ptr &sampler, + nlohmann::json padded_sample, int64_t num_padded); + explicit MindDataDataset(const std::vector> &dataset_files, + const std::vector> &columns_list, Sampler *sampler, + nlohmann::json padded_sample, int64_t num_padded); + explicit MindDataDataset(const std::vector> &dataset_files, + const std::vector> &columns_list, + const std::reference_wrapper sampler, nlohmann::json padded_sample, + int64_t num_padded); ~MindDataDataset() = default; }; @@ -1095,10 +1254,14 @@ class MindDataDataset : public Dataset { /// \param[in] padded_sample Samples will be appended to dataset, where keys are the same as column_list. /// \param[in] num_padded Number of padding samples. Dataset size plus num_padded should be divisible by num_shards. 
/// \return Shared pointer to the current MindDataDataset -std::shared_ptr MindData(const std::string &dataset_file, - const std::vector &columns_list = {}, - const std::shared_ptr &sampler = std::make_shared(), - nlohmann::json padded_sample = nullptr, int64_t num_padded = 0); +inline std::shared_ptr MindData( + const std::string &dataset_file, const std::vector &columns_list = {}, + const std::shared_ptr &sampler = std::make_shared(), nlohmann::json padded_sample = nullptr, + int64_t num_padded = 0) { + return std::make_shared(StringToChar(dataset_file), VectorStringToChar(columns_list), sampler, + padded_sample, num_padded); +} + /// \brief Function to create a MindDataDataset /// \param[in] dataset_file File name of one component of a mindrecord source. Other files with identical source /// in the same path will be found and loaded automatically. @@ -1108,9 +1271,13 @@ std::shared_ptr MindData(const std::string &dataset_file, /// \param[in] padded_sample Samples will be appended to dataset, where keys are the same as column_list. /// \param[in] num_padded Number of padding samples. Dataset size plus num_padded should be divisible by num_shards. /// \return Shared pointer to the current MindDataDataset -std::shared_ptr MindData(const std::string &dataset_file, const std::vector &columns_list, - Sampler *sampler, nlohmann::json padded_sample = nullptr, - int64_t num_padded = 0); +inline std::shared_ptr MindData(const std::string &dataset_file, + const std::vector &columns_list, Sampler *sampler, + nlohmann::json padded_sample = nullptr, int64_t num_padded = 0) { + return std::make_shared(StringToChar(dataset_file), VectorStringToChar(columns_list), sampler, + padded_sample, num_padded); +} + /// \brief Function to create a MindDataDataset /// \param[in] dataset_file File name of one component of a mindrecord source. Other files with identical source /// in the same path will be found and loaded automatically. @@ -1120,9 +1287,13 @@ std::shared_ptr MindData(const std::string &dataset_file, const /// \param[in] padded_sample Samples will be appended to dataset, where keys are the same as column_list. /// \param[in] num_padded Number of padding samples. Dataset size plus num_padded should be divisible by num_shards. /// \return Shared pointer to the current MindDataDataset -std::shared_ptr MindData(const std::string &dataset_file, const std::vector &columns_list, - const std::reference_wrapper sampler, - nlohmann::json padded_sample = nullptr, int64_t num_padded = 0); +inline std::shared_ptr MindData(const std::string &dataset_file, + const std::vector &columns_list, + const std::reference_wrapper sampler, + nlohmann::json padded_sample = nullptr, int64_t num_padded = 0) { + return std::make_shared(StringToChar(dataset_file), VectorStringToChar(columns_list), sampler, + padded_sample, num_padded); +} /// \brief Function to create a MindDataDataset /// \param[in] dataset_files List of dataset files to be read directly. @@ -1134,10 +1305,14 @@ std::shared_ptr MindData(const std::string &dataset_file, const /// \param[in] padded_sample Samples will be appended to dataset, where keys are the same as column_list. /// \param[in] num_padded Number of padding samples. Dataset size plus num_padded should be divisible by num_shards. 
/// \return Shared pointer to the current MindDataDataset -std::shared_ptr MindData(const std::vector &dataset_files, - const std::vector &columns_list = {}, - const std::shared_ptr &sampler = std::make_shared(), - nlohmann::json padded_sample = nullptr, int64_t num_padded = 0); +inline std::shared_ptr MindData( + const std::vector &dataset_files, const std::vector &columns_list = {}, + const std::shared_ptr &sampler = std::make_shared(), nlohmann::json padded_sample = nullptr, + int64_t num_padded = 0) { + return std::make_shared(VectorStringToChar(dataset_files), VectorStringToChar(columns_list), sampler, + padded_sample, num_padded); +} + /// \brief Function to create a MindDataDataset /// \param[in] dataset_files List of dataset files to be read directly. /// \param[in] columns_list List of columns to be read @@ -1146,9 +1321,13 @@ std::shared_ptr MindData(const std::vector &datase /// \param[in] padded_sample Samples will be appended to dataset, where keys are the same as column_list. /// \param[in] num_padded Number of padding samples. Dataset size plus num_padded should be divisible by num_shards. /// \return Shared pointer to the current MindDataDataset -std::shared_ptr MindData(const std::vector &dataset_files, - const std::vector &columns_list, Sampler *sampler, - nlohmann::json padded_sample = nullptr, int64_t num_padded = 0); +inline std::shared_ptr MindData(const std::vector &dataset_files, + const std::vector &columns_list, Sampler *sampler, + nlohmann::json padded_sample = nullptr, int64_t num_padded = 0) { + return std::make_shared(VectorStringToChar(dataset_files), VectorStringToChar(columns_list), sampler, + padded_sample, num_padded); +} + /// \brief Function to create a MindDataDataset /// \param[in] dataset_files List of dataset files to be read directly. /// \param[in] columns_list List of columns to be read @@ -1157,22 +1336,23 @@ std::shared_ptr MindData(const std::vector &datase /// \param[in] padded_sample Samples will be appended to dataset, where keys are the same as column_list. /// \param[in] num_padded Number of padding samples. Dataset size plus num_padded should be divisible by num_shards. 
/// \return Shared pointer to the current MindDataDataset -std::shared_ptr MindData(const std::vector &dataset_files, - const std::vector &columns_list, - const std::reference_wrapper sampler, - nlohmann::json padded_sample = nullptr, int64_t num_padded = 0); +inline std::shared_ptr MindData(const std::vector &dataset_files, + const std::vector &columns_list, + const std::reference_wrapper sampler, + nlohmann::json padded_sample = nullptr, int64_t num_padded = 0) { + return std::make_shared(VectorStringToChar(dataset_files), VectorStringToChar(columns_list), sampler, + padded_sample, num_padded); +} #endif class MnistDataset : public Dataset { public: - explicit MnistDataset(const std::string &dataset_dir, const std::string &usage = "all", - const std::shared_ptr &sampler = std::make_shared(), - const std::shared_ptr &cache = nullptr); - explicit MnistDataset(const std::string &dataset_dir, const std::string &usage, Sampler *sampler, - const std::shared_ptr &cache = nullptr); - explicit MnistDataset(const std::string &dataset_dir, const std::string &usage, - const std::reference_wrapper sampler, - const std::shared_ptr &cache = nullptr); + explicit MnistDataset(const std::vector &dataset_dir, const std::vector &usage, + const std::shared_ptr &sampler, const std::shared_ptr &cache); + explicit MnistDataset(const std::vector &dataset_dir, const std::vector &usage, Sampler *sampler, + const std::shared_ptr &cache); + explicit MnistDataset(const std::vector &dataset_dir, const std::vector &usage, + const std::reference_wrapper sampler, const std::shared_ptr &cache); ~MnistDataset() = default; }; @@ -1185,9 +1365,12 @@ class MnistDataset : public Dataset { /// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). /// \return Shared pointer to the current MnistDataset -std::shared_ptr Mnist(const std::string &dataset_dir, const std::string &usage = "all", - const std::shared_ptr &sampler = std::make_shared(), - const std::shared_ptr &cache = nullptr); +inline std::shared_ptr Mnist(const std::string &dataset_dir, const std::string &usage = "all", + const std::shared_ptr &sampler = std::make_shared(), + const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_dir), StringToChar(usage), sampler, cache); +} + /// \brief Function to create a MnistDataset /// \notes The generated dataset has two columns ["image", "label"] /// \param[in] dataset_dir Path to the root directory that contains the dataset @@ -1195,8 +1378,11 @@ std::shared_ptr Mnist(const std::string &dataset_dir, const std::s /// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset. /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). 
/// \return Shared pointer to the current MnistDataset -std::shared_ptr Mnist(const std::string &dataset_dir, const std::string &usage, Sampler *sampler, - const std::shared_ptr &cache = nullptr); +inline std::shared_ptr Mnist(const std::string &dataset_dir, const std::string &usage, Sampler *sampler, + const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_dir), StringToChar(usage), sampler, cache); +} + /// \brief Function to create a MnistDataset /// \notes The generated dataset has two columns ["image", "label"] /// \param[in] dataset_dir Path to the root directory that contains the dataset @@ -1204,25 +1390,30 @@ std::shared_ptr Mnist(const std::string &dataset_dir, const std::s /// \param[in] sampler Sampler object used to choose samples from the dataset. /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). /// \return Shared pointer to the current MnistDataset -std::shared_ptr Mnist(const std::string &dataset_dir, const std::string &usage, - const std::reference_wrapper sampler, - const std::shared_ptr &cache = nullptr); +inline std::shared_ptr Mnist(const std::string &dataset_dir, const std::string &usage, + const std::reference_wrapper sampler, + const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_dir), StringToChar(usage), sampler, cache); +} #ifndef ENABLE_ANDROID + /// \brief Function to create a ConcatDataset /// \notes Reload "+" operator to concat two datasets /// \param[in] datasets1 Shared pointer to the first dataset to be concatenated /// \param[in] datasets2 Shared pointer to the second dataset to be concatenated /// \return Shared pointer to the current ConcatDataset -std::shared_ptr operator+(const std::shared_ptr &datasets1, - const std::shared_ptr &datasets2); +inline std::shared_ptr operator+(const std::shared_ptr &datasets1, + const std::shared_ptr &datasets2) { + return std::make_shared(std::vector({datasets1, datasets2})); +} class RandomDataDataset : public Dataset { public: RandomDataDataset(const int32_t &total_rows, std::shared_ptr schema, - const std::vector &columns_list, std::shared_ptr cache); + const std::vector> &columns_list, std::shared_ptr cache); - RandomDataDataset(const int32_t &total_rows, std::string schema_path, const std::vector &columns_list, - std::shared_ptr cache); + RandomDataDataset(const int32_t &total_rows, const std::vector &schema_path, + const std::vector> &columns_list, std::shared_ptr cache); ~RandomDataDataset() = default; }; @@ -1240,18 +1431,19 @@ std::shared_ptr RandomData(const int32_t &total_rows = 0, con std::shared_ptr ds; if constexpr (std::is_same::value || std::is_same>::value) { std::shared_ptr schema_obj = schema; - ds = std::make_shared(total_rows, std::move(schema_obj), std::move(columns_list), cache); + ds = + std::make_shared(total_rows, std::move(schema_obj), VectorStringToChar(columns_list), cache); } else { - ds = std::make_shared(total_rows, std::move(schema), std::move(columns_list), cache); + ds = std::make_shared(total_rows, StringToChar(schema), VectorStringToChar(columns_list), cache); } return ds; } class TextFileDataset : public Dataset { public: - explicit TextFileDataset(const std::vector &dataset_files, int64_t num_samples = 0, - ShuffleMode shuffle = ShuffleMode::kGlobal, int32_t num_shards = 1, int32_t shard_id = 0, - const std::shared_ptr &cache = nullptr); + explicit TextFileDataset(const std::vector> &dataset_files, int64_t num_samples, + ShuffleMode shuffle, int32_t num_shards, int32_t 
shard_id, + const std::shared_ptr &cache); ~TextFileDataset() = default; }; @@ -1271,20 +1463,24 @@ class TextFileDataset : public Dataset { /// specified only when num_shards is also specified. (Default = 0) /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). /// \return Shared pointer to the current TextFileDataset -std::shared_ptr TextFile(const std::vector &dataset_files, int64_t num_samples = 0, - ShuffleMode shuffle = ShuffleMode::kGlobal, int32_t num_shards = 1, - int32_t shard_id = 0, const std::shared_ptr &cache = nullptr); +inline std::shared_ptr TextFile(const std::vector &dataset_files, int64_t num_samples = 0, + ShuffleMode shuffle = ShuffleMode::kGlobal, int32_t num_shards = 1, + int32_t shard_id = 0, + const std::shared_ptr &cache = nullptr) { + return std::make_shared(VectorStringToChar(dataset_files), num_samples, shuffle, num_shards, + shard_id, cache); +} class TFRecordDataset : public Dataset { public: - TFRecordDataset(const std::vector &dataset_files, std::string schema, - const std::vector &columns_list, int64_t num_samples, ShuffleMode shuffle, + TFRecordDataset(const std::vector> &dataset_files, const std::vector &schema, + const std::vector> &columns_list, int64_t num_samples, ShuffleMode shuffle, int32_t num_shards, int32_t shard_id, bool shard_equal_rows, std::shared_ptr cache); /// \brief Constructor /// \note Parameter 'schema' is shared pointer to Schema object - TFRecordDataset(const std::vector &dataset_files, std::shared_ptr schema, - const std::vector &columns_list, int64_t num_samples, ShuffleMode shuffle, + TFRecordDataset(const std::vector> &dataset_files, std::shared_ptr schema, + const std::vector> &columns_list, int64_t num_samples, ShuffleMode shuffle, int32_t num_shards, int32_t shard_id, bool shard_equal_rows, std::shared_ptr cache); ~TFRecordDataset() = default; @@ -1322,8 +1518,9 @@ std::shared_ptr TFRecord(const std::vector &datase std::shared_ptr ds = nullptr; if constexpr (std::is_same::value || std::is_same>::value) { std::shared_ptr schema_obj = schema; - ds = std::make_shared(dataset_files, schema_obj, columns_list, num_samples, shuffle, num_shards, - shard_id, shard_equal_rows, cache); + ds = std::make_shared(VectorStringToChar(dataset_files), std::move(schema_obj), + VectorStringToChar(columns_list), num_samples, shuffle, num_shards, shard_id, + shard_equal_rows, cache); } else { std::string schema_path = schema; if (!schema_path.empty()) { @@ -1337,25 +1534,25 @@ std::shared_ptr TFRecord(const std::vector &datase return nullptr; } } - ds = std::make_shared(dataset_files, schema_path, columns_list, num_samples, shuffle, num_shards, - shard_id, shard_equal_rows, cache); + ds = std::make_shared(VectorStringToChar(dataset_files), StringToChar(schema_path), + VectorStringToChar(columns_list), num_samples, shuffle, num_shards, shard_id, + shard_equal_rows, cache); } return ds; } class VOCDataset : public Dataset { public: - explicit VOCDataset(const std::string &dataset_dir, const std::string &task = "Segmentation", - const std::string &usage = "train", const std::map &class_indexing = {}, - bool decode = false, const std::shared_ptr &sampler = std::make_shared(), - const std::shared_ptr &cache = nullptr); - explicit VOCDataset(const std::string &dataset_dir, const std::string &task, const std::string &usage, - const std::map &class_indexing, bool decode, Sampler *sampler, - const std::shared_ptr &cache = nullptr); - explicit VOCDataset(const std::string &dataset_dir, const std::string &task, const 
std::string &usage, - const std::map &class_indexing, bool decode, - const std::reference_wrapper sampler, - const std::shared_ptr &cache = nullptr); + explicit VOCDataset(const std::vector &dataset_dir, const std::vector &task, + const std::vector &usage, const std::map, int32_t> &class_indexing, + bool decode, const std::shared_ptr &sampler, const std::shared_ptr &cache); + explicit VOCDataset(const std::vector &dataset_dir, const std::vector &task, + const std::vector &usage, const std::map, int32_t> &class_indexing, + bool decode, Sampler *sampler, const std::shared_ptr &cache); + explicit VOCDataset(const std::vector &dataset_dir, const std::vector &task, + const std::vector &usage, const std::map, int32_t> &class_indexing, + bool decode, const std::reference_wrapper sampler, + const std::shared_ptr &cache); ~VOCDataset() = default; }; @@ -1374,11 +1571,15 @@ class VOCDataset : public Dataset { /// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). /// \return Shared pointer to the current Dataset -std::shared_ptr VOC(const std::string &dataset_dir, const std::string &task = "Segmentation", - const std::string &usage = "train", - const std::map &class_indexing = {}, bool decode = false, - const std::shared_ptr &sampler = std::make_shared(), - const std::shared_ptr &cache = nullptr); +inline std::shared_ptr VOC(const std::string &dataset_dir, const std::string &task = "Segmentation", + const std::string &usage = "train", + const std::map &class_indexing = {}, bool decode = false, + const std::shared_ptr &sampler = std::make_shared(), + const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_dir), StringToChar(task), StringToChar(usage), + MapStringToChar(class_indexing), decode, sampler, cache); +} + /// \brief Function to create a VOCDataset /// \notes The generated dataset has multi-columns : /// - task='Detection', column: [['image', dtype=uint8], ['bbox', dtype=float32], ['label', dtype=uint32], @@ -1392,9 +1593,14 @@ std::shared_ptr VOC(const std::string &dataset_dir, const std::strin /// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset. /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). /// \return Shared pointer to the current Dataset -std::shared_ptr VOC(const std::string &dataset_dir, const std::string &task, const std::string &usage, - const std::map &class_indexing, bool decode, Sampler *sampler, - const std::shared_ptr &cache = nullptr); +inline std::shared_ptr VOC(const std::string &dataset_dir, const std::string &task, + const std::string &usage, const std::map &class_indexing, + bool decode, Sampler *sampler, + const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_dir), StringToChar(task), StringToChar(usage), + MapStringToChar(class_indexing), decode, sampler, cache); +} + /// \brief Function to create a VOCDataset /// \notes The generated dataset has multi-columns : /// - task='Detection', column: [['image', dtype=uint8], ['bbox', dtype=float32], ['label', dtype=uint32], @@ -1408,10 +1614,19 @@ std::shared_ptr VOC(const std::string &dataset_dir, const std::strin /// \param[in] sampler Sampler object used to choose samples from the dataset. /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). 
/// \return Shared pointer to the current Dataset -std::shared_ptr VOC(const std::string &dataset_dir, const std::string &task, const std::string &usage, - const std::map &class_indexing, bool decode, - const std::reference_wrapper sampler, - const std::shared_ptr &cache = nullptr); +inline std::shared_ptr VOC(const std::string &dataset_dir, const std::string &task, + const std::string &usage, const std::map &class_indexing, + bool decode, const std::reference_wrapper sampler, + const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_dir), StringToChar(task), StringToChar(usage), + MapStringToChar(class_indexing), decode, sampler, cache); +} + +std::shared_ptr CreateDatasetCacheCharIF(session_id_type id, uint64_t mem_sz, bool spill, + std::optional> hostname = std::nullopt, + std::optional port = std::nullopt, + std::optional num_connections = std::nullopt, + std::optional prefetch_sz = std::nullopt); /// \brief Function the create a cache to be attached to a dataset /// \param id A user assigned session id for the current pipeline. @@ -1423,17 +1638,22 @@ std::shared_ptr VOC(const std::string &dataset_dir, const std::strin /// \param num_connections optional number of connections (default=12). /// \param prefetch_sz optional prefetch size (default=20). /// \return Shared pointer to DatasetCache. If error, nullptr is returned. -std::shared_ptr CreateDatasetCache(session_id_type id, uint64_t mem_sz, bool spill, - std::optional hostname = std::nullopt, - std::optional port = std::nullopt, - std::optional num_connections = std::nullopt, - std::optional prefetch_sz = std::nullopt); +inline std::shared_ptr CreateDatasetCache(session_id_type id, uint64_t mem_sz, bool spill, + std::optional hostname = std::nullopt, + std::optional port = std::nullopt, + std::optional num_connections = std::nullopt, + std::optional prefetch_sz = std::nullopt) { + return CreateDatasetCacheCharIF(id, mem_sz, spill, OptionalStringToChar(hostname), port, num_connections, + prefetch_sz); +} /// \brief Function to create a ZipDataset /// \notes Applies zip to the dataset /// \param[in] datasets List of shared pointers to the datasets that we want to zip /// \return Shared pointer to the current Dataset -std::shared_ptr Zip(const std::vector> &datasets); +inline std::shared_ptr Zip(const std::vector> &datasets) { + return std::make_shared(datasets); +} #endif } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/include/iterator.h b/mindspore/ccsrc/minddata/dataset/include/iterator.h index 263fbfbc0d5..b69cb33a1cc 100644 --- a/mindspore/ccsrc/minddata/dataset/include/iterator.h +++ b/mindspore/ccsrc/minddata/dataset/include/iterator.h @@ -17,10 +17,12 @@ #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_ITERATOR_H_ #define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_ITERATOR_H_ +#include #include #include #include #include +#include "include/api/dual_abi_helper.h" #include "include/api/status.h" #include "include/api/types.h" @@ -39,6 +41,7 @@ class IteratorConsumer; class Dataset; using MSTensorMap = std::unordered_map; +using MSTensorMapChar = std::map, mindspore::MSTensor>; using MSTensorVec = std::vector; // Abstract class for iterating over the dataset. @@ -60,7 +63,18 @@ class Iterator { /// \note Type of return data is a map(with column name). /// \param[out] row - the output tensor row. /// \return - a Status error code, returns OK if no error encountered. 
- Status GetNextRow(MSTensorMap *row); + Status GetNextRow(MSTensorMap *row) { + MSTensorMapChar row_; + row_.clear(); + row->clear(); + Status s = GetNextRowCharIF(&row_); + TensorMapCharToString(&row_, row); + return s; + } + + // Char interface(CharIF) of GetNextRow + // This api exists because std::string will constrained by ABI compile macro but char don't. + Status GetNextRowCharIF(MSTensorMapChar *row); /// \brief Function to get the next row from the data pipeline. /// \note Type of return data is a vector(without column name). diff --git a/mindspore/ccsrc/minddata/dataset/include/text.h b/mindspore/ccsrc/minddata/dataset/include/text.h index a0f6c44c774..660f86b9e76 100644 --- a/mindspore/ccsrc/minddata/dataset/include/text.h +++ b/mindspore/ccsrc/minddata/dataset/include/text.h @@ -23,6 +23,7 @@ #include #include +#include "include/api/dual_abi_helper.h" #include "include/api/status.h" #include "minddata/dataset/include/constants.h" #include "minddata/dataset/include/transforms.h" @@ -64,11 +65,8 @@ class BasicTokenizer : public TensorTransform { std::shared_ptr Parse() override; private: - bool lower_case_; - bool keep_whitespace_; - NormalizeForm normalize_form_; - bool preserve_unused_token_; - bool with_offsets_; + struct Data; + std::shared_ptr data_; }; /// \brief Tokenizer used for Bert text process. @@ -94,7 +92,14 @@ class BertTokenizer : public TensorTransform { int32_t max_bytes_per_token = 100, const std::string &unknown_token = "[UNK]", bool lower_case = false, bool keep_whitespace = false, const NormalizeForm normalize_form = NormalizeForm::kNone, bool preserve_unused_token = true, - bool with_offsets = false); + bool with_offsets = false) + : BertTokenizer(vocab, StringToChar(suffix_indicator), max_bytes_per_token, StringToChar(unknown_token), + lower_case, keep_whitespace, normalize_form, preserve_unused_token, with_offsets) {} + + explicit BertTokenizer(const std::shared_ptr &vocab, const std::vector &suffix_indicator, + int32_t max_bytes_per_token, const std::vector &unknown_token, bool lower_case, + bool keep_whitespace, const NormalizeForm normalize_form, bool preserve_unused_token, + bool with_offsets); /// \brief Destructor ~BertTokenizer() = default; @@ -104,15 +109,8 @@ class BertTokenizer : public TensorTransform { std::shared_ptr Parse() override; private: - std::shared_ptr vocab_; - std::string suffix_indicator_; - int32_t max_bytes_per_token_; - std::string unknown_token_; - bool lower_case_; - bool keep_whitespace_; - NormalizeForm normalize_form_; - bool preserve_unused_token_; - bool with_offsets_; + struct Data; + std::shared_ptr data_; }; /// \brief Apply case fold operation on UTF-8 string tensor. @@ -146,7 +144,11 @@ class JiebaTokenizer : public TensorTransform { /// - JiebaMode.kMIX, tokenize with a mix of MPSegment and HMMSegment algorithm. /// \param[in] with_offsets If or not output offsets of tokens (default=false). 
explicit JiebaTokenizer(const std::string &hmm_path, const std::string &mp_path, - const JiebaMode &mode = JiebaMode::kMix, bool with_offsets = false); + const JiebaMode &mode = JiebaMode::kMix, bool with_offsets = false) + : JiebaTokenizer(StringToChar(hmm_path), StringToChar(mp_path), mode, with_offsets) {} + + explicit JiebaTokenizer(const std::vector &hmm_path, const std::vector &mp_path, const JiebaMode &mode, + bool with_offsets); /// \brief Destructor ~JiebaTokenizer() = default; @@ -158,11 +160,8 @@ class JiebaTokenizer : public TensorTransform { Status AddWord(const std::string &word, int64_t freq = 0); private: - std::string hmm_path_; - std::string mp_path_; - JiebaMode mode_; - bool with_offsets_; - std::vector> words_list_; + struct Data; + std::shared_ptr data_; }; /// \brief Look up a word into an id according to the input vocabulary table. @@ -175,7 +174,11 @@ class Lookup : public TensorTransform { /// specify unknown_token when word being out of Vocabulary (default={}). /// \param[in] data_type type of the tensor after lookup, typically int32. explicit Lookup(const std::shared_ptr &vocab, const std::optional &unknown_token = {}, - const std::string &data_type = "int32"); + const std::string &data_type = "int32") + : Lookup(vocab, OptionalStringToChar(unknown_token), StringToChar(data_type)) {} + + explicit Lookup(const std::shared_ptr &vocab, const std::optional> &unknown_token, + const std::vector &data_type); /// \brief Destructor ~Lookup() = default; @@ -185,9 +188,8 @@ class Lookup : public TensorTransform { std::shared_ptr Parse() override; private: - std::shared_ptr vocab_; - std::optional unknown_token_; - std::string data_type_; + struct Data; + std::shared_ptr data_; }; /// \brief TensorOp to generate n-gram from a 1-D string Tensor. @@ -203,7 +205,11 @@ class Ngram : public TensorTransform { /// be capped at n-1. right_pad=("-":2) would pad right side of the sequence with "--" (default={"", 0}}). /// \param[in] separator Symbol used to join strings together (default=" "). explicit Ngram(const std::vector &ngrams, const std::pair &left_pad = {"", 0}, - const std::pair &right_pad = {"", 0}, const std::string &separator = " "); + const std::pair &right_pad = {"", 0}, const std::string &separator = " ") + : Ngram(ngrams, PairStringToChar(left_pad), PairStringToChar(right_pad), StringToChar(separator)) {} + + explicit Ngram(const std::vector &ngrams, const std::pair, int32_t> &left_pad, + const std::pair, int32_t> &right_pad, const std::vector &separator); /// \brief Destructor ~Ngram() = default; @@ -213,10 +219,8 @@ class Ngram : public TensorTransform { std::shared_ptr Parse() override; private: - std::vector ngrams_; - std::pair left_pad_; - std::pair right_pad_; - std::string separator_; + struct Data; + std::shared_ptr data_; }; #ifndef _WIN32 @@ -243,7 +247,8 @@ class NormalizeUTF8 : public TensorTransform { std::shared_ptr Parse() override; private: - NormalizeForm normalize_form_; + struct Data; + std::shared_ptr data_; }; /// \brief Replace UTF-8 string tensor with 'replace' according to regular expression 'pattern'. @@ -254,7 +259,10 @@ class RegexReplace : public TensorTransform { /// \param[in] replace The string to replace matched element. /// \param[in] replace_all Confirm whether to replace all. If false, only replace first matched element; /// if true, replace all matched elements (default=true). 
- explicit RegexReplace(std::string pattern, std::string replace, bool replace_all = true); + explicit RegexReplace(std::string pattern, std::string replace, bool replace_all = true) + : RegexReplace(StringToChar(pattern), StringToChar(replace), replace_all) {} + + explicit RegexReplace(const std::vector &pattern, const std::vector &replace, bool replace_all); /// \brief Destructor ~RegexReplace() = default; @@ -264,9 +272,8 @@ class RegexReplace : public TensorTransform { std::shared_ptr Parse() override; private: - std::string pattern_; - std::string replace_; - bool replace_all_; + struct Data; + std::shared_ptr data_; }; /// \brief Tokenize a scalar tensor of UTF-8 string by regex expression pattern. @@ -278,7 +285,11 @@ class RegexTokenizer : public TensorTransform { /// matched by 'keep_delim_pattern'. The default value is an empty string ("") /// which means that delimiters will not be kept as an output token (default=""). /// \param[in] with_offsets If or not output offsets of tokens (default=false). - explicit RegexTokenizer(std::string delim_pattern, std::string keep_delim_pattern = "", bool with_offsets = false); + explicit RegexTokenizer(std::string delim_pattern, std::string keep_delim_pattern = "", bool with_offsets = false) + : RegexTokenizer(StringToChar(delim_pattern), StringToChar(keep_delim_pattern), with_offsets) {} + + explicit RegexTokenizer(const std::vector &delim_pattern, const std::vector &keep_delim_pattern, + bool with_offsets); /// \brief Destructor ~RegexTokenizer() = default; @@ -288,9 +299,8 @@ class RegexTokenizer : public TensorTransform { std::shared_ptr Parse() override; private: - std::string delim_pattern_; - std::string keep_delim_pattern_; - bool with_offsets_; + struct Data; + std::shared_ptr data_; }; #endif @@ -306,7 +316,10 @@ class SentencePieceTokenizer : public TensorTransform { /// \brief Constructor. /// \param[in] vocab_path vocab model file path. /// \param[in] out_type The type of output. - SentencePieceTokenizer(const std::string &vocab_path, mindspore::dataset::SPieceTokenizerOutType out_type); + SentencePieceTokenizer(const std::string &vocab_path, mindspore::dataset::SPieceTokenizerOutType out_type) + : SentencePieceTokenizer(StringToChar(vocab_path), out_type) {} + + SentencePieceTokenizer(const std::vector &vocab_path, mindspore::dataset::SPieceTokenizerOutType out_type); /// \brief Destructor ~SentencePieceTokenizer() = default; @@ -316,10 +329,8 @@ class SentencePieceTokenizer : public TensorTransform { std::shared_ptr Parse() override; private: - std::shared_ptr vocab_; - std::string vocab_path_; - SPieceTokenizerLoadType load_type_; - SPieceTokenizerOutType out_type_; + struct Data; + std::shared_ptr data_; }; /// \brief TensorOp to construct a tensor from data (only 1-D for now), where each element in the dimension @@ -340,8 +351,8 @@ class SlidingWindow : public TensorTransform { std::shared_ptr Parse() override; private: - int32_t width_; - int32_t axis_; + struct Data; + std::shared_ptr data_; }; /// \brief Tensor operation to convert every element of a string tensor to a number. @@ -353,7 +364,9 @@ class ToNumber : public TensorTransform { public: /// \brief Constructor. /// \param[in] data_type of the tensor to be casted to. Must be a numeric type. 
- explicit ToNumber(const std::string &data_type); + explicit ToNumber(const std::string &data_type) : ToNumber(StringToChar(data_type)) {} + + explicit ToNumber(const std::vector &data_type); /// \brief Destructor ~ToNumber() = default; @@ -363,7 +376,8 @@ class ToNumber : public TensorTransform { std::shared_ptr Parse() override; private: - std::string data_type_; + struct Data; + std::shared_ptr data_; }; /// \brief Truncate a pair of rank-1 tensors such that the total length is less than max_length. @@ -381,7 +395,8 @@ class TruncateSequencePair : public TensorTransform { std::shared_ptr Parse() override; private: - int32_t max_length_; + struct Data; + std::shared_ptr data_; }; /// \brief Tokenize a scalar tensor of UTF-8 string to Unicode characters. @@ -399,7 +414,8 @@ class UnicodeCharTokenizer : public TensorTransform { std::shared_ptr Parse() override; private: - bool with_offsets_; + struct Data; + std::shared_ptr data_; }; #ifndef _WIN32 @@ -419,8 +435,8 @@ class UnicodeScriptTokenizer : public TensorTransform { std::shared_ptr Parse() override; private: - bool keep_whitespace_; - bool with_offsets_; + struct Data; + std::shared_ptr data_; }; /// \brief Tokenize a scalar tensor of UTF-8 string on ICU4C defined whitespaces. @@ -438,7 +454,8 @@ class WhitespaceTokenizer : public TensorTransform { std::shared_ptr Parse() override; private: - bool with_offsets_; + struct Data; + std::shared_ptr data_; }; #endif } // namespace text diff --git a/mindspore/ccsrc/minddata/dataset/include/transforms.h b/mindspore/ccsrc/minddata/dataset/include/transforms.h index fce85e30b5b..08c2a8aa268 100644 --- a/mindspore/ccsrc/minddata/dataset/include/transforms.h +++ b/mindspore/ccsrc/minddata/dataset/include/transforms.h @@ -22,6 +22,7 @@ #include #include +#include "include/api/dual_abi_helper.h" #include "include/api/status.h" #include "minddata/dataset/include/constants.h" @@ -72,7 +73,8 @@ class Compose : public TensorTransform { std::shared_ptr Parse() override; private: - std::vector> transforms_; + struct Data; + std::shared_ptr data_; }; /// \brief Duplicate Op. @@ -107,7 +109,8 @@ class OneHot : public TensorTransform { std::shared_ptr Parse() override; private: - float num_classes_; + struct Data; + std::shared_ptr data_; }; /// \brief RandomApply Op. @@ -129,8 +132,8 @@ class RandomApply : public TensorTransform { std::shared_ptr Parse() override; private: - std::vector> transforms_; - double prob_; + struct Data; + std::shared_ptr data_; }; /// \brief RandomChoice Op. @@ -151,7 +154,8 @@ class RandomChoice : public TensorTransform { std::shared_ptr Parse() override; private: - std::vector> transforms_; + struct Data; + std::shared_ptr data_; }; /// \brief TypeCast Op. @@ -160,7 +164,9 @@ class TypeCast : public TensorTransform { public: /// \brief Constructor. /// \param[in] data_type mindspore.dtype to be cast to. - explicit TypeCast(std::string data_type); + explicit TypeCast(std::string data_type) : TypeCast(StringToChar(data_type)) {} + + explicit TypeCast(const std::vector &data_type); /// \brief Destructor ~TypeCast() = default; @@ -170,7 +176,8 @@ class TypeCast : public TensorTransform { std::shared_ptr Parse() override; private: - std::string data_type_; + struct Data; + std::shared_ptr data_; }; /// \brief Unique Op. 
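// A minimal sketch of the matching .cc side of the pattern applied above, using TypeCast as the
// example: the removed private members move into an opaque Data struct, the exported constructor
// takes std::vector<char>, and the inline std::string overload in the header keeps existing call
// sites source-compatible while only char-based symbols cross the ABI boundary. The Data layout
// and the TypeCastOperation IR class name are illustrative assumptions, not the actual MindSpore
// sources; namespace wrappers are omitted for brevity.
struct TypeCast::Data {
  explicit Data(const std::vector<char> &data_type) : data_type_(CharToString(data_type)) {}
  std::string data_type_;  // restored to std::string once safely inside the library
};

TypeCast::TypeCast(const std::vector<char> &data_type) : data_(std::make_shared<Data>(data_type)) {}

std::shared_ptr<TensorOperation> TypeCast::Parse() {
  return std::make_shared<TypeCastOperation>(data_->data_type_);  // assumed IR operation class
}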
diff --git a/mindspore/ccsrc/minddata/dataset/include/vision.h b/mindspore/ccsrc/minddata/dataset/include/vision.h index c69a9969096..c283b91c135 100644 --- a/mindspore/ccsrc/minddata/dataset/include/vision.h +++ b/mindspore/ccsrc/minddata/dataset/include/vision.h @@ -23,6 +23,7 @@ #include #include +#include "include/api/dual_abi_helper.h" #include "include/api/status.h" #include "minddata/dataset/include/constants.h" #include "minddata/dataset/include/transforms.h" @@ -52,8 +53,8 @@ class AutoContrast : public TensorTransform { std::shared_ptr Parse() override; private: - float cutoff_; - std::vector ignore_; + struct Data; + std::shared_ptr data_; }; /// \brief BoundingBoxAugment TensorTransform. @@ -83,8 +84,8 @@ class BoundingBoxAugment : public TensorTransform { std::shared_ptr Parse() override; private: - std::shared_ptr transform_; - float ratio_; + struct Data; + std::shared_ptr data_; }; /// \brief Constructor to apply CutMix on a batch of images @@ -106,9 +107,8 @@ class CutMixBatch : public TensorTransform { std::shared_ptr Parse() override; private: - float alpha_; - float prob_; - ImageBatchFormat image_batch_format_; + struct Data; + std::shared_ptr data_; }; /// \brief CutOut TensorOp @@ -128,8 +128,8 @@ class CutOut : public TensorTransform { std::shared_ptr Parse() override; private: - int32_t length_; - int32_t num_patches_; + struct Data; + std::shared_ptr data_; }; /// \brief Equalize TensorTransform. @@ -194,7 +194,8 @@ class MixUpBatch : public TensorTransform { std::shared_ptr Parse() override; private: - float alpha_; + struct Data; + std::shared_ptr data_; }; /// \brief NormalizePad TensorTransform. @@ -210,7 +211,10 @@ class NormalizePad : public TensorTransform { /// \param[in] dtype The output datatype of Tensor. /// The standard deviation values must be "float32" or "float16"(default = "float32") explicit NormalizePad(const std::vector &mean, const std::vector &std, - const std::string &dtype = "float32"); + const std::string &dtype = "float32") + : NormalizePad(mean, std, StringToChar(dtype)) {} + + explicit NormalizePad(const std::vector &mean, const std::vector &std, const std::vector &dtype); /// \brief Destructor. ~NormalizePad() = default; @@ -220,9 +224,8 @@ class NormalizePad : public TensorTransform { std::shared_ptr Parse() override; private: - std::vector mean_; - std::vector std_; - std::string dtype_; + struct Data; + std::shared_ptr data_; }; /// \brief Pad TensorOp @@ -257,9 +260,8 @@ class Pad : public TensorTransform { std::shared_ptr Parse() override; private: - std::vector padding_; - std::vector fill_value_; - BorderType padding_mode_; + struct Data; + std::shared_ptr data_; }; /// \brief Blends an image with its grayscale version with random weights @@ -280,8 +282,8 @@ class RandomColor : public TensorTransform { std::shared_ptr Parse() override; private: - float t_lb_; - float t_ub_; + struct Data; + std::shared_ptr data_; }; /// \brief RandomColorAdjust TensorTransform. @@ -309,10 +311,8 @@ class RandomColorAdjust : public TensorTransform { std::shared_ptr Parse() override; private: - std::vector brightness_; - std::vector contrast_; - std::vector saturation_; - std::vector hue_; + struct Data; + std::shared_ptr data_; }; /// \brief RandomCrop TensorTransform. 
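// A small round-trip sketch of the dual-ABI helpers that the inline wrappers above rely on:
// std::string-based arguments are narrowed to char containers before a call crosses the library
// boundary, and the *CharToString helpers restore them on the other side without loss. This is a
// self-contained sanity check written against include/api/dual_abi_helper.h.
#include <cassert>
#include <cstdint>
#include <map>
#include <string>
#include <vector>
#include "include/api/dual_abi_helper.h"

void DualAbiRoundTrip() {
  const std::string dtype = "float32";
  assert(mindspore::CharToString(mindspore::StringToChar(dtype)) == dtype);

  const std::vector<std::string> columns = {"image", "label"};
  assert(mindspore::VectorCharToString(mindspore::VectorStringToChar(columns)) == columns);

  const std::map<std::string, int32_t> class_indexing = {{"car", 0}, {"person", 1}};
  assert(mindspore::MapCharToString(mindspore::MapStringToChar(class_indexing)) == class_indexing);
}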
@@ -346,11 +346,8 @@ class RandomCrop : public TensorTransform { std::shared_ptr Parse() override; private: - std::vector size_; - std::vector padding_; - bool pad_if_needed_; - std::vector fill_value_; - BorderType padding_mode_; + struct Data; + std::shared_ptr data_; }; /// \brief RandomCropDecodeResize TensorTransform. @@ -381,11 +378,8 @@ class RandomCropDecodeResize : public TensorTransform { std::shared_ptr Parse() override; private: - std::vector size_; - std::vector scale_; - std::vector ratio_; - InterpolationMode interpolation_; - int32_t max_attempts_; + struct Data; + std::shared_ptr data_; }; /// \brief RandomCropWithBBox TensorTransform. @@ -421,11 +415,8 @@ class RandomCropWithBBox : public TensorTransform { std::shared_ptr Parse() override; private: - std::vector size_; - std::vector padding_; - bool pad_if_needed_; - std::vector fill_value_; - BorderType padding_mode_; + struct Data; + std::shared_ptr data_; }; /// \brief RandomHorizontalFlip TensorTransform. @@ -444,7 +435,8 @@ class RandomHorizontalFlip : public TensorTransform { std::shared_ptr Parse() override; private: - float probability_; + struct Data; + std::shared_ptr data_; }; /// \brief RandomHorizontalFlipWithBBox TensorTransform. @@ -463,7 +455,8 @@ class RandomHorizontalFlipWithBBox : public TensorTransform { std::shared_ptr Parse() override; private: - float probability_; + struct Data; + std::shared_ptr data_; }; /// \brief RandomPosterize TensorTransform. @@ -482,7 +475,8 @@ class RandomPosterize : public TensorTransform { std::shared_ptr Parse() override; private: - std::vector bit_range_; + struct Data; + std::shared_ptr data_; }; /// \brief RandomResize TensorTransform. @@ -503,7 +497,8 @@ class RandomResize : public TensorTransform { std::shared_ptr Parse() override; private: - std::vector size_; + struct Data; + std::shared_ptr data_; }; /// \brief RandomResizeWithBBox TensorTransform. @@ -525,7 +520,8 @@ class RandomResizeWithBBox : public TensorTransform { std::shared_ptr Parse() override; private: - std::vector size_; + struct Data; + std::shared_ptr data_; }; /// \brief RandomResizedCrop TensorTransform. @@ -555,11 +551,8 @@ class RandomResizedCrop : public TensorTransform { std::shared_ptr Parse() override; private: - std::vector size_; - std::vector scale_; - std::vector ratio_; - InterpolationMode interpolation_; - int32_t max_attempts_; + struct Data; + std::shared_ptr data_; }; /// \brief RandomResizedCropWithBBox TensorTransform. @@ -589,11 +582,8 @@ class RandomResizedCropWithBBox : public TensorTransform { std::shared_ptr Parse() override; private: - std::vector size_; - std::vector scale_; - std::vector ratio_; - InterpolationMode interpolation_; - int32_t max_attempts_; + struct Data; + std::shared_ptr data_; }; /// \brief RandomRotation TensorOp @@ -620,11 +610,8 @@ class RandomRotation : public TensorTransform { std::shared_ptr Parse() override; private: - std::vector degrees_; - InterpolationMode interpolation_mode_; - std::vector center_; - bool expand_; - std::vector fill_value_; + struct Data; + std::shared_ptr data_; }; /// \brief RandomSelectSubpolicy TensorTransform. @@ -655,7 +642,8 @@ class RandomSelectSubpolicy : public TensorTransform { std::shared_ptr Parse() override; private: - std::vector, double>>> policy_; + struct Data; + std::shared_ptr data_; }; /// \brief RandomSharpness TensorTransform. 
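// A caller-side sketch: the transform classes above keep their public constructors and default
// arguments, so pipelines that build these ops compile unchanged after the members move behind
// the opaque Data struct. The include path mirrors this header's location in the diff, trailing
// default arguments are omitted, and the parameter values are placeholders.
#include <cstdint>
#include <memory>
#include <vector>
#include "minddata/dataset/include/vision.h"

std::vector<std::shared_ptr<mindspore::dataset::TensorTransform>> BuildAugmentations() {
  namespace vision = mindspore::dataset::vision;
  auto crop = std::make_shared<vision::RandomCrop>(std::vector<int32_t>{224, 224});
  auto flip = std::make_shared<vision::RandomHorizontalFlip>(0.5);
  auto rotate = std::make_shared<vision::RandomRotation>(std::vector<float>{-15.0, 15.0});
  return {crop, flip, rotate};
}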
@@ -675,7 +663,8 @@ class RandomSharpness : public TensorTransform { std::shared_ptr Parse() override; private: - std::vector degrees_; + struct Data; + std::shared_ptr data_; }; /// \brief RandomSolarize TensorTransform. @@ -695,7 +684,8 @@ class RandomSolarize : public TensorTransform { std::shared_ptr Parse() override; private: - std::vector threshold_; + struct Data; + std::shared_ptr data_; }; /// \brief RandomVerticalFlip TensorTransform. @@ -714,7 +704,8 @@ class RandomVerticalFlip : public TensorTransform { std::shared_ptr Parse() override; private: - float probability_; + struct Data; + std::shared_ptr data_; }; /// \brief RandomVerticalFlipWithBBox TensorTransform. @@ -733,7 +724,8 @@ class RandomVerticalFlipWithBBox : public TensorTransform { std::shared_ptr Parse() override; private: - float probability_; + struct Data; + std::shared_ptr data_; }; /// \brief RescaleOperation TensorTransform. @@ -753,8 +745,8 @@ class Rescale : public TensorTransform { std::shared_ptr Parse() override; private: - float rescale_; - float shift_; + struct Data; + std::shared_ptr data_; }; /// \brief ResizeWithBBox TensorTransform. @@ -776,8 +768,8 @@ class ResizeWithBBox : public TensorTransform { std::shared_ptr Parse() override; private: - std::vector size_; - InterpolationMode interpolation_; + struct Data; + std::shared_ptr data_; }; /// \brief RgbaToBgr TensorTransform. @@ -833,10 +825,8 @@ class SoftDvppDecodeRandomCropResizeJpeg : public TensorTransform { std::shared_ptr Parse() override; private: - std::vector size_; - std::vector scale_; - std::vector ratio_; - int32_t max_attempts_; + struct Data; + std::shared_ptr data_; }; /// \brief SoftDvppDecodeResizeJpeg TensorTransform. @@ -864,7 +854,8 @@ class SoftDvppDecodeResizeJpeg : public TensorTransform { std::shared_ptr Parse() override; private: - std::vector size_; + struct Data; + std::shared_ptr data_; }; /// \brief SwapRedBlue TensorOp @@ -909,8 +900,8 @@ class UniformAugment : public TensorTransform { std::shared_ptr Parse() override; private: - std::vector> transforms_; - int32_t num_ops_; + struct Data; + std::shared_ptr data_; }; } // namespace vision diff --git a/mindspore/ccsrc/minddata/dataset/include/vision_ascend.h b/mindspore/ccsrc/minddata/dataset/include/vision_ascend.h index 5fcf8841e1c..9a947941db7 100644 --- a/mindspore/ccsrc/minddata/dataset/include/vision_ascend.h +++ b/mindspore/ccsrc/minddata/dataset/include/vision_ascend.h @@ -54,7 +54,8 @@ class DvppDecodeResizeJpeg : public TensorTransform { std::shared_ptr Parse(const MapTargetDevice &env) override; private: - std::vector resize_; + struct Data; + std::shared_ptr data_; }; class DvppDecodeResizeCropJpeg : public TensorTransform { @@ -74,8 +75,8 @@ class DvppDecodeResizeCropJpeg : public TensorTransform { std::shared_ptr Parse(const MapTargetDevice &env) override; private: - std::vector crop_; - std::vector resize_; + struct Data; + std::shared_ptr data_; }; class DvppDecodePng : public TensorTransform { diff --git a/mindspore/ccsrc/minddata/dataset/include/vision_lite.h b/mindspore/ccsrc/minddata/dataset/include/vision_lite.h index ff82be7256e..7fca4a13f93 100644 --- a/mindspore/ccsrc/minddata/dataset/include/vision_lite.h +++ b/mindspore/ccsrc/minddata/dataset/include/vision_lite.h @@ -62,12 +62,8 @@ class Affine : public TensorTransform { std::shared_ptr Parse() override; private: - float degrees_; - std::vector translation_; - float scale_; - std::vector shear_; - InterpolationMode interpolation_; - std::vector fill_value_; + struct Data; + std::shared_ptr 
data_; }; /// \brief CenterCrop TensorTransform. @@ -90,7 +86,8 @@ class CenterCrop : public TensorTransform { std::shared_ptr Parse(const MapTargetDevice &env) override; private: - std::vector size_; + struct Data; + std::shared_ptr data_; }; /// \brief Crop TensorTransform. @@ -112,8 +109,8 @@ class Crop : public TensorTransform { std::shared_ptr Parse() override; private: - std::vector coordinates_; - std::vector size_; + struct Data; + std::shared_ptr data_; }; /// \brief Decode TensorTransform. @@ -134,7 +131,8 @@ class Decode : public TensorTransform { std::shared_ptr Parse(const MapTargetDevice &env) override; private: - bool rgb_; + struct Data; + std::shared_ptr data_; }; /// \brief Normalize TensorTransform. @@ -158,8 +156,8 @@ class Normalize : public TensorTransform { std::shared_ptr Parse(const MapTargetDevice &env) override; private: - std::vector mean_; - std::vector std_; + struct Data; + std::shared_ptr data_; }; /// \brief RandomAffine TensorTransform. @@ -196,12 +194,8 @@ class RandomAffine : public TensorTransform { std::shared_ptr Parse() override; private: - std::vector degrees_; // min_degree, max_degree - std::vector translate_range_; // maximum x translation percentage, maximum y translation percentage - std::vector scale_range_; // min_scale, max_scale - std::vector shear_ranges_; // min_x_shear, max_x_shear, min_y_shear, max_y_shear - InterpolationMode interpolation_; - std::vector fill_value_; + struct Data; + std::shared_ptr data_; }; /// \brief Resize TensorTransform. @@ -225,8 +219,8 @@ class Resize : public TensorTransform { std::shared_ptr Parse(const MapTargetDevice &env) override; private: - std::vector size_; - InterpolationMode interpolation_; + struct Data; + std::shared_ptr data_; }; /// \brief Rotate TensorTransform. diff --git a/mindspore/ccsrc/minddata/dataset/liteapi/include/datasets.h b/mindspore/ccsrc/minddata/dataset/liteapi/include/datasets.h index 86a5327553a..752b2d44e9a 100644 --- a/mindspore/ccsrc/minddata/dataset/liteapi/include/datasets.h +++ b/mindspore/ccsrc/minddata/dataset/liteapi/include/datasets.h @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -28,8 +29,10 @@ #include #include +#include "include/api/dual_abi_helper.h" #include "include/iterator.h" #include "include/samplers.h" +#include "include/transforms.h" namespace mindspore { namespace dataset { @@ -94,11 +97,13 @@ class Dataset : public std::enable_shared_from_this { /// \brief Gets the column names /// \return Names of the columns. If failed, return an empty vector - std::vector GetColumnNames(); + std::vector GetColumnNames() { return VectorCharToString(GetColumnNamesCharIF()); } /// \brief Gets the class indexing /// \return a map of ClassIndexing. If failed, return an empty map - std::vector>> GetClassIndexing(); + std::vector>> GetClassIndexing() { + return ClassIndexCharToString(GetClassIndexingCharIF()); + } /// \brief Setter function for runtime number of workers /// \param[in] num_workers The number of threads in this operator @@ -110,7 +115,9 @@ class Dataset : public std::enable_shared_from_this { /// \param[in] num_epochs Number of epochs to run through the pipeline, default -1 which means infinite epochs. 
/// An empty row is returned at the end of each epoch /// \return Shared pointer to the Iterator - std::shared_ptr CreateIterator(std::vector columns = {}, int32_t num_epochs = -1); + std::shared_ptr CreateIterator(std::vector columns = {}, int32_t num_epochs = -1) { + return CreateIteratorCharIF(VectorStringToChar(columns), num_epochs); + } /// \brief Function to create a BatchDataset /// \notes Combines batch_size number of consecutive rows into batches @@ -138,14 +145,49 @@ class Dataset : public std::enable_shared_from_this { /// \param[in] project_columns A list of column names to project /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). /// \return Shared pointer to the current MapDataset - std::shared_ptr Map(std::vector> operations, + std::shared_ptr Map(std::vector operations, const std::vector &input_columns = {}, const std::vector &output_columns = {}, const std::vector &project_columns = {}, const std::shared_ptr &cache = nullptr, std::vector> callbacks = {}) { - return std::make_shared(shared_from_this(), operations, input_columns, output_columns, project_columns, - cache, callbacks); + std::vector> transform_ops; + (void)std::transform( + operations.begin(), operations.end(), std::back_inserter(transform_ops), + [](TensorTransform *op) -> std::shared_ptr { return op != nullptr ? op->Parse() : nullptr; }); + return std::make_shared(shared_from_this(), transform_ops, VectorStringToChar(input_columns), + VectorStringToChar(output_columns), VectorStringToChar(project_columns), cache, + callbacks); + } + + std::shared_ptr Map(std::vector> operations, + const std::vector &input_columns = {}, + const std::vector &output_columns = {}, + const std::vector &project_columns = {}, + const std::shared_ptr &cache = nullptr, + std::vector> callbacks = {}) { + std::vector> transform_ops; + (void)std::transform(operations.begin(), operations.end(), std::back_inserter(transform_ops), + [](std::shared_ptr op) -> std::shared_ptr { + return op != nullptr ? 
op->Parse() : nullptr; + }); + return std::make_shared(shared_from_this(), transform_ops, VectorStringToChar(input_columns), + VectorStringToChar(output_columns), VectorStringToChar(project_columns), cache, + callbacks); + } + + std::shared_ptr Map(const std::vector> operations, + const std::vector &input_columns = {}, + const std::vector &output_columns = {}, + const std::vector &project_columns = {}, + const std::shared_ptr &cache = nullptr, + std::vector> callbacks = {}) { + std::vector> transform_ops; + (void)std::transform(operations.begin(), operations.end(), std::back_inserter(transform_ops), + [](TensorTransform &op) -> std::shared_ptr { return op.Parse(); }); + return std::make_shared(shared_from_this(), transform_ops, VectorStringToChar(input_columns), + VectorStringToChar(output_columns), VectorStringToChar(project_columns), cache, + callbacks); } /// \brief Function to create a Project Dataset @@ -153,7 +195,7 @@ class Dataset : public std::enable_shared_from_this { /// \param[in] columns The name of columns to project /// \return Shared pointer to the current Dataset std::shared_ptr Project(const std::vector &columns) { - return std::make_shared(shared_from_this(), columns); + return std::make_shared(shared_from_this(), VectorStringToChar(columns)); } /// \brief Function to create a Shuffle Dataset @@ -169,6 +211,16 @@ class Dataset : public std::enable_shared_from_this { protected: std::shared_ptr tree_getters_; std::shared_ptr ir_node_; + + private: + // Char interface(CharIF) of GetColumnNames + std::vector> GetColumnNamesCharIF(); + + // Char interface(CharIF) of GetClassIndexing + std::vector, std::vector>> GetClassIndexingCharIF(); + + // Char interface(CharIF) of CreateIterator + std::shared_ptr CreateIteratorCharIF(std::vector> columns, int32_t num_epochs); }; class BatchDataset : public Dataset { @@ -180,15 +232,15 @@ class BatchDataset : public Dataset { class MapDataset : public Dataset { public: MapDataset(std::shared_ptr input, std::vector> operations, - const std::vector &input_columns, const std::vector &output_columns, - const std::vector &project_columns, const std::shared_ptr &cache, + const std::vector> &input_columns, const std::vector> &output_columns, + const std::vector> &project_columns, const std::shared_ptr &cache, std::vector> callbacks); ~MapDataset() = default; }; class ProjectDataset : public Dataset { public: - ProjectDataset(std::shared_ptr input, const std::vector &columns); + ProjectDataset(std::shared_ptr input, const std::vector> &columns); ~ProjectDataset() = default; }; @@ -201,14 +253,22 @@ class ShuffleDataset : public Dataset { /// \brief Function to create a SchemaObj /// \param[in] schema_file Path of schema file /// \return Shared pointer to the current schema -std::shared_ptr Schema(const std::string &schema_file = ""); +std::shared_ptr SchemaCharIF(const std::vector &schema_file); +inline std::shared_ptr Schema(const std::string &schema_file = "") { + return SchemaCharIF(StringToChar(schema_file)); +} class AlbumDataset : public Dataset { public: - AlbumDataset(const std::string &dataset_dir, const std::string &data_schema, - const std::vector &column_names = {}, bool decode = false, - const std::shared_ptr &sampler = RandomSampler(), - const std::shared_ptr &cache = nullptr); + AlbumDataset(const std::vector &dataset_dir, const std::vector &data_schema, + const std::vector> &column_names, bool decode, const std::shared_ptr &sampler, + const std::shared_ptr &cache); + AlbumDataset(const std::vector &dataset_dir, const std::vector 
&data_schema, + const std::vector> &column_names, bool decode, Sampler *sampler, + const std::shared_ptr &cache); + AlbumDataset(const std::vector &dataset_dir, const std::vector &data_schema, + const std::vector> &column_names, bool decode, + const std::reference_wrapper sampler, const std::shared_ptr &cache); ~AlbumDataset() = default; }; @@ -219,20 +279,58 @@ class AlbumDataset : public Dataset { /// \param[in] column_names Column names used to specify columns to load, if empty, will read all columns. /// (default = {}) /// \param[in] decode the option to decode the images in dataset (default = false) -/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, +/// \param[in] sampler Shared pointer to a sampler object used to choose samples from the dataset. If sampler is not +/// given, /// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). /// \return Shared pointer to the current Dataset -std::shared_ptr Album(const std::string &dataset_dir, const std::string &data_schema, - const std::vector &column_names = {}, bool decode = false, - const std::shared_ptr &sampler = RandomSampler(), - const std::shared_ptr &cache = nullptr); +inline std::shared_ptr Album(const std::string &dataset_dir, const std::string &data_schema, + const std::vector &column_names = {}, bool decode = false, + const std::shared_ptr &sampler = std::make_shared(), + const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_dir), StringToChar(data_schema), + VectorStringToChar(column_names), decode, sampler, cache); +} +/// \brief Function to create an AlbumDataset +/// \notes The generated dataset is specified through setting a schema +/// \param[in] dataset_dir Path to the root directory that contains the dataset +/// \param[in] data_schema Path to dataset schema file +/// \param[in] column_names Column names used to specify columns to load +/// \param[in] decode the option to decode the images in dataset +/// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset. +/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). +/// \return Shared pointer to the current Dataset +inline std::shared_ptr Album(const std::string &dataset_dir, const std::string &data_schema, + const std::vector &column_names, bool decode, Sampler *sampler, + const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_dir), StringToChar(data_schema), + VectorStringToChar(column_names), decode, sampler, cache); +} +/// \brief Function to create an AlbumDataset +/// \notes The generated dataset is specified through setting a schema +/// \param[in] dataset_dir Path to the root directory that contains the dataset +/// \param[in] data_schema Path to dataset schema file +/// \param[in] column_names Column names used to specify columns to load +/// \param[in] decode the option to decode the images in dataset +/// \param[in] sampler Sampler object used to choose samples from the dataset. +/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). 
+/// \return Shared pointer to the current Dataset +inline std::shared_ptr Album(const std::string &dataset_dir, const std::string &data_schema, + const std::vector &column_names, bool decode, + const std::reference_wrapper sampler, + const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_dir), StringToChar(data_schema), + VectorStringToChar(column_names), decode, sampler, cache); +} class MnistDataset : public Dataset { public: - explicit MnistDataset(const std::string &dataset_dir, const std::string &usage = "all", - const std::shared_ptr &sampler = RandomSampler(), - const std::shared_ptr &cache = nullptr); + explicit MnistDataset(const std::vector &dataset_dir, const std::vector &usage, + const std::shared_ptr &sampler, const std::shared_ptr &cache); + explicit MnistDataset(const std::vector &dataset_dir, const std::vector &usage, Sampler *sampler, + const std::shared_ptr &cache); + explicit MnistDataset(const std::vector &dataset_dir, const std::vector &usage, + const std::reference_wrapper sampler, const std::shared_ptr &cache); ~MnistDataset() = default; }; @@ -240,13 +338,41 @@ class MnistDataset : public Dataset { /// \notes The generated dataset has two columns ["image", "label"] /// \param[in] dataset_dir Path to the root directory that contains the dataset /// \param[in] usage of MNIST, can be "train", "test" or "all" (default = "all"). -/// \param[in] sampler Object used to choose samples from the dataset. If sampler is not given, +/// \param[in] sampler Shared pointer to a sampler object used to choose samples from the dataset. If sampler is not +/// given, /// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). /// \return Shared pointer to the current MnistDataset -std::shared_ptr Mnist(const std::string &dataset_dir, const std::string &usage = "all", - const std::shared_ptr &sampler = RandomSampler(), - const std::shared_ptr &cache = nullptr); +inline std::shared_ptr Mnist(const std::string &dataset_dir, const std::string &usage = "all", + const std::shared_ptr &sampler = std::make_shared(), + const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_dir), StringToChar(usage), sampler, cache); +} + +/// \brief Function to create a MnistDataset +/// \notes The generated dataset has two columns ["image", "label"] +/// \param[in] dataset_dir Path to the root directory that contains the dataset +/// \param[in] usage of MNIST, can be "train", "test" or "all" +/// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset. +/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). +/// \return Shared pointer to the current MnistDataset +inline std::shared_ptr Mnist(const std::string &dataset_dir, const std::string &usage, Sampler *sampler, + const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_dir), StringToChar(usage), sampler, cache); +} + +/// \brief Function to create a MnistDataset +/// \notes The generated dataset has two columns ["image", "label"] +/// \param[in] dataset_dir Path to the root directory that contains the dataset +/// \param[in] usage of MNIST, can be "train", "test" or "all" +/// \param[in] sampler Sampler object used to choose samples from the dataset. +/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). 
+/// \return Shared pointer to the current MnistDataset +inline std::shared_ptr Mnist(const std::string &dataset_dir, const std::string &usage, + const std::reference_wrapper sampler, + const std::shared_ptr &cache = nullptr) { + return std::make_shared(StringToChar(dataset_dir), StringToChar(usage), sampler, cache); +} } // namespace dataset } // namespace mindspore diff --git a/mindspore/ccsrc/minddata/dataset/liteapi/include/execute.h b/mindspore/ccsrc/minddata/dataset/liteapi/include/execute.h index 5229b097296..48149430342 100644 --- a/mindspore/ccsrc/minddata/dataset/liteapi/include/execute.h +++ b/mindspore/ccsrc/minddata/dataset/liteapi/include/execute.h @@ -26,16 +26,27 @@ namespace mindspore { namespace dataset { +class DeviceResource; // class to run tensor operations in eager mode class Execute { public: /// \brief Constructor - explicit Execute(std::shared_ptr op); + // FIXME - Temporarily overload Execute to support both TensorOperation and TensorTransform + explicit Execute(std::shared_ptr op, MapTargetDevice deviceType = MapTargetDevice::kCpu); + explicit Execute(std::shared_ptr op, MapTargetDevice deviceType = MapTargetDevice::kCpu); + // explicit Execute(TensorTransform op, MapTargetDevice deviceType = MapTargetDevice::KCpu); + explicit Execute(TensorTransform *op, MapTargetDevice deviceType = MapTargetDevice::kCpu); - explicit Execute(std::vector> ops); + explicit Execute(std::vector> ops, + MapTargetDevice deviceType = MapTargetDevice::kCpu); + explicit Execute(std::vector> ops, + MapTargetDevice deviceType = MapTargetDevice::kCpu); + explicit Execute(const std::vector> ops, + MapTargetDevice deviceType = MapTargetDevice::kCpu); + explicit Execute(std::vector ops, MapTargetDevice deviceType = MapTargetDevice::kCpu); /// \brief Destructor - ~Execute() = default; + ~Execute(); /// \brief callable function to execute the TensorOperation in eager mode /// \param[in] input Tensor to be transformed @@ -49,8 +60,16 @@ class Execute { /// \return - Status Status operator()(const std::vector &input_tensor_list, std::vector *out); + Status DeviceMemoryRelease(); + private: + Status validate_device_(); + std::vector> ops_; + + MapTargetDevice device_type_; + + std::shared_ptr device_resource_; }; } // namespace dataset diff --git a/mindspore/ccsrc/minddata/dataset/liteapi/include/iterator.h b/mindspore/ccsrc/minddata/dataset/liteapi/include/iterator.h index 263fbfbc0d5..b69cb33a1cc 100644 --- a/mindspore/ccsrc/minddata/dataset/liteapi/include/iterator.h +++ b/mindspore/ccsrc/minddata/dataset/liteapi/include/iterator.h @@ -17,10 +17,12 @@ #ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_ITERATOR_H_ #define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_ITERATOR_H_ +#include #include #include #include #include +#include "include/api/dual_abi_helper.h" #include "include/api/status.h" #include "include/api/types.h" @@ -39,6 +41,7 @@ class IteratorConsumer; class Dataset; using MSTensorMap = std::unordered_map; +using MSTensorMapChar = std::map, mindspore::MSTensor>; using MSTensorVec = std::vector; // Abstract class for iterating over the dataset. @@ -60,7 +63,18 @@ class Iterator { /// \note Type of return data is a map(with column name). /// \param[out] row - the output tensor row. /// \return - a Status error code, returns OK if no error encountered. 
- Status GetNextRow(MSTensorMap *row); + Status GetNextRow(MSTensorMap *row) { + MSTensorMapChar row_; + row_.clear(); + row->clear(); + Status s = GetNextRowCharIF(&row_); + TensorMapCharToString(&row_, row); + return s; + } + + // Char interface(CharIF) of GetNextRow + // This api exists because std::string will constrained by ABI compile macro but char don't. + Status GetNextRowCharIF(MSTensorMapChar *row); /// \brief Function to get the next row from the data pipeline. /// \note Type of return data is a vector(without column name). diff --git a/mindspore/ccsrc/minddata/dataset/liteapi/include/samplers.h b/mindspore/ccsrc/minddata/dataset/liteapi/include/samplers.h index 2a86c3afa04..c2434d1d26b 100644 --- a/mindspore/ccsrc/minddata/dataset/liteapi/include/samplers.h +++ b/mindspore/ccsrc/minddata/dataset/liteapi/include/samplers.h @@ -26,143 +26,59 @@ namespace mindspore { namespace dataset { -// Internal Sampler class forward declaration -class SamplerRT; +class SamplerObj; + +// Abstract class to represent a sampler in the data pipeline. +/// \class Sampler samplers.h +/// \brief An abstract base class to represent a sampler in the data pipeline. +class Sampler : std::enable_shared_from_this { + friend class AlbumDataset; + friend class MindDataDataset; + friend std::shared_ptr SelectSampler(int64_t, bool, int32_t, int32_t); -class SamplerObj { public: /// \brief Constructor - SamplerObj(); + Sampler() {} /// \brief Destructor - ~SamplerObj() = default; + ~Sampler() = default; - /// \brief Pure virtual function for derived class to implement parameters validation - /// \return The Status code of the function. It returns OK status if parameters are valid. - virtual Status ValidateParams() = 0; - - /// \brief Pure virtual function to convert a SamplerObj class into a runtime sampler object - /// \return Shared pointers to the newly created Sampler - virtual std::shared_ptr SamplerBuild() = 0; - - /// \brief Pure virtual function to copy a SamplerObj class - /// \return Shared pointers to the newly copied SamplerObj - virtual std::shared_ptr SamplerCopy() = 0; - - /// \brief Function for derived class to get the shard id of sampler - /// \return The shard id of the derived sampler - virtual int64_t ShardId() { return 0; } - - /// \brief Adds a child to the sampler - /// \param[in] child The sampler to be added as child - /// \return the Status code returned - Status AddChildSampler(std::shared_ptr child); - - std::vector> GetChild() { return children_; } + /// \brief A virtual function to add a child sampler. + /// \param[in] child The child sampler to be added as a children of this sampler. + virtual void AddChild(std::shared_ptr child) { children_.push_back(child); } protected: - /// \brief A function that calls build on the children of this sampler - /// \param[in] sampler The samplerRT object built from this sampler - void BuildChildren(std::shared_ptr sampler); + /// \brief Pure virtual function to convert a Sampler class into an IR Sampler object. + /// \return shared pointer to the newly created TensorOperation. + virtual std::shared_ptr Parse() = 0; - std::vector> children_; + std::vector> children_; }; -class DistributedSamplerObj; -class PKSamplerObj; -class PreBuiltSamplerObj; -class RandomSamplerObj; -class SequentialSamplerObj; -class SubsetSamplerObj; -class SubsetRandomSamplerObj; -class WeightedRandomSamplerObj; +/// \brief A class to represent a Distributed Sampler in the data pipeline. +/// \notes A Sampler that accesses a shard of the dataset. 
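// --- Illustrative usage sketch (not part of the patch itself) ---
// The inline GetNextRow() above round-trips through MSTensorMapChar so the public
// signature stays ABI-stable while callers keep using the std::string-keyed
// MSTensorMap. Same assumed context as the earlier sketch; `ds` is any Dataset from
// this API, CreateIterator() is the existing public wrapper, and an empty row is
// taken to mark the end of the pipeline. Error handling is elided.

int64_t CountRows(const std::shared_ptr<Dataset> &ds) {
  std::shared_ptr<Iterator> iter = ds->CreateIterator();
  MSTensorMap row;
  Status rc = iter->GetNextRow(&row);
  int64_t n = 0;
  while (rc.IsOk() && !row.empty()) {
    MSTensor image = row["image"];  // columns are addressed by name, e.g. "image"/"label" for Mnist
    (void)image;
    ++n;
    rc = iter->GetNextRow(&row);
  }
  iter->Stop();
  return n;
}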
+class DistributedSampler : public Sampler { + friend std::shared_ptr SelectSampler(int64_t, bool, int32_t, int32_t); -/// Function to create a Distributed Sampler. -/// \notes A Sampler that access a shard of the dataset. -/// \param[in] num_shards - Number of shards to divide the dataset into. -/// \param[in] shard_id - Shard ID of the current shard within num_shards. -/// \param[in] shuffle - If true, the indices are shuffled. -/// \param[in] num_samples - The number of samples to draw (default to all elements). -/// \param[in] seed - The seed in use when shuffle is true. -/// \param[in] offset - The starting position where access to elements in the dataset begins. -/// \param[in] even_dist - If true, each shard would return the same number of rows (default to true). -/// If false the total rows returned by all the shards would not have overlap. -/// \return Shared pointer to the current Sampler. -std::shared_ptr DistributedSampler(int64_t num_shards, int64_t shard_id, bool shuffle = true, - int64_t num_samples = 0, uint32_t seed = 1, - int64_t offset = -1, bool even_dist = true); - -/// Function to create a PK Sampler. -/// \notes Samples K elements for each P class in the dataset. -/// This will sample all classes. -/// \param[in] num_val - Number of elements to sample for each class. -/// \param[in] shuffle - If true, the class IDs are shuffled. -/// \param[in] num_samples - The number of samples to draw (default to all elements). -/// \return Shared pointer to the current Sampler. -std::shared_ptr PKSampler(int64_t num_val, bool shuffle = false, int64_t num_samples = 0); - -/// Function to create a Random Sampler. -/// \notes Samples the elements randomly. -/// \param[in] replacement - If true, put the sample ID back for the next draw. -/// \param[in] num_samples - The number of samples to draw (default to all elements). -/// \return Shared pointer to the current Sampler. -std::shared_ptr RandomSampler(bool replacement = false, int64_t num_samples = 0); - -/// Function to create a Sequential Sampler. -/// \notes Samples the dataset elements sequentially, same as not having a sampler. -/// \param[in] start_index - Index to start sampling at (default to start at first id). -/// \param[in] num_samples - The number of samples to draw (default to all elements). -/// \return Shared pointer to the current Sampler. -std::shared_ptr SequentialSampler(int64_t start_index = 0, int64_t num_samples = 0); - -/// Function to create a Subset Sampler. -/// \notes Samples the elements from a sequence of indices. -/// \param[in] indices - A vector sequence of indices. -/// \param[in] num_samples - The number of samples to draw (default to all elements). -/// \return Shared pointer to the current Sampler. -std::shared_ptr SubsetSampler(std::vector indices, int64_t num_samples = 0); - -/// Function to create a Subset Random Sampler. -/// \notes Samples the elements randomly from a sequence of indices. -/// \param[in] indices - A vector sequence of indices. -/// \param[in] num_samples - The number of samples to draw (default to all elements). -/// \return Shared pointer to the current Sampler. -std::shared_ptr SubsetRandomSampler(std::vector indices, int64_t num_samples = 0); - -/// Function to create a Weighted Random Sampler. -/// \notes Samples the elements from [0, len(weights) - 1] randomly with the given -/// weights (probabilities). -/// \param[in] weights - A vector sequence of weights, not necessarily summing up to 1. 
-/// \param[in] num_samples - The number of samples to draw (default to all elements). -/// \param[in] replacement - If true, put the sample ID back for the next draw. -/// \return Shared pointer to the current Sampler. -std::shared_ptr WeightedRandomSampler(std::vector weights, int64_t num_samples = 0, - bool replacement = true); - -/* ####################################### Derived Sampler classes ################################# */ -class DistributedSamplerObj : public SamplerObj { public: - DistributedSamplerObj(int64_t num_shards, int64_t shard_id, bool shuffle, int64_t num_samples, uint32_t seed, - int64_t offset, bool even_dist); + /// \brief Constructor + /// \param[in] num_shards - Number of shards to divide the dataset into. + /// \param[in] shard_id - Shard ID of the current shard within num_shards. + /// \param[in] shuffle - If true, the indices are shuffled. + /// \param[in] num_samples - The number of samples to draw (default to all elements). + /// \param[in] seed - The seed in use when shuffle is true. + /// \param[in] offset - The starting position where access to elements in the dataset begins. + /// \param[in] even_dist - If true, each shard would return the same number of rows (default to true). + /// If false the total rows returned by all the shards would not have overlap. + explicit DistributedSampler(int64_t num_shards, int64_t shard_id, bool shuffle = true, int64_t num_samples = 0, + uint32_t seed = 1, int64_t offset = -1, bool even_dist = true); + /// \brief Destructor. + ~DistributedSampler() = default; - virtual ~DistributedSamplerObj() = default; - - std::shared_ptr SamplerBuild() override; - - std::shared_ptr SamplerCopy() override { - auto sampler = std::make_shared(num_shards_, shard_id_, shuffle_, num_samples_, seed_, - offset_, even_dist_); - for (auto child : children_) { - sampler->AddChildSampler(child); - } - return sampler; - } - - Status ValidateParams() override; - - /// \brief Function to get the shard id of sampler - /// \return The shard id of sampler - int64_t ShardId() override { return shard_id_; } + protected: + /// \brief Function to convert a Sampler into an IR SamplerObj. + /// \return shared pointer to the newly created SamplerObj. + std::shared_ptr Parse() override; private: int64_t num_shards_; @@ -174,23 +90,26 @@ class DistributedSamplerObj : public SamplerObj { bool even_dist_; }; -class PKSamplerObj : public SamplerObj { +/// \brief A class to represent a PK Sampler in the data pipeline. +/// \notes Samples K elements for each P class in the dataset. +/// This will sample all classes. +class PKSampler : public Sampler { + friend std::shared_ptr SelectSampler(int64_t, bool, int32_t, int32_t); + public: - PKSamplerObj(int64_t num_val, bool shuffle, int64_t num_samples); + /// \brief Constructor + /// \param[in] num_val - Number of elements to sample for each class. + /// \param[in] shuffle - If true, the class IDs are shuffled. + /// \param[in] num_samples - The number of samples to draw (default to all elements). + explicit PKSampler(int64_t num_val, bool shuffle = false, int64_t num_samples = 0); - virtual ~PKSamplerObj() = default; + /// \brief Destructor. 
+ ~PKSampler() = default; - std::shared_ptr SamplerBuild() override; - - std::shared_ptr SamplerCopy() override { - auto sampler = std::make_shared(num_val_, shuffle_, num_samples_); - for (auto child : children_) { - sampler->AddChildSampler(child); - } - return sampler; - } - - Status ValidateParams() override; + protected: + /// \brief Function to convert a Sampler into an IR SamplerObj. + /// \return shared pointer to the newly created SamplerObj. + std::shared_ptr Parse() override; private: int64_t num_val_; @@ -198,131 +117,120 @@ class PKSamplerObj : public SamplerObj { int64_t num_samples_; }; -class PreBuiltSamplerObj : public SamplerObj { +/// \brief A class to represent a Random Sampler in the data pipeline. +/// \notes Samples the elements randomly. +class RandomSampler : public Sampler { + friend std::shared_ptr SelectSampler(int64_t, bool, int32_t, int32_t); + public: - explicit PreBuiltSamplerObj(std::shared_ptr sampler); + /// \brief Constructor + /// \param[in] replacement - If true, put the sample ID back for the next draw. + /// \param[in] num_samples - The number of samples to draw (default to all elements). + explicit RandomSampler(bool replacement = false, int64_t num_samples = 0); - ~PreBuiltSamplerObj() = default; + /// \brief Destructor. + ~RandomSampler() = default; - std::shared_ptr SamplerBuild() override; - - std::shared_ptr SamplerCopy() override; - - Status ValidateParams() override; - - private: - std::shared_ptr sp_; -}; - -class RandomSamplerObj : public SamplerObj { - public: - RandomSamplerObj(bool replacement, int64_t num_samples, bool reshuffle_each_epoch = true); - - virtual ~RandomSamplerObj() = default; - - std::shared_ptr SamplerBuild() override; - - std::shared_ptr SamplerCopy() override { - auto sampler = std::make_shared(replacement_, num_samples_, reshuffle_each_epoch_); - for (auto child : children_) { - sampler->AddChildSampler(child); - } - return sampler; - } - - Status ValidateParams() override; + protected: + /// \brief Function to convert a Sampler into an IR SamplerObj. + /// \return shared pointer to the newly created SamplerObj. + std::shared_ptr Parse() override; private: bool replacement_; int64_t num_samples_; - bool reshuffle_each_epoch_; }; -class SequentialSamplerObj : public SamplerObj { +/// \brief A class to represent a Sequential Sampler in the data pipeline. +/// \notes Samples the dataset elements sequentially, same as not having a sampler. +class SequentialSampler : public Sampler { + friend std::shared_ptr SelectSampler(int64_t, bool, int32_t, int32_t); + public: - SequentialSamplerObj(int64_t start_index, int64_t num_samples); + /// \brief Constructor + /// \param[in] start_index - Index to start sampling at (default to start at first id). + /// \param[in] num_samples - The number of samples to draw (default to all elements). + explicit SequentialSampler(int64_t start_index = 0, int64_t num_samples = 0); - virtual ~SequentialSamplerObj() = default; + /// \brief Destructor. + ~SequentialSampler() = default; - std::shared_ptr SamplerBuild() override; - - std::shared_ptr SamplerCopy() override { - auto sampler = std::make_shared(start_index_, num_samples_); - for (auto child : children_) { - sampler->AddChildSampler(child); - } - return sampler; - } - - Status ValidateParams() override; + protected: + /// \brief Function to convert a Sampler into an IR SamplerObj. + /// \return shared pointer to the newly created SamplerObj. 
+ std::shared_ptr Parse() override; private: int64_t start_index_; int64_t num_samples_; }; -class SubsetSamplerObj : public SamplerObj { +/// \brief A class to represent a Subset Sampler in the data pipeline. +/// \notes Samples the elements from a sequence of indices. +class SubsetSampler : public Sampler { + friend std::shared_ptr SelectSampler(int64_t, bool, int32_t, int32_t); + public: - SubsetSamplerObj(std::vector indices, int64_t num_samples); + /// \brief Constructor + /// \param[in] indices - A vector sequence of indices. + /// \param[in] num_samples - The number of samples to draw (default to all elements). + explicit SubsetSampler(std::vector indices, int64_t num_samples = 0); - virtual ~SubsetSamplerObj() = default; - - std::shared_ptr SamplerBuild() override; - - std::shared_ptr SamplerCopy() override { - auto sampler = std::make_shared(indices_, num_samples_); - for (auto child : children_) { - sampler->AddChildSampler(child); - } - return sampler; - } - - Status ValidateParams() override; + /// \brief Destructor. + ~SubsetSampler() = default; protected: - const std::vector indices_; + /// \brief Function to convert a Sampler into an IR SamplerObj. + /// \return shared pointer to the newly created SamplerObj. + std::shared_ptr Parse() override; + + std::vector indices_; int64_t num_samples_; }; -class SubsetRandomSamplerObj : public SubsetSamplerObj { +/// \brief A class to represent a Subset Random Sampler in the data pipeline. +/// \notes Samples the elements randomly from a sequence of indices. +class SubsetRandomSampler : public SubsetSampler { + friend std::shared_ptr SelectSampler(int64_t, bool, int32_t, int32_t); + public: - SubsetRandomSamplerObj(std::vector indices, int64_t num_samples); + /// \brief Constructor + /// \param[in] indices - A vector sequence of indices. + /// \param[in] num_samples - The number of samples to draw (default to all elements). + explicit SubsetRandomSampler(std::vector indices, int64_t num_samples = 0); - ~SubsetRandomSamplerObj() = default; + /// \brief Destructor. + ~SubsetRandomSampler() = default; - std::shared_ptr SamplerBuild() override; - - std::shared_ptr SamplerCopy() override { - auto sampler = std::make_shared(indices_, num_samples_); - for (auto child : children_) { - sampler->AddChildSampler(child); - } - return sampler; - } - - private: + protected: + /// \brief Function to convert a Sampler into an IR SamplerObj. + /// \return shared pointer to the newly created SamplerObj. + std::shared_ptr Parse() override; }; -class WeightedRandomSamplerObj : public SamplerObj { +/// \brief A class to represent a Weighted Random Sampler in the data pipeline. +/// \notes Samples the elements from [0, len(weights) - 1] randomly with the given +/// weights (probabilities). +class WeightedRandomSampler : public Sampler { + friend std::shared_ptr SelectSampler(int64_t, bool, int32_t, int32_t); + public: - explicit WeightedRandomSamplerObj(std::vector weights, int64_t num_samples = 0, bool replacement = true); + /// \brief Constructor + /// \param[in] weights - A vector sequence of weights, not necessarily summing up to 1. + /// \param[in] num_samples - The number of samples to draw (default to all elements). + /// \param[in] replacement - If true, put the sample ID back for the next draw. + explicit WeightedRandomSampler(std::vector weights, int64_t num_samples = 0, bool replacement = true); - virtual ~WeightedRandomSamplerObj() = default; + /// \brief Destructor. 
+ ~WeightedRandomSampler() = default; - std::shared_ptr SamplerBuild() override; - - std::shared_ptr SamplerCopy() override { - auto sampler = std::make_shared(weights_, num_samples_, replacement_); - for (auto child : children_) { - sampler->AddChildSampler(child); - } - return sampler; - } - - Status ValidateParams() override; + protected: + /// \brief Function to convert a Sampler into an IR SamplerObj. + /// \return shared pointer to the newly created SamplerObj. + std::shared_ptr Parse() override; private: - const std::vector weights_; + std::vector weights_; int64_t num_samples_; bool replacement_; }; diff --git a/mindspore/ccsrc/minddata/dataset/liteapi/include/transforms.h b/mindspore/ccsrc/minddata/dataset/liteapi/include/transforms.h index da4a643dc02..ace7a969928 100644 --- a/mindspore/ccsrc/minddata/dataset/liteapi/include/transforms.h +++ b/mindspore/ccsrc/minddata/dataset/liteapi/include/transforms.h @@ -20,231 +20,178 @@ #include #include #include -#include "include/constants.h" +#include "include/api/dual_abi_helper.h" #include "include/api/status.h" +#include "include/constants.h" namespace mindspore { namespace dataset { -class TensorOp; +class TensorOperation; -// Char arrays storing name of corresponding classes (in alphabetical order) -constexpr char kComposeOperation[] = "Compose"; -constexpr char kDuplicateOperation[] = "Duplicate"; -constexpr char kOneHotOperation[] = "OneHot"; -constexpr char kPreBuiltOperation[] = "PreBuilt"; -constexpr char kRandomApplyOperation[] = "RandomApply"; -constexpr char kRandomChoiceOperation[] = "RandomChoice"; -constexpr char kRandomSelectSubpolicyOperation[] = "RandomSelectSubpolicy"; -constexpr char kTypeCastOperation[] = "TypeCast"; -constexpr char kUniqueOperation[] = "Unique"; - -// Abstract class to represent a dataset in the data pipeline. -class TensorOperation : public std::enable_shared_from_this { +// Abstract class to represent a tensor transform operation in the data pipeline. +/// \class TensorTransform transforms.h +/// \brief A base class to represent a tensor transform operation in the data pipeline. +class TensorTransform : public std::enable_shared_from_this { public: /// \brief Constructor - TensorOperation() : random_op_(false) {} - - /// \brief Constructor - explicit TensorOperation(bool random) : random_op_(random) {} + TensorTransform() {} /// \brief Destructor - ~TensorOperation() = default; + ~TensorTransform() = default; - /// \brief Pure virtual function to convert a TensorOperation class into a runtime TensorOp object. - /// \return shared pointer to the newly created TensorOp. - virtual std::shared_ptr Build() = 0; + /// \brief Pure virtual function to convert a TensorTransform class into a IR TensorOperation object. + /// \return shared pointer to the newly created TensorOperation. + virtual std::shared_ptr Parse() = 0; - virtual Status ValidateParams() = 0; - - virtual std::string Name() const = 0; - - /// \brief Check whether the operation is deterministic. - /// \return true if this op is a random op (returns non-deterministic result e.g. RandomCrop) - bool IsRandomOp() const { return random_op_; } - - protected: - bool random_op_; + /// \brief Virtual function to convert a TensorTransform class into a IR TensorOperation object. + /// \param[in] env A string to determine the running environment + /// \return shared pointer to the newly created TensorOperation. 
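// --- Illustrative usage sketch (not part of the patch itself) ---
// The object-style samplers above replace the old SamplerObj factory functions; the
// pipeline calls Parse() internally, so user code only constructs the objects and,
// optionally, chains them with AddChild(). Same assumed context as before;
// "./mnist" is a hypothetical dataset path.

std::shared_ptr<Dataset> BuildShardedMnist() {
  // Shard 0 of 8, shuffled, drawing at most 1000 samples.
  auto sampler = std::make_shared<DistributedSampler>(/*num_shards=*/8, /*shard_id=*/0,
                                                      /*shuffle=*/true, /*num_samples=*/1000);
  // A child sampler pre-filters the ids the parent samples from.
  sampler->AddChild(std::make_shared<SequentialSampler>(/*start_index=*/0, /*num_samples=*/2000));
  return Mnist("./mnist", "train", sampler);  // shared_ptr<Sampler> overload of Mnist()
}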
+ virtual std::shared_ptr Parse(const MapTargetDevice &env) { return nullptr; } }; -// Helper function to validate fill value -Status ValidateVectorFillvalue(const std::string &transform_name, const std::vector &fill_value); - -// Helper function to validate probability -Status ValidateProbability(const std::string &transform_name, const float &probability); - -// Helper function to validate padding -Status ValidateVectorPadding(const std::string &transform_name, const std::vector &padding); - -// Helper function to validate size -Status ValidateVectorPositive(const std::string &transform_name, const std::vector &size); - -// Helper function to validate transforms -Status ValidateVectorTransforms(const std::string &transform_name, - const std::vector> &transforms); - -// Helper function to compare float value -bool CmpFloat(const float &a, const float &b, float epsilon = 0.0000000001f); - // Transform operations for performing data transformation. namespace transforms { -// Transform Op classes (in alphabetical order) -class ComposeOperation; -class DuplicateOperation; -class OneHotOperation; -class PreBuiltOperation; -class RandomApplyOperation; -class RandomChoiceOperation; -class TypeCastOperation; - -/// \brief Function to create a Compose TensorOperation. +/// \brief Compose Op. /// \notes Compose a list of transforms into a single transform. -/// \param[in] transforms A vector of transformations to be applied. -/// \return Shared pointer to the current TensorOperation. -std::shared_ptr Compose(const std::vector> &transforms); +class Compose : public TensorTransform { + public: + /// \brief Constructor. + /// \param[in] transforms A vector of transformations to be applied. + explicit Compose(const std::vector &transforms); + explicit Compose(const std::vector> &transforms); + explicit Compose(const std::vector> &transforms); -/// \brief Function to create a Duplicate TensorOperation. + /// \brief Destructor + ~Compose() = default; + + /// \brief Function to convert TensorTransform object into a TensorOperation object. + /// \return Shared pointer to TensorOperation object. + std::shared_ptr Parse() override; + + private: + struct Data; + std::shared_ptr data_; +}; + +/// \brief Duplicate Op. /// \notes Duplicate the input tensor to a new output tensor. /// The input tensor is carried over to the output list. -/// \return Shared pointer to the current TensorOperation. -std::shared_ptr Duplicate(); +class Duplicate : public TensorTransform { + public: + /// \brief Constructor. + Duplicate(); -/// \brief Function to create a OneHot TensorOperation. + /// \brief Destructor + ~Duplicate() = default; + + /// \brief Function to convert TensorTransform object into a TensorOperation object. + /// \return Shared pointer to TensorOperation object. + std::shared_ptr Parse() override; +}; + +/// \brief OneHot Op. /// \notes Convert the labels into OneHot format. -/// \param[in] num_classes number of classes. -/// \return Shared pointer to the current TensorOperation. -std::shared_ptr OneHot(int32_t num_classes); +class OneHot : public TensorTransform { + public: + /// \brief Constructor. + /// \param[in] num_classes number of classes. + explicit OneHot(int32_t num_classes); -/// \brief Function to create a RandomApply TensorOperation. + /// \brief Destructor + ~OneHot() = default; + + /// \brief Function to convert TensorTransform object into a TensorOperation object. + /// \return Shared pointer to TensorOperation object. 
+ std::shared_ptr Parse() override; + + private: + struct Data; + std::shared_ptr data_; +}; + +/// \brief RandomApply Op. /// \notes Randomly perform a series of transforms with a given probability. -/// \param[in] transforms A vector of transformations to be applied. -/// \param[in] prob The probability to apply the transformation list (default=0.5) -/// \return Shared pointer to the current TensorOperation. -std::shared_ptr RandomApply(const std::vector> &transforms, - double prob = 0.5); +class RandomApply : public TensorTransform { + public: + /// \brief Constructor. + /// \param[in] transforms A vector of transformations to be applied. + /// \param[in] prob The probability to apply the transformation list (default=0.5) + explicit RandomApply(const std::vector &transforms, double prob = 0.5); + explicit RandomApply(const std::vector> &transforms, double prob = 0.5); + explicit RandomApply(const std::vector> &transforms, double prob = 0.5); -/// \brief Function to create a RandomChoice TensorOperation. + /// \brief Destructor + ~RandomApply() = default; + + /// \brief Function to convert TensorTransform object into a TensorOperation object. + /// \return Shared pointer to TensorOperation object. + std::shared_ptr Parse() override; + + private: + struct Data; + std::shared_ptr data_; +}; + +/// \brief RandomChoice Op. /// \notes Randomly selects one transform from a list of transforms to perform operation. -/// \param[in] transforms A vector of transformations to be chosen from to apply. -/// \return Shared pointer to the current TensorOperation. -std::shared_ptr RandomChoice(const std::vector> &transforms); +class RandomChoice : public TensorTransform { + public: + /// \brief Constructor. + /// \param[in] transforms A vector of transformations to be chosen from to apply. + explicit RandomChoice(const std::vector &transforms); + explicit RandomChoice(const std::vector> &transforms); + explicit RandomChoice(const std::vector> &transforms); -/// \brief Function to create a TypeCast TensorOperation. + /// \brief Destructor + ~RandomChoice() = default; + + /// \brief Function to convert TensorTransform object into a TensorOperation object. + /// \return Shared pointer to TensorOperation object. + std::shared_ptr Parse() override; + + private: + struct Data; + std::shared_ptr data_; +}; + +/// \brief TypeCast Op. /// \notes Tensor operation to cast to a given MindSpore data type. -/// \param[in] data_type mindspore.dtype to be cast to. -/// \return Shared pointer to the current TensorOperation. -std::shared_ptr TypeCast(std::string data_type); - -/* ####################################### Derived TensorOperation classes ################################# */ - -class ComposeOperation : public TensorOperation { +class TypeCast : public TensorTransform { public: - explicit ComposeOperation(const std::vector> &transforms); + /// \brief Constructor. + /// \param[in] data_type mindspore.dtype to be cast to. + explicit TypeCast(std::string data_type) : TypeCast(StringToChar(data_type)) {} - ~ComposeOperation() = default; + explicit TypeCast(const std::vector &data_type); - std::shared_ptr Build() override; + /// \brief Destructor + ~TypeCast() = default; - Status ValidateParams() override; - - std::string Name() const override { return kComposeOperation; } + /// \brief Function to convert TensorTransform object into a TensorOperation object. + /// \return Shared pointer to TensorOperation object. 
+ std::shared_ptr Parse() override; private: - std::vector> transforms_; + struct Data; + std::shared_ptr data_; }; -class DuplicateOperation : public TensorOperation { +/// \brief Unique Op. +/// \notes Return an output tensor containing all the unique elements of the input tensor in +/// the same order that they occur in the input tensor. +class Unique : public TensorTransform { public: - DuplicateOperation() = default; + /// \brief Constructor. + Unique(); - ~DuplicateOperation() = default; + /// \brief Destructor + ~Unique() = default; - std::shared_ptr Build() override; - - Status ValidateParams() override; - - std::string Name() const override { return kDuplicateOperation; } -}; - -class OneHotOperation : public TensorOperation { - public: - explicit OneHotOperation(int32_t num_classes_); - - ~OneHotOperation() = default; - - std::shared_ptr Build() override; - - Status ValidateParams() override; - - std::string Name() const override { return kOneHotOperation; } - - private: - float num_classes_; -}; - -class PreBuiltOperation : public TensorOperation { - public: - explicit PreBuiltOperation(std::shared_ptr tensor_op); - - ~PreBuiltOperation() = default; - - std::shared_ptr Build() override; - - Status ValidateParams() override; - - std::string Name() const override { return kPreBuiltOperation; } - - private: - std::shared_ptr op_; -}; - -class RandomApplyOperation : public TensorOperation { - public: - explicit RandomApplyOperation(const std::vector> &transforms, double prob); - - ~RandomApplyOperation() = default; - - std::shared_ptr Build() override; - - Status ValidateParams() override; - - std::string Name() const override { return kRandomApplyOperation; } - - private: - std::vector> transforms_; - double prob_; -}; - -class RandomChoiceOperation : public TensorOperation { - public: - explicit RandomChoiceOperation(const std::vector> &transforms); - - ~RandomChoiceOperation() = default; - - std::shared_ptr Build() override; - - Status ValidateParams() override; - - std::string Name() const override { return kRandomChoiceOperation; } - - private: - std::vector> transforms_; -}; -class TypeCastOperation : public TensorOperation { - public: - explicit TypeCastOperation(std::string data_type); - - ~TypeCastOperation() = default; - - std::shared_ptr Build() override; - - Status ValidateParams() override; - - std::string Name() const override { return kTypeCastOperation; } - - private: - std::string data_type_; + /// \brief Function to convert TensorTransform object into a TensorOperation object. + /// \return Shared pointer to TensorOperation object. + std::shared_ptr Parse() override; }; } // namespace transforms } // namespace dataset diff --git a/mindspore/ccsrc/minddata/dataset/liteapi/include/vision_lite.h b/mindspore/ccsrc/minddata/dataset/liteapi/include/vision_lite.h index 39609947292..34c645dd94a 100644 --- a/mindspore/ccsrc/minddata/dataset/liteapi/include/vision_lite.h +++ b/mindspore/ccsrc/minddata/dataset/liteapi/include/vision_lite.h @@ -22,7 +22,7 @@ #include #include #include - +#include "include/constants.h" #include "include/transforms.h" namespace mindspore { @@ -31,167 +31,210 @@ namespace dataset { // Transform operations for performing computer vision. 
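// --- Illustrative usage sketch (not part of the patch itself) ---
// The data transforms above (transforms.h) follow the same object pattern as the
// samplers: Compose, RandomApply, and RandomChoice each offer raw-pointer,
// shared_ptr, and reference_wrapper constructors, and Parse() is invoked by the
// pipeline rather than by user code. Same assumed context as before.

void BuildLabelTransforms() {
  auto to_float = std::make_shared<transforms::TypeCast>("float32");
  auto one_hot = std::make_shared<transforms::OneHot>(/*num_classes=*/10);
  std::vector<std::shared_ptr<TensorTransform>> ops = {to_float, one_hot};
  transforms::Compose compose(ops);
  // `compose` would typically be handed to a dataset Map(...) or to Execute; the
  // Map entry point itself is not shown in this patch.
  (void)compose;
}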
namespace vision { -// Char arrays storing name of corresponding classes (in alphabetical order) -constexpr char kCenterCropOperation[] = "CenterCrop"; -constexpr char kCropOperation[] = "Crop"; -constexpr char kDecodeOperation[] = "Decode"; -constexpr char kNormalizeOperation[] = "Normalize"; -constexpr char kResizeOperation[] = "Resize"; -constexpr char kRotateOperation[] = "Rotate"; -// Transform Op classes (in alphabetical order) -class CenterCropOperation; -class CropOperation; -class DecodeOperation; -class NormalizeOperation; -class ResizeOperation; +// Forward Declarations class RotateOperation; -/// \brief Function to create a CenterCrop TensorOperation. +/// \brief Affine TensorTransform. +/// \notes Apply affine transform on input image. +class Affine : public TensorTransform { + public: + /// \brief Constructor. + /// \param[in] degrees The degrees to rotate the image by + /// \param[in] translation The value representing vertical and horizontal translation (default = {0.0, 0.0}) + /// The first value represent the x axis translation while the second represents y axis translation. + /// \param[in] scale The scaling factor for the image (default = 0.0) + /// \param[in] shear A float vector of size 2, representing the shear degrees (default = {0.0, 0.0}) + /// \param[in] interpolation An enum for the mode of interpolation + /// \param[in] fill_value A vector representing the value to fill the area outside the transform + /// in the output image. If 1 value is provided, it is used for all RGB channels. + /// If 3 values are provided, it is used to fill R, G, B channels respectively. + explicit Affine(float_t degrees, const std::vector &translation = {0.0, 0.0}, float scale = 0.0, + const std::vector &shear = {0.0, 0.0}, + InterpolationMode interpolation = InterpolationMode::kNearestNeighbour, + const std::vector &fill_value = {0, 0, 0}); + + /// \brief Destructor. + ~Affine() = default; + + /// \brief Function to convert TensorTransform object into a TensorOperation object. + /// \return Shared pointer to TensorOperation object. + std::shared_ptr Parse() override; + + private: + struct Data; + std::shared_ptr data_; +}; +/// \brief CenterCrop TensorTransform. /// \notes Crops the input image at the center to the given size. -/// \param[in] size A vector representing the output size of the cropped image. -/// If size is a single value, a square crop of size (size, size) is returned. -/// If size has 2 values, it should be (height, width). -/// \return Shared pointer to the current TensorOperation. -std::shared_ptr CenterCrop(std::vector size); +class CenterCrop : public TensorTransform { + public: + /// \brief Constructor. + /// \param[in] size A vector representing the output size of the cropped image. + /// If size is a single value, a square crop of size (size, size) is returned. + /// If size has 2 values, it should be (height, width). + explicit CenterCrop(std::vector size); -/// \brief Function to create a Crop TensorOp + /// \brief Destructor. + ~CenterCrop() = default; + + /// \brief Function to convert TensorTransform object into a TensorOperation object. + /// \return Shared pointer to TensorOperation object. + std::shared_ptr Parse() override; + + std::shared_ptr Parse(const MapTargetDevice &env) override; + + private: + struct Data; + std::shared_ptr data_; +}; + +/// \brief Crop TensorTransform. /// \notes Crop an image based on location and crop size -/// \param[in] coordinates Starting location of crop. 
Must be a vector of two values, in the form of {x_coor, y_coor} -/// \param[in] size Size of the cropped area. -/// If size is a single value, a square crop of size (size, size) is returned. -/// If size has 2 values, it should be (height, width). -/// \return Shared pointer to the current TensorOp -std::shared_ptr Crop(std::vector coordinates, std::vector size); +class Crop : public TensorTransform { + public: + /// \brief Constructor. + /// \param[in] coordinates Starting location of crop. Must be a vector of two values, in the form of {x_coor, y_coor} + /// \param[in] size Size of the cropped area. + /// If size is a single value, a square crop of size (size, size) is returned. + /// If size has 2 values, it should be (height, width). + Crop(std::vector coordinates, std::vector size); -/// \brief Function to create a Decode TensorOperation. + /// \brief Destructor. + ~Crop() = default; + + /// \brief Function to convert TensorTransform object into a TensorOperation object. + /// \return Shared pointer to TensorOperation object. + std::shared_ptr Parse() override; + + private: + struct Data; + std::shared_ptr data_; +}; + +/// \brief Decode TensorTransform. /// \notes Decode the input image in RGB mode. -/// \param[in] rgb A boolean of whether to decode in RGB mode or not. -/// \return Shared pointer to the current TensorOperation. -std::shared_ptr Decode(bool rgb = true); +class Decode : public TensorTransform { + public: + /// \brief Constructor. + /// \param[in] rgb A boolean of whether to decode in RGB mode or not. + explicit Decode(bool rgb = true); -/// \brief Function to create a Normalize TensorOperation. + /// \brief Destructor. + ~Decode() = default; + + /// \brief Function to convert TensorTransform object into a TensorOperation object. + /// \return Shared pointer to TensorOperation object. + std::shared_ptr Parse() override; + + std::shared_ptr Parse(const MapTargetDevice &env) override; + + private: + struct Data; + std::shared_ptr data_; +}; + +/// \brief Normalize TensorTransform. /// \notes Normalize the input image with respect to mean and standard deviation. -/// \param[in] mean A vector of mean values for each channel, w.r.t channel order. -/// The mean values must be in range [0.0, 255.0]. -/// \param[in] std A vector of standard deviations for each channel, w.r.t. channel order. -/// The standard deviation values must be in range (0.0, 255.0] -/// \return Shared pointer to the current TensorOperation. -std::shared_ptr Normalize(std::vector mean, std::vector std); +class Normalize : public TensorTransform { + public: + /// \brief Constructor. + /// \param[in] mean A vector of mean values for each channel, w.r.t channel order. + /// The mean values must be in range [0.0, 255.0]. + /// \param[in] std A vector of standard deviations for each channel, w.r.t. channel order. + /// The standard deviation values must be in range (0.0, 255.0] + Normalize(std::vector mean, std::vector std); -/// \brief Function to create a Resize TensorOperation. + /// \brief Destructor. + ~Normalize() = default; + + /// \brief Function to convert TensorTransform object into a TensorOperation object. + /// \return Shared pointer to TensorOperation object. + std::shared_ptr Parse() override; + + private: + struct Data; + std::shared_ptr data_; +}; + +class RandomAffine : public TensorTransform { + public: + /// \brief Constructor. 
+ /// \param[in] degrees A float vector of size 2, representing the starting and ending degree + /// \param[in] translate_range A float vector of size 2 or 4, representing percentages of translation on x and y axes. + /// if size is 2, (min_dx, max_dx, 0, 0) + /// if size is 4, (min_dx, max_dx, min_dy, max_dy) + /// all values are in range [-1, 1] + /// \param[in] scale_range A float vector of size 2, representing the starting and ending scales in the range. + /// \param[in] shear_ranges A float vector of size 2 or 4, representing the starting and ending shear degrees + /// vertically and horizontally. + /// if size is 2, (min_shear_x, max_shear_x, 0, 0) + /// if size is 4, (min_shear_x, max_shear_x, min_shear_y, max_shear_y) + /// \param[in] interpolation An enum for the mode of interpolation + /// \param[in] fill_value A vector representing the value to fill the area outside the transform + /// in the output image. If 1 value is provided, it is used for all RGB channels. + /// If 3 values are provided, it is used to fill R, G, B channels respectively. + explicit RandomAffine(const std::vector °rees, + const std::vector &translate_range = {0.0, 0.0, 0.0, 0.0}, + const std::vector &scale_range = {1.0, 1.0}, + const std::vector &shear_ranges = {0.0, 0.0, 0.0, 0.0}, + InterpolationMode interpolation = InterpolationMode::kNearestNeighbour, + const std::vector &fill_value = {0, 0, 0}); + + /// \brief Destructor. + ~RandomAffine() = default; + + /// \brief Function to convert TensorTransform object into a TensorOperation object. + /// \return Shared pointer to TensorOperation object. + std::shared_ptr Parse() override; + + private: + struct Data; + std::shared_ptr data_; +}; + +/// \brief Resize TensorTransform. /// \notes Resize the input image to the given size. -/// \param[in] size A vector representing the output size of the resized image. -/// If size is a single value, the image will be resized to this value with -/// the same image aspect ratio. If size has 2 values, it should be (height, width). -/// \param[in] interpolation An enum for the mode of interpolation -/// \return Shared pointer to the current TensorOperation. -std::shared_ptr Resize(std::vector size, - InterpolationMode interpolation = InterpolationMode::kLinear); -/// \brief Applies an rotate transformation to an image. +class Resize : public TensorTransform { + public: + /// \brief Constructor. + /// \param[in] size A vector representing the output size of the resized image. + /// If size is a single value, the image will be resized to this value with + /// the same image aspect ratio. If size has 2 values, it should be (height, width). + /// \param[in] interpolation An enum for the mode of interpolation + explicit Resize(std::vector size, InterpolationMode interpolation = InterpolationMode::kLinear); + + /// \brief Destructor. + ~Resize() = default; + + /// \brief Function to convert TensorTransform object into a TensorOperation object. + /// \return Shared pointer to TensorOperation object. + std::shared_ptr Parse() override; + + std::shared_ptr Parse(const MapTargetDevice &env) override; + + private: + struct Data; + std::shared_ptr data_; +}; + +/// \brief Rotate TensorTransform. /// \notes Rotate the input image using a specified angle id. -/// \return Shared pointer to the current TensorOperation. -std::shared_ptr Rotate(); - -class CenterCropOperation : public TensorOperation { +class Rotate : public TensorTransform { public: - explicit CenterCropOperation(std::vector size); + /// \brief Constructor. 
+ Rotate(); - ~CenterCropOperation() = default; + /// \brief Destructor. + ~Rotate() = default; - std::shared_ptr Build() override; - - Status ValidateParams() override; - - std::string Name() const override { return kCenterCropOperation; } + /// \brief Function to convert TensorTransform object into a TensorOperation object. + /// \return Shared pointer to TensorOperation object. + std::shared_ptr Parse() override; private: - std::vector size_; + std::shared_ptr op_; }; -class CropOperation : public TensorOperation { - public: - CropOperation(std::vector coordinates, std::vector size); - - ~CropOperation() = default; - - std::shared_ptr Build() override; - - Status ValidateParams() override; - - std::string Name() const override { return kCropOperation; } - - private: - std::vector coordinates_; - std::vector size_; -}; -class DecodeOperation : public TensorOperation { - public: - explicit DecodeOperation(bool rgb = true); - - ~DecodeOperation() = default; - - std::shared_ptr Build() override; - - Status ValidateParams() override; - - std::string Name() const override { return kDecodeOperation; } - - private: - bool rgb_; -}; - -class NormalizeOperation : public TensorOperation { - public: - NormalizeOperation(std::vector mean, std::vector std); - - ~NormalizeOperation() = default; - - std::shared_ptr Build() override; - - Status ValidateParams() override; - - std::string Name() const override { return kNormalizeOperation; } - - private: - std::vector mean_; - std::vector std_; -}; - -class ResizeOperation : public TensorOperation { - public: - explicit ResizeOperation(std::vector size, - InterpolationMode interpolation_mode = InterpolationMode::kLinear); - - ~ResizeOperation() = default; - - std::shared_ptr Build() override; - - Status ValidateParams() override; - - std::string Name() const override { return kResizeOperation; } - - private: - std::vector size_; - InterpolationMode interpolation_; -}; - -class RotateOperation : public TensorOperation { - public: - RotateOperation(); - - ~RotateOperation() = default; - - std::shared_ptr Build() override; - - Status ValidateParams() override; - - std::string Name() const override { return kRotateOperation; } - - void setAngle(uint64_t angle_id); - - private: - std::shared_ptr rotate_op; -}; } // namespace vision } // namespace dataset } // namespace mindspore
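// --- Illustrative usage sketch (not part of the patch itself) ---
// The lite vision transforms above are plain objects as well, so they can be stacked
// in an eager Execute pipeline. Same assumed context as before; this variant uses the
// raw-pointer Execute overload, so the transform objects must outlive the call. The
// Normalize values are placeholders, not recommended constants.

Status DecodeAndNormalize(const MSTensor &encoded_image, std::vector<MSTensor> *outputs) {
  vision::Decode decode(/*rgb=*/true);
  vision::Resize resize({256});                            // a single value keeps the aspect ratio (see doc above)
  vision::CenterCrop crop({224, 224});                     // (height, width)
  vision::Normalize normalize({121.0f, 115.0f, 100.0f},    // placeholder per-channel mean, range [0.0, 255.0]
                              {70.0f, 68.0f, 71.0f});      // placeholder per-channel std, range (0.0, 255.0]
  std::vector<TensorTransform *> ops = {&decode, &resize, &crop, &normalize};
  Execute pipeline(ops, MapTargetDevice::kCpu);            // raw-pointer overload
  std::vector<MSTensor> inputs = {encoded_image};
  return pipeline(inputs, outputs);
}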