!24689 Add examples for MindData C++ API - stage 1

Merge pull request !24689 from luoyang/code_docs_cppdoc
This commit is contained in:
i-robot 2021-10-11 11:52:59 +00:00 committed by Gitee
commit f81ca1f020
5 changed files with 592 additions and 1 deletions

View File

@ -134,11 +134,24 @@ class Dataset : public std::enable_shared_from_this<Dataset> {
/// \brief Function to set runtime number of workers.
/// \param[in] num_workers The number of threads in this operator.
/// \return Shared pointer to the original object.
/// \par Example
/// \code
/// /* Set number of workers(threads) to process the dataset in parallel */
/// std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true);
/// ds = ds->SetNumWorkers(16);
/// \endcode
std::shared_ptr<Dataset> SetNumWorkers(int32_t num_workers);
/// \brief A function to create a PullBasedIterator over the Dataset.
/// \param[in] columns List of columns to be used to specify the order of columns.
/// \return Shared pointer to the Iterator.
/// \par Example
/// \code
/// /* dataset is an instance of Dataset object */
/// std::shared_ptr<Iterator> iter = dataset->CreatePullBasedIterator();
/// std::unordered_map<std::string, mindspore::MSTensor> row;
/// iter->GetNextRow(&row);
/// \endcode
std::shared_ptr<PullIterator> CreatePullBasedIterator(std::vector<std::vector<char>> columns = {});
/// \brief Function to create an Iterator over the Dataset pipeline.
@ -146,6 +159,13 @@ class Dataset : public std::enable_shared_from_this<Dataset> {
/// \param[in] num_epochs Number of epochs to run through the pipeline (default=-1, which means infinite epochs).
/// An empty row is returned at the end of each epoch.
/// \return Shared pointer to the Iterator.
/// \par Example
/// \code
/// /* dataset is an instance of Dataset object */
/// std::shared_ptr<Iterator> iter = dataset->CreateIterator();
/// std::unordered_map<std::string, mindspore::MSTensor> row;
/// iter->GetNextRow(&row);
/// \endcode
std::shared_ptr<Iterator> CreateIterator(std::vector<std::string> columns = {}, int32_t num_epochs = -1) {
return CreateIteratorCharIF(VectorStringToChar(columns), num_epochs);
}
@ -181,6 +201,14 @@ class Dataset : public std::enable_shared_from_this<Dataset> {
/// \param[in] num_files Number of dataset files (default=1).
/// \param[in] dataset_type Dataset format (default="mindrecord").
/// \return Returns true if no error encountered else false.
/// \par Example
/// \code
/// /* Create a dataset and save its data into MindRecord */
/// std::string folder_path = "/path/to/cifar_dataset";
/// std::shared_ptr<Dataset> ds = Cifar10(folder_path, "all", std::make_shared<SequentialSampler>(0, 10));
/// std::string save_file = "Cifar10Data.mindrecord";
/// bool rc = ds->Save(save_file);
/// \endcode
bool Save(std::string dataset_path, int32_t num_files = 1, std::string dataset_type = "mindrecord") {
return SaveCharIF(StringToChar(dataset_path), num_files, StringToChar(dataset_type));
}
@ -193,6 +221,12 @@ class Dataset : public std::enable_shared_from_this<Dataset> {
/// available to make the last batch, then those rows will
/// be dropped and not propagated to the next node.
/// \return Shared pointer to the current Dataset.
/// \par Example
/// \code
/// /* Create a dataset where every 100 rows is combined into a batch */
/// std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true);
/// ds = ds->Batch(100, true);
/// \endcode
std::shared_ptr<BatchDataset> Batch(int32_t batch_size, bool drop_remainder = false);
/// \brief Function to create a BucketBatchByLengthDataset.
@ -221,6 +255,12 @@ class Dataset : public std::enable_shared_from_this<Dataset> {
/// \param[in] drop_remainder If true, will drop the last batch for each bucket if it is not a full batch
/// (default=false).
/// \return Shared pointer to the current Dataset.
/// \par Example
/// \code
/// /* Bucket elements according to their lengths */
/// std::shared_ptr<Dataset> ds = Mnist(folder_path, "all", std::make_shared<RandomSampler>(false, 10));
/// ds = ds->BucketBatchByLength({"image"}, {1, 2, 3}, {4, 5, 6, 7});
/// \endcode
std::shared_ptr<BucketBatchByLengthDataset> BucketBatchByLength(
const std::vector<std::string> &column_names, const std::vector<int32_t> &bucket_boundaries,
const std::vector<int32_t> &bucket_batch_sizes,
@ -243,6 +283,14 @@ class Dataset : public std::enable_shared_from_this<Dataset> {
/// The input sentence must be pretokenized when using word type.
/// \param[in] params A vector contains more option parameters of sentencepiece library.
/// \return Shared pointer to the SentencePieceVocab.
/// \par Example
/// \code
/// /* Build a SentencePieceVocab from TextFile dataset */
/// std::string vocab_file = "/path/to/txtfile";
/// std::shared_ptr<Dataset> ds_vocab = TextFile({vocab_file}, 0, ShuffleMode::kFalse);
/// std::shared_ptr<SentencePieceVocab> vocab =
/// ds_vocab->BuildSentencePieceVocab({}, 5000, 0.9995, SentencePieceModel::kUnigram, {});
/// \endcode
std::shared_ptr<SentencePieceVocab> BuildSentencePieceVocab(
const std::vector<std::string> &col_names, int32_t vocab_size, float character_coverage,
SentencePieceModel model_type, const std::unordered_map<std::string, std::string> &params) {
@ -263,6 +311,13 @@ class Dataset : public std::enable_shared_from_this<Dataset> {
/// \param[in] special_first Whether special_tokens will be prepended/appended to vocab, If special_tokens
/// is specified and special_first is set to default, special_tokens will be prepended.
/// \return Shared pointer to the Vocab.
/// \par Example
/// \code
/// /* Build a Vocab from TextFile dataset */
/// std::string vocab_file = "/path/to/txtfile";
/// std::shared_ptr<Dataset> ds = TextFile({vocab_file}, 0, ShuffleMode::kFalse);
/// std::shared_ptr<Vocab> vocab = ds->BuildVocab();
/// \endcode
std::shared_ptr<Vocab> BuildVocab(const std::vector<std::string> &columns = {},
const std::pair<int64_t, int64_t> &freq_range = {0, kDeMaxFreq},
int64_t top_k = kDeMaxTopk, const std::vector<std::string> &special_tokens = {},
@ -275,6 +330,13 @@ class Dataset : public std::enable_shared_from_this<Dataset> {
/// \note Concat the datasets in the input.
/// \param[in] datasets List of shared pointers to the dataset that should be concatenated together.
/// \return Shared pointer to the current Dataset.
/// \par Example
/// \code
/// /* Create a dataset by concatenating dataset_1 and dataset_2 with "+" operator */
/// std::shared_ptr<Dataset> dataset = dataset_1 + dataset_2;
/// /* Create a dataset by concatenating dataset_1 and dataset_2 with concat operation */
/// std::shared_ptr<Dataset> dataset = dataset_1->Concat({dataset_2});
/// \endcode
std::shared_ptr<ConcatDataset> Concat(const std::vector<std::shared_ptr<Dataset>> &datasets) {
std::vector<std::shared_ptr<Dataset>> all_datasets{shared_from_this()};
all_datasets.insert(std::end(all_datasets), std::begin(datasets), std::end(datasets));
@ -286,6 +348,28 @@ class Dataset : public std::enable_shared_from_this<Dataset> {
/// \param[in] predicate Function callable which returns a boolean value. If false then filter the element.
/// \param[in] input_columns List of names of the input columns to filter.
/// \return Shared pointer to the current Dataset.
/// \par Example
/// \code
/// /* Define a predicate function */
/// MSTensorVec Predicate1(MSTensorVec in) {
/// // Return true if input is equal to 3
/// uint64_t input_value;
/// TensorRow input = VecToRow(in);
/// (void)input.at(0)->GetItemAt(&input_value, {0});
/// bool result = (input_value == 3);
/// // Convert from boolean to TensorRow
/// TensorRow output;
/// std::shared_ptr<Tensor> out;
/// (void)Tensor::CreateEmpty(mindspore::dataset::TensorShape({}),
/// mindspore::dataset::DataType(mindspore::dataset::DataType::Type::DE_BOOL), &out);
/// (void)out->SetItemAt({}, result);
/// output.push_back(out);
/// return RowToVec(output);
/// }
///
/// /* Apply predicate function for dataset */
/// std::shared_ptr<Dataset> ds = ds->Filter(Predicate1, {"label"});
/// \endcode
std::shared_ptr<FilterDataset> Filter(std::function<MSTensorVec(MSTensorVec)> predicate,
const std::vector<std::string> &input_columns = {}) {
return std::make_shared<FilterDataset>(shared_from_this(), predicate, VectorStringToChar(input_columns));
@ -308,6 +392,40 @@ class Dataset : public std::enable_shared_from_this<Dataset> {
/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used).
/// \param[in] callbacks List of Dataset callbacks to be called.
/// \return Shared pointer to the current Dataset.
/// \par Example
/// \code
/// // Create objects for the tensor ops
/// std::shared_ptr<TensorTransform> decode_op = std::make_shared<vision::Decode>(true);
/// std::shared_ptr<TensorTransform> random_jitter_op = std::make_shared<vision::RandomColor>(0.0, 0.0);
///
/// /* 1) Simple map example */
/// // Apply decode_op on column "image". This column will be replaced by the outputted
/// // column of decode_op. Since column_order is not provided, both columns "image"
/// // and "label" will be propagated to the child node in their original order.
/// dataset = dataset->Map({decode_op}, {"image"});
///
/// // Decode and rename column "image" to "decoded_image".
/// dataset = dataset->Map({decode_op}, {"image"}, {"decoded_image"});
///
/// // Specify the order of the output columns.
/// dataset = dataset->Map({decode_op}, {"image"}, {}, {"label", "image"});
///
/// // Rename column "image" to "decoded_image" and also specify the order of the output columns.
/// dataset = dataset->Map({decode_op}, {"image"}, {"decoded_image"}, {"label", "decoded_image"});
///
/// // Rename column "image" to "decoded_image" and keep only this column.
/// dataset = dataset->Map({decode_op}, {"image"}, {"decoded_image"}, {"decoded_image"});
///
/// /* 2) Map example with more than one operation */
/// // Create a dataset where the images are decoded, then randomly color jittered.
/// // decode_op takes column "image" as input and outputs one column. The column
/// // outputted by decode_op is passed as input to random_jitter_op.
/// // random_jitter_op will output one column. Column "image" will be replaced by
/// // the column outputted by random_jitter_op (the very last operation). All other
/// // columns are unchanged. Since column_order is not specified, the order of the
/// // columns will remain the same.
/// dataset = dataset->Map({decode_op, random_jitter_op}, {"image"});
/// \endcode
std::shared_ptr<MapDataset> Map(std::vector<TensorTransform *> operations,
const std::vector<std::string> &input_columns = {},
const std::vector<std::string> &output_columns = {},
@ -391,6 +509,12 @@ class Dataset : public std::enable_shared_from_this<Dataset> {
/// \note Applies project to the dataset.
/// \param[in] columns The name of columns to project.
/// \return Shared pointer to the current Dataset.
/// \par Example
/// \code
/// /* Reorder the original column names in dataset */
/// std::shared_ptr<Dataset> ds = Mnist(folder_path, "all", std::make_shared<RandomSampler>(false, 10));
/// ds = ds->Project({"label", "image"});
/// \endcode
std::shared_ptr<ProjectDataset> Project(const std::vector<std::string> &columns) {
return std::make_shared<ProjectDataset>(shared_from_this(), VectorStringToChar(columns));
}
@ -400,6 +524,12 @@ class Dataset : public std::enable_shared_from_this<Dataset> {
/// \param[in] input_columns List of the input columns to rename.
/// \param[in] output_columns List of the output columns.
/// \return Shared pointer to the current Dataset.
/// \par Example
/// \code
/// /* Rename the original column names in dataset */
/// std::shared_ptr<Dataset> ds = Mnist(folder_path, "all", std::make_shared<RandomSampler>(false, 10));
/// ds = ds->Rename({"image", "label"}, {"image_output", "label_output"});
/// \endcode
std::shared_ptr<RenameDataset> Rename(const std::vector<std::string> &input_columns,
const std::vector<std::string> &output_columns) {
return std::make_shared<RenameDataset>(shared_from_this(), VectorStringToChar(input_columns),
@ -409,6 +539,12 @@ class Dataset : public std::enable_shared_from_this<Dataset> {
/// \note Repeats this dataset count times. Repeat indefinitely if count is -1.
/// \param[in] count Number of times the dataset should be repeated.
/// \return Shared pointer to the current Dataset.
/// \par Example
/// \code
/// /* Create a dataset where the dataset is repeated for 50 epochs */
/// std::shared_ptr<Dataset> ds = Mnist(folder_path, "all", std::make_shared<RandomSampler>(false, 10));
/// ds = ds->Repeat(50);
/// \endcode
std::shared_ptr<RepeatDataset> Repeat(int32_t count = -1) {
return std::make_shared<RepeatDataset>(shared_from_this(), count);
}
@ -416,6 +552,12 @@ class Dataset : public std::enable_shared_from_this<Dataset> {
/// \note Randomly shuffles the rows of this dataset.
/// \param[in] buffer_size The size of the buffer (must be larger than 1) for shuffling
/// \return Shared pointer to the current Dataset.
/// \par Example
/// \code
/// /* Create a shuffled dataset using a shuffle buffer of size 4 */
/// std::shared_ptr<Dataset> ds = Mnist(folder_path, "all", std::make_shared<RandomSampler>(false, 10));
/// ds = ds->Shuffle(4);
/// \endcode
std::shared_ptr<ShuffleDataset> Shuffle(int32_t buffer_size) {
return std::make_shared<ShuffleDataset>(shared_from_this(), buffer_size);
}
@ -424,12 +566,24 @@ class Dataset : public std::enable_shared_from_this<Dataset> {
/// \note Skips count elements in this dataset.
/// \param[in] count Number of elements the dataset to be skipped.
/// \return Shared pointer to the current Dataset.
/// \par Example
/// \code
/// /* Create a dataset which skips first 3 elements from data */
/// std::shared_ptr<Dataset> ds = Mnist(folder_path, "all", std::make_shared<RandomSampler>(false, 10));
/// ds = ds->Skip(3);
/// \endcode
std::shared_ptr<SkipDataset> Skip(int32_t count) { return std::make_shared<SkipDataset>(shared_from_this(), count); }
/// \brief Function to create a TakeDataset.
/// \note Takes count elements in this dataset.
/// \param[in] count Number of elements the dataset to be taken.
/// \return Shared pointer to the current Dataset.
/// \par Example
/// \code
/// /* Create a dataset where the dataset includes 50 elements. */
/// std::shared_ptr<Dataset> ds = Mnist(folder_path, "all", std::make_shared<RandomSampler>(false, 10));
/// ds = ds->Take(50);
/// \endcode
std::shared_ptr<TakeDataset> Take(int32_t count = -1) {
return std::make_shared<TakeDataset>(shared_from_this(), count);
}
@ -438,6 +592,13 @@ class Dataset : public std::enable_shared_from_this<Dataset> {
/// \note Applies zip to the dataset.
/// \param[in] datasets A list of shared pointers to the datasets that we want to zip.
/// \return Shared pointer to the current Dataset.
/// \par Example
/// \code
/// /* Create a dataset which is the combination of ds1 and ds2 */
/// std::shared_ptr<Dataset> ds1 = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
/// std::shared_ptr<Dataset> ds2 = Cifar10(folder_path, "all", std::make_shared<RandomSampler>(false, 10));
/// std::shared_ptr<Dataset> ds = ds1->Zip({ds2});
/// \endcode
std::shared_ptr<ZipDataset> Zip(const std::vector<std::shared_ptr<Dataset>> &datasets) {
std::vector<std::shared_ptr<Dataset>> all_datasets = datasets;
all_datasets.push_back(shared_from_this());
@ -880,6 +1041,22 @@ class AlbumDataset : public Dataset {
/// given, a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()).
/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used).
/// \return Shared pointer to the AlbumDataset.
/// \par Example
/// \code
/// /* Define dataset path and MindData object */
/// std::string folder_path = "/path/to/album_dataset_directory";
/// std::string schema_file = "/path/to/album_schema_file";
/// std::vector<std::string> column_names = {"image", "label", "id"};
/// std::shared_ptr<Dataset> ds = Album(folder_path, schema_file, column_names);
///
/// /* Create iterator to read dataset */
/// std::shared_ptr<Iterator> iter = ds->CreateIterator();
/// std::unordered_map<std::string, mindspore::MSTensor> row;
/// iter->GetNextRow(&row);
///
/// /* Note: As we defined before, each data dictionary owns keys "image", "label" and "id" */
/// auto image = row["image"];
/// \endcode
inline std::shared_ptr<AlbumDataset> Album(const std::string &dataset_dir, const std::string &data_schema,
const std::vector<std::string> &column_names = {}, bool decode = false,
const std::shared_ptr<Sampler> &sampler = std::make_shared<RandomSampler>(),
@ -972,6 +1149,20 @@ class CelebADataset : public Dataset {
/// \param[in] extensions Set of file extensions to be included in the dataset (default={}).
/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used).
/// \return Shared pointer to the CelebADataset.
/// \par Example
/// \code
/// /* Define dataset path and MindData object */
/// std::string folder_path = "/path/to/celeba_dataset_directory";
/// std::shared_ptr<Dataset> ds = CelebA(folder_path, "all", std::make_shared<SequentialSampler>(0, 5));
///
/// /* Create iterator to read dataset */
/// std::shared_ptr<Iterator> iter = ds->CreateIterator();
/// std::unordered_map<std::string, mindspore::MSTensor> row;
/// iter->GetNextRow(&row);
///
/// /* Note: In CelebA dataset, each data dictionary owns keys "image" and "attr" */
/// auto image = row["image"];
/// \endcode
inline std::shared_ptr<CelebADataset> CelebA(
const std::string &dataset_dir, const std::string &usage = "all",
const std::shared_ptr<Sampler> &sampler = std::make_shared<RandomSampler>(), bool decode = false,
@ -1057,6 +1248,20 @@ class Cifar10Dataset : public Dataset {
/// given, a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()).
/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used).
/// \return Shared pointer to the Cifar10Dataset.
/// \par Example
/// \code
/// /* Define dataset path and MindData object */
/// std::string folder_path = "/path/to/cifar10_dataset_directory";
/// std::shared_ptr<Dataset> ds = Cifar10(folder_path, "all", std::make_shared<RandomSampler>(false, 10));
///
/// /* Create iterator to read dataset */
/// std::shared_ptr<Iterator> iter = ds->CreateIterator();
/// std::unordered_map<std::string, mindspore::MSTensor> row;
/// iter->GetNextRow(&row);
///
/// /* Note: In CIFAR10 dataset, each data dictionary owns keys "image" and "label" */
/// auto image = row["image"];
/// \endcode
inline std::shared_ptr<Cifar10Dataset> Cifar10(
const std::string &dataset_dir, const std::string &usage = "all",
const std::shared_ptr<Sampler> &sampler = std::make_shared<RandomSampler>(),
@ -1131,6 +1336,20 @@ class Cifar100Dataset : public Dataset {
/// given, a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()).
/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used).
/// \return Shared pointer to the Cifar100Dataset.
/// \par Example
/// \code
/// /* Define dataset path and MindData object */
/// std::string folder_path = "/path/to/cifar100_dataset_directory";
/// std::shared_ptr<Dataset> ds = Cifar100(folder_path, "all", std::make_shared<RandomSampler>());
///
/// /* Create iterator to read dataset */
/// std::shared_ptr<Iterator> iter = ds->CreateIterator();
/// std::unordered_map<std::string, mindspore::MSTensor> row;
/// iter->GetNextRow(&row);
///
/// /* Note: In CIFAR100 dataset, each dictionary has 3 keys: "image", "fine_label" and "coarse_label" */
/// auto image = row["image"];
/// \endcode
inline std::shared_ptr<Cifar100Dataset> Cifar100(
const std::string &dataset_dir, const std::string &usage = "all",
const std::shared_ptr<Sampler> &sampler = std::make_shared<RandomSampler>(),
@ -1232,6 +1451,20 @@ class CityscapesDataset : public Dataset {
/// given, a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()).
/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used).
/// \return Shared pointer to the current CityscapesDataset.
/// \par Example
/// \code
/// /* Define dataset path and MindData object */
/// std::string folder_path = "/path/to/cityscapes_dataset_directory";
/// std::shared_ptr<Dataset> ds = Cityscapes(folder_path, "train", "fine", "color");
///
/// /* Create iterator to read dataset */
/// std::shared_ptr<Iterator> iter = ds->CreateIterator();
/// std::unordered_map<std::string, mindspore::MSTensor> row;
/// iter->GetNextRow(&row);
///
/// /* Note: In Cityscapes dataset, each data dictionary owns keys "image" and "task" */
/// auto task = row["task"];
/// \endcode
inline std::shared_ptr<CityscapesDataset> Cityscapes(
const std::string &dataset_dir, const std::string &usage, const std::string &quality_mode, const std::string &task,
bool decode = false, const std::shared_ptr<Sampler> &sampler = std::make_shared<RandomSampler>(),
@ -1328,6 +1561,19 @@ class CLUEDataset : public Dataset {
/// specified only when num_shards is also specified (Default = 0).
/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used).
/// \return Shared pointer to the CLUEDataset.
/// \par Example
/// \code
/// /* Define dataset path and MindData object */
/// std::string train_file = "/path/to/clue_dataset_file";
/// std::shared_ptr<Dataset> ds = CLUE({train_file}, "AFQMC", "train", 0, ShuffleMode::kFalse);
///
/// /* Create iterator to read dataset */
/// std::shared_ptr<Iterator> iter = ds->CreateIterator();
/// std::unordered_map<std::string, mindspore::MSTensor> row;
/// iter->GetNextRow(&row);
///
/// auto text = row["sentence1"];
/// \endcode
inline std::shared_ptr<CLUEDataset> CLUE(const std::vector<std::string> &dataset_files,
const std::string &task = "AFQMC", const std::string &usage = "train",
int64_t num_samples = 0, ShuffleMode shuffle = ShuffleMode::kGlobal,
@ -1400,6 +1646,21 @@ class CocoDataset : public Dataset {
/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used).
/// \param[in] extra_metadata Flag to add extra meta-data to row. (default=false).
/// \return Shared pointer to the CocoDataset.
/// \par Example
/// \code
/// /* Define dataset path and MindData object */
/// std::string folder_path = "/path/to/coco_dataset_directory";
/// std::string annotation_file = "/path/to/annotation_file";
/// std::shared_ptr<Dataset> ds = Coco(folder_path, annotation_file);
///
/// /* Create iterator to read dataset */
/// std::shared_ptr<Iterator> iter = ds->CreateIterator();
/// std::unordered_map<std::string, mindspore::MSTensor> row;
/// iter->GetNextRow(&row);
///
/// /* Note: In COCO dataset, each dictionary has keys "image" and "annotation" */
/// auto image = row["image"];
/// \endcode
inline std::shared_ptr<CocoDataset> Coco(const std::string &dataset_dir, const std::string &annotation_file,
const std::string &task = "Detection", const bool &decode = false,
const std::shared_ptr<Sampler> &sampler = std::make_shared<RandomSampler>(),
@ -1513,6 +1774,21 @@ class CSVDataset : public Dataset {
/// specified only when num_shards is also specified (Default = 0).
/// \param[in] cache Tensor cache to use.(default=nullptr which means no cache is used).
/// \return Shared pointer to the CSVDataset
/// \par Example
/// \code
/// /* Define dataset path and MindData object */
/// std::string train_file = "/path/to/csv_file";
/// std::vector<std::string> column_names = {"col1", "col2", "col3", "col4"};
/// std::shared_ptr<Dataset> ds = CSV({train_file}, ',', {}, column_names, 0, ShuffleMode::kFalse);
///
/// /* Create iterator to read dataset */
/// std::shared_ptr<Iterator> iter = ds->CreateIterator();
/// std::unordered_map<std::string, mindspore::MSTensor> row;
/// iter->GetNextRow(&row);
///
/// /* Note: As we defined before, the dataset has column "col1", "col2", "col3" and "col4" */
/// auto col1 = row["col1"];
/// \endcode
inline std::shared_ptr<CSVDataset> CSV(const std::vector<std::string> &dataset_files, char field_delim = ',',
const std::vector<std::shared_ptr<CsvBase>> &column_defaults = {},
const std::vector<std::string> &column_names = {}, int64_t num_samples = 0,
@ -1582,6 +1858,20 @@ class DIV2KDataset : public Dataset {
/// given, a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()).
/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used).
/// \return Shared pointer to the current DIV2KDataset.
/// \par Example
/// \code
/// /* Define dataset path and MindData object */
/// std::string dataset_path = "/path/to/div2k_dataset_directory";
/// std::shared_ptr<Dataset> ds = DIV2K(dataset_path, "train", "bicubic", 2);
///
/// /* Create iterator to read dataset */
/// std::shared_ptr<Iterator> iter = ds->CreateIterator();
/// std::unordered_map<std::string, mindspore::MSTensor> row;
/// iter->GetNextRow(&row);
///
/// /* Note: In DIV2K dataset, each dictionary has keys "hr_image" and "lr_image" */
/// auto hr_image = row["hr_image"];
/// \endcode
inline std::shared_ptr<DIV2KDataset> DIV2K(const std::string &dataset_dir, const std::string &usage,
const std::string &downgrade, int32_t scale, bool decode = false,
const std::shared_ptr<Sampler> &sampler = std::make_shared<RandomSampler>(),
@ -1677,6 +1967,20 @@ class EMnistDataset : public Dataset {
/// given, a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()).
/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used).
/// \return Shared pointer to the current EMnistDataset.
/// \par Example
/// \code
/// /* Define dataset path and MindData object */
/// std::string folder_path = "/path/to/emnist_dataset_directory";
/// std::shared_ptr<Dataset> ds = EMnist(folder_path, "mnist", "train", std::make_shared<RandomSampler>(false, 5));
///
/// /* Create iterator to read dataset */
/// std::shared_ptr<Iterator> iter = ds->CreateIterator();
/// std::unordered_map<std::string, mindspore::MSTensor> row;
/// iter->GetNextRow(&row);
///
/// /* Note: In EMNIST dataset, each dictionary has keys "image" and "label" */
/// auto image = row["image"];
/// \endcode
inline std::shared_ptr<EMnistDataset> EMnist(
const std::string &dataset_dir, const std::string &name, const std::string &usage = "all",
const std::shared_ptr<Sampler> &sampler = std::make_shared<RandomSampler>(),
@ -1760,6 +2064,21 @@ class FlickrDataset : public Dataset {
/// given, a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()).
/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used).
/// \return Shared pointer to the current FlickrDataset
/// \par Example
/// \code
/// /* Define dataset path and MindData object */
/// std::string dataset_path = "/path/to/flickr30k_dataset_directory";
/// std::string file_path = "/path/to/token_file";
/// std::shared_ptr<Dataset> ds = Flickr(dataset_path, file_path);
///
/// /* Create iterator to read dataset */
/// std::shared_ptr<Iterator> iter = ds->CreateIterator();
/// std::unordered_map<std::string, mindspore::MSTensor> row;
/// iter->GetNextRow(&row);
///
/// /* Note: In FLICKR dataset, each dictionary has keys "image" and "annotation" */
/// auto image = row["image"];
/// \endcode
inline std::shared_ptr<FlickrDataset> Flickr(
const std::string &dataset_dir, const std::string &annotation_file, bool decode = false,
const std::shared_ptr<Sampler> &sampler = std::make_shared<RandomSampler>(),
@ -1855,6 +2174,20 @@ class ImageFolderDataset : public Dataset {
/// \param[in] class_indexing a class name to label map.
/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used).
/// \return Shared pointer to the ImageFolderDataset.
/// \par Example
/// \code
/// /* Define dataset path and MindData object */
/// std::string dataset_path = "/path/to/image_directory";
/// std::shared_ptr<Dataset> ds = ImageFolder(dataset_path, true, std::make_shared<RandomSampler>(false, 10));
///
/// /* Create iterator to read dataset */
/// std::shared_ptr<Iterator> iter = ds->CreateIterator();
/// std::unordered_map<std::string, mindspore::MSTensor> row;
/// iter->GetNextRow(&row);
///
/// /* Note: In ImageFolder dataset, each data dictionary has keys "image" and "label" */
/// auto image = row["image"];
/// \endcode
inline std::shared_ptr<ImageFolderDataset> ImageFolder(
const std::string &dataset_dir, bool decode = false,
const std::shared_ptr<Sampler> &sampler = std::make_shared<RandomSampler>(),
@ -1961,6 +2294,20 @@ class ManifestDataset : public Dataset {
/// \param[in] decode Decode the images after reading (default=false).
/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used).
/// \return Shared pointer to the ManifestDataset.
/// \par Example
/// \code
/// /* Define dataset path and MindData object */
/// std::string dataset_path = "/path/to/manifest_file";
/// std::shared_ptr<Dataset> ds = Manifest(dataset_path);
///
/// /* Create iterator to read dataset */
/// std::shared_ptr<Iterator> iter = ds->CreateIterator();
/// std::unordered_map<std::string, mindspore::MSTensor> row;
/// iter->GetNextRow(&row);
///
/// /* Note: In Manifest dataset, each data dictionary has keys "image" and "label" */
/// auto image = row["image"];
/// \endcode
inline std::shared_ptr<ManifestDataset> Manifest(
const std::string &dataset_file, const std::string &usage = "train",
const std::shared_ptr<Sampler> &sampler = std::make_shared<RandomSampler>(),
@ -2154,6 +2501,21 @@ class MindDataDataset : public Dataset {
/// ShuffleMode::kInfile - Shuffle samples in file.
/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used).
/// \return Shared pointer to the current MindDataDataset.
/// \par Example
/// \code
/// /* Define dataset path and MindData object */
/// std::string dataset_path = "/path/to/mindrecord_file";
/// std::vector<std::string> column_names = {"data", "file_name", "label"};
/// std::shared_ptr<Dataset> ds = MindData(dataset_path, column_names);
///
/// /* Create iterator to read dataset */
/// std::shared_ptr<Iterator> iter = ds->CreateIterator();
/// std::unordered_map<std::string, mindspore::MSTensor> row;
/// iter->GetNextRow(&row);
///
/// /* Note: As we defined before, each data dictionary owns keys "data", "file_name" and "label" */
/// auto data = row["data"];
/// \endcode
inline std::shared_ptr<MindDataDataset> MindData(
const std::string &dataset_file, const std::vector<std::string> &columns_list = {},
const std::shared_ptr<Sampler> &sampler = std::make_shared<RandomSampler>(), nlohmann::json *padded_sample = nullptr,
@ -2229,6 +2591,23 @@ inline std::shared_ptr<MindDataDataset> MindData(const std::string &dataset_file
/// ShuffleMode::kInfile - Shuffle samples in file.
/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used).
/// \return Shared pointer to the MindDataDataset.
/// \par Example
/// \code
/// /* Define dataset path and MindData object */
/// std::string file_path1 = "/path/to/mindrecord_file1";
/// std::string file_path2 = "/path/to/mindrecord_file2";
/// std::vector<std::string> file_list = {file_path1, file_path2};
/// std::vector<std::string> column_names = {"data", "file_name", "label"};
/// std::shared_ptr<Dataset> ds = MindData(file_list, column_names);
///
/// /* Create iterator to read dataset */
/// std::shared_ptr<Iterator> iter = ds->CreateIterator();
/// std::unordered_map<std::string, mindspore::MSTensor> row;
/// iter->GetNextRow(&row);
///
/// /* Note: As we defined before, each data dictionary owns keys "data", "file_name" and "label" */
/// auto data = row["data"];
/// \endcode
inline std::shared_ptr<MindDataDataset> MindData(
const std::vector<std::string> &dataset_files, const std::vector<std::string> &columns_list = {},
const std::shared_ptr<Sampler> &sampler = std::make_shared<RandomSampler>(), nlohmann::json *padded_sample = nullptr,
@ -2328,6 +2707,20 @@ class MnistDataset : public Dataset {
/// given, a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()).
/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used).
/// \return Shared pointer to the MnistDataset.
/// \par Example
/// \code
/// /* Define dataset path and MindData object */
/// std::string folder_path = "/path/to/mnist_dataset_directory";
/// std::shared_ptr<Dataset> ds = Mnist(folder_path, "all", std::make_shared<RandomSampler>(false, 20));
///
/// /* Create iterator to read dataset */
/// std::shared_ptr<Iterator> iter = ds->CreateIterator();
/// std::unordered_map<std::string, mindspore::MSTensor> row;
/// iter->GetNextRow(&row);
///
/// /* Note: In MNIST dataset, each dictionary has keys "image" and "label" */
/// auto image = row["image"];
/// \endcode
inline std::shared_ptr<MnistDataset> Mnist(const std::string &dataset_dir, const std::string &usage = "all",
const std::shared_ptr<Sampler> &sampler = std::make_shared<RandomSampler>(),
const std::shared_ptr<DatasetCache> &cache = nullptr) {
@ -2409,6 +2802,20 @@ class QMnistDataset : public Dataset {
/// given, a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()).
/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used).
/// \return Shared pointer to the QMnistDataset.
/// \par Example
/// \code
/// /* Define dataset path and MindData object */
/// std::string folder_path = "/path/to/qmnist_dataset_directory";
/// std::shared_ptr<Dataset> ds = QMnist(folder_path, "train", true, std::make_shared<RandomSampler>(false, 5));
///
/// /* Create iterator to read dataset */
/// std::shared_ptr<Iterator> iter = ds->CreateIterator();
/// std::unordered_map<std::string, mindspore::MSTensor> row;
/// iter->GetNextRow(&row);
///
/// /* Note: In QMNIST dataset, each dictionary has keys "image" and "label" */
/// auto image = row["image"];
/// \endcode
inline std::shared_ptr<QMnistDataset> QMnist(
const std::string &dataset_dir, const std::string &usage = "all", bool compat = true,
const std::shared_ptr<Sampler> &sampler = std::make_shared<RandomSampler>(),
@ -2484,6 +2891,22 @@ class RandomDataDataset : public Dataset {
/// \param[in] columns_list List of columns to be read (default={}, read all columns).
/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used).
/// \return Shared pointer to the RandomDataset.
/// \par Example
/// \code
/// /* Define MindData objects */
/// std::shared_ptr<SchemaObj> schema = Schema();
/// schema->add_column("column1", mindspore::DataType::kNumberTypeUInt8, {2});
/// schema->add_column("column2", mindspore::DataType::kNumberTypeUInt8, {1});
/// std::shared_ptr<Dataset> ds = RandomData(50, schema);
///
/// /* Create iterator to read dataset */
/// std::shared_ptr<Iterator> iter = ds->CreateIterator();
/// std::unordered_map<std::string, mindspore::MSTensor> row;
/// iter->GetNextRow(&row);
///
/// /* Note: As we defined the schema before, each data dictionary owns keys "column1" and "column2" */
/// auto column1 = row["column1"];
/// \endcode
template <typename T = std::shared_ptr<SchemaObj>>
std::shared_ptr<RandomDataDataset> RandomData(const int32_t &total_rows = 0, const T &schema = nullptr,
const std::vector<std::string> &columns_list = {},
@ -2540,6 +2963,20 @@ class SBUDataset : public Dataset {
/// given, a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()).
/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used).
/// \return Shared pointer to the current SBUDataset.
/// \par Example
/// \code
/// /* Define dataset path and MindData object */
/// std::string folder_path = "/path/to/sbu_dataset_directory";
/// std::shared_ptr<Dataset> ds = SBU(folder_path, true, std::make_shared<RandomSampler>(false, 5));
///
/// /* Create iterator to read dataset */
/// std::shared_ptr<Iterator> iter = ds->CreateIterator();
/// std::unordered_map<std::string, mindspore::MSTensor> row;
/// iter->GetNextRow(&row);
///
/// /* Note: In SBU dataset, each dictionary has keys "image" and "caption" */
/// auto caption = row["caption"];
/// \endcode
inline std::shared_ptr<SBUDataset> SBU(const std::string &dataset_dir, bool decode = false,
const std::shared_ptr<Sampler> &sampler = std::make_shared<RandomSampler>(),
const std::shared_ptr<DatasetCache> &cache = nullptr) {
@ -2612,6 +3049,20 @@ class TextFileDataset : public Dataset {
/// specified only when num_shards is also specified (Default = 0).
/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used).
/// \return Shared pointer to the TextFileDataset.
/// \par Example
/// \code
/// /* Define dataset path and MindData object */
/// std::string file_path = "/path/to/text_file_dataset_file";
/// std::shared_ptr<Dataset> ds = TextFile({file_path}, 2);
///
/// /* Create iterator to read dataset */
/// std::shared_ptr<Iterator> iter = ds->CreateIterator();
/// std::unordered_map<std::string, mindspore::MSTensor> row;
/// iter->GetNextRow(&row);
///
/// /* Note: In TextFile dataset, each dictionary has key "text" */
/// auto text = row["text"];
/// \endcode
inline std::shared_ptr<TextFileDataset> TextFile(const std::vector<std::string> &dataset_files, int64_t num_samples = 0,
ShuffleMode shuffle = ShuffleMode::kGlobal, int32_t num_shards = 1,
int32_t shard_id = 0,
@ -2702,6 +3153,21 @@ class TFRecordDataset : public Dataset {
/// each shard may be not equal).
/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used).
/// \return Shared pointer to the TFRecordDataset.
/// \par Example
/// \code
/// /* Define dataset path and MindData object */
/// std::string file_path = "/path/to/tfrecord_file";
/// std::string schema_path = "/path/to/schema_file";
/// std::shared_ptr<Dataset> ds = TFRecord({file_path}, schema_path, {"image"});
///
/// /* Create iterator to read dataset */
/// std::shared_ptr<Iterator> iter = ds->CreateIterator();
/// std::unordered_map<std::string, mindspore::MSTensor> row;
/// iter->GetNextRow(&row);
///
/// /* Note: The columns of generated dataset depend on the source TFRecord files. */
/// auto image = row["image"];
/// \endcode
template <typename T = std::shared_ptr<SchemaObj>>
std::shared_ptr<TFRecordDataset> TFRecord(const std::vector<std::string> &dataset_files, const T &schema = nullptr,
const std::vector<std::string> &columns_list = {}, int64_t num_samples = 0,
@ -2770,6 +3236,20 @@ class USPSDataset : public Dataset {
/// specified only when num_shards is also specified (Default = 0).
/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used).
/// \return Shared pointer to the current USPSDataset.
/// \par Example
/// \code
/// /* Define dataset path and MindData object */
/// std::string folder_path = "/path/to/usps_dataset_directory";
/// std::shared_ptr<Dataset> ds = USPS(folder_path, "train");
///
/// /* Create iterator to read dataset */
/// std::shared_ptr<Iterator> iter = ds->CreateIterator();
/// std::unordered_map<std::string, mindspore::MSTensor> row;
/// iter->GetNextRow(&row);
///
/// /* Note: In USPS dataset, each dictionary has keys "image" and "label" */
/// auto image = row["image"];
/// \endcode
inline std::shared_ptr<USPSDataset> USPS(const std::string &dataset_dir, const std::string &usage = "all",
int64_t num_samples = 0, ShuffleMode shuffle = ShuffleMode::kGlobal,
int32_t num_shards = 1, int32_t shard_id = 0,
@ -2842,6 +3322,22 @@ class VOCDataset : public Dataset {
/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used).
/// \param[in] extra_metadata Flag to add extra meta-data to row (default=false).
/// \return Shared pointer to the VOCDataset
/// \par Example
/// \code
/// /* Define dataset path and MindData object */
/// std::string folder_path = "/path/to/voc_dataset_directory";
/// std::shared_ptr<Dataset> ds = VOC(folder_path, "Detection", "train", {}, false,
///                                   std::make_shared<SequentialSampler>(0, 6));
///
/// /* Create iterator to read dataset */
/// std::shared_ptr<Iterator> iter = ds->CreateIterator();
/// std::unordered_map<std::string, mindspore::MSTensor> row;
/// iter->GetNextRow(&row);
///
/// /* Note: In VOC dataset, if task='Segmentation', each dictionary has keys "image" and "target" */
/// /* Note: In VOC dataset, if task='Detection', each dictionary has keys "image" and "annotation" */
/// auto image = row["image"];
/// \endcode
inline std::shared_ptr<VOCDataset> VOC(const std::string &dataset_dir, const std::string &task = "Segmentation",
const std::string &usage = "train",
const std::map<std::string, int32_t> &class_indexing = {}, bool decode = false,

View File

@ -98,12 +98,35 @@ class Execute {
/// \param[in] input Tensor to be transformed.
/// \param[out] output Transformed tensor.
/// \return Status error code, returns OK if no error encountered.
/// \par Example
/// \code
/// /* Usage of Execute */
/// std::shared_ptr<TensorTransform> decode = std::make_shared<vision::Decode>();
/// std::shared_ptr<TensorTransform> center_crop(new vision::CenterCrop({30}));
/// std::shared_ptr<TensorTransform> rescale = std::make_shared<vision::Rescale>(1. / 3, 0.5);
/// mindspore::dataset::Execute transform = Execute({decode, center_crop, rescale});
///
/// /* Apply transforms */
/// mindspore::MSTensor image = ReadFileToTensor("apple.jpg");
/// Status rc = transform(image, &image);
/// \endcode
Status operator()(const mindspore::MSTensor &input, mindspore::MSTensor *output);
/// \brief Callable function to execute the TensorTransform in eager mode.
/// \param[in] input_tensor_list List of Tensor to be transformed.
/// \param[out] out Result tensor after transform.
/// \return Status error code, returns OK if no error encountered.
/// \par Example
/// \code
/// /* Usage of Execute */
/// auto tokenizer = text::BasicTokenizer();
/// mindspore::dataset::Execute transform = Execute({tokenizer});
///
/// /* Apply transforms */
/// std::vector<mindspore::MSTensor> txt = ReadTextToTensor("demo.txt");
/// std::vector<mindspore::MSTensor> txt_result;
/// Status rc = transform({txt}, &txt_result);
/// \endcode
Status operator()(const std::vector<mindspore::MSTensor> &input_tensor_list, std::vector<mindspore::MSTensor> *out);
/// \brief Given a set of Executes, run them

View File

@ -63,6 +63,13 @@ class Iterator {
/// \note Type of return data is a unordered_map(with column name).
/// \param[out] row The output tensor row.
/// \return Status error code, returns OK if no error encountered.
/// \par Example
/// \code
/// /* dataset is an instance of Dataset object */
/// std::shared_ptr<Iterator> iter = dataset->CreateIterator();
/// std::unordered_map<std::string, mindspore::MSTensor> row;
/// iter->GetNextRow(&row);
/// \endcode
Status GetNextRow(MSTensorMap *row) {
if (row == nullptr) {
return Status(kMDUnexpectedError, "Got nullptr when GetNext row.");
@ -84,6 +91,13 @@ class Iterator {
/// \note Type of return data is a vector(without column name).
/// \param[out] row The output tensor row.
/// \return Status error code, returns OK if no error encountered.
/// \par Example
/// \code
/// /* dataset is an instance of Dataset object */
/// std::shared_ptr<Iterator> iter = dataset->CreateIterator();
/// std::vector<mindspore::MSTensor> row;
/// iter->GetNextRow(&row);
/// \endcode
virtual Status GetNextRow(MSTensorVec *row);
/// \brief Function to shut down the data pipeline.
@ -144,6 +158,13 @@ class PullIterator : public Iterator {
/// \note Type of return data is a vector(without column name).
/// \param[out] row The output tensor row.
/// \return Status error code, returns OK if no error encountered else false.
/// \par Example
/// \code
/// /* dataset is an instance of Dataset object */
/// std::shared_ptr<Iterator> iter = dataset->CreatePullBasedIterator();
/// std::vector<mindspore::MSTensor> row;
/// iter->GetNextRow(&row);
/// \endcode
Status GetNextRow(MSTensorVec *const row) override;
/// \brief Function to get specified rows from the data pipeline.
@ -151,6 +172,13 @@ class PullIterator : public Iterator {
/// \param[in] num_rows The number of rows to fetch.
/// \param[out] row The output tensor row.
/// \return Status error code, returns OK if no error encountered else false.
/// \par Example
/// \code
/// /* dataset is an instance of Dataset object */
/// std::shared_ptr<Iterator> iter = dataset->CreatePullBasedIterator();
/// std::vector<std::vector<mindspore::MSTensor>> rows;
/// iter->GetRows(5, &rows);
/// \endcode
Status GetRows(int32_t num_rows, std::vector<MSTensorVec> *const row);
/// \brief Method for building and launching the pipeline.

View File

@ -88,6 +88,12 @@ class DistributedSampler final : public Sampler {
/// \param[in] offset The starting position where access to elements in the dataset begins (default=-1).
/// \param[in] even_dist If true, each shard would return the same number of rows (default=true).
/// If false the total rows returned by all the shards would not have overlap.
/// \par Example
/// \code
/// /* creates a distributed sampler with 2 shards in total. This shard is shard 0 */
/// std::string file_path = "/path/to/test.mindrecord";
/// std::shared_ptr<Dataset> ds = MindData(file_path, {}, std::make_shared<DistributedSampler>(2, 0, false));
/// \endcode
DistributedSampler(int64_t num_shards, int64_t shard_id, bool shuffle = true, int64_t num_samples = 0,
uint32_t seed = 1, int64_t offset = -1, bool even_dist = true);
/// \brief Destructor.
@ -119,6 +125,12 @@ class PKSampler final : public Sampler {
/// \param[in] num_val Number of elements to sample for each class.
/// \param[in] shuffle If true, the class IDs are shuffled (default=false).
/// \param[in] num_samples The number of samples to draw (default=0, return all samples).
/// \par Example
/// \code
/// /* creates a PKSampler that will get 3 samples from every class. */
/// std::string folder_path = "/path/to/image/folder";
/// std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<PKSampler>(3));
/// \endcode
explicit PKSampler(int64_t num_val, bool shuffle = false, int64_t num_samples = 0);
/// \brief Destructor.
@ -144,6 +156,12 @@ class RandomSampler final : public Sampler {
/// \brief Constructor
/// \param[in] replacement If true, put the sample ID back for the next draw (default=false).
/// \param[in] num_samples The number of samples to draw (default=0, return all samples).
/// \par Example
/// \code
/// /* creates a RandomSampler that will get 10 samples randomly */
/// std::string folder_path = "/path/to/image/folder";
/// std::shared_ptr<Dataset> ds = ImageFolder(folder_path, true, std::make_shared<RandomSampler>(false, 10));
/// \endcode
explicit RandomSampler(bool replacement = false, int64_t num_samples = 0);
/// \brief Destructor.
@ -168,6 +186,12 @@ class SequentialSampler final : public Sampler {
/// \brief Constructor
/// \param[in] start_index Index to start sampling at (default=0, start at first id).
/// \param[in] num_samples The number of samples to draw (default=0, return all samples).
/// \par Example
/// \code
/// /* creates a SequentialSampler that will get 2 samples sequentially in original dataset */
/// std::string folder_path = "/path/to/image/folder";
/// std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, std::make_shared<SequentialSampler>(0, 2));
/// \endcode
explicit SequentialSampler(int64_t start_index = 0, int64_t num_samples = 0);
/// \brief Destructor.
@ -192,6 +216,12 @@ class SubsetSampler : public Sampler {
/// \brief Constructor
/// \param[in] indices A vector sequence of indices.
/// \param[in] num_samples The number of samples to draw (default=0, return all samples).
/// \par Example
/// \code
/// /* creates a SubsetSampler, will sample from the provided indices */
/// std::string folder_path = "/path/to/image/folder";
/// std::shared_ptr<Dataset> ds =
///     ImageFolder(folder_path, false, std::make_shared<SubsetSampler>(std::vector<int64_t>{0, 2, 5}));
/// \endcode
explicit SubsetSampler(std::vector<int64_t> indices, int64_t num_samples = 0);
/// \brief Destructor.
@ -215,6 +245,12 @@ class SubsetRandomSampler final : public SubsetSampler {
/// \brief Constructor
/// \param[in] indices A vector sequence of indices.
/// \param[in] num_samples The number of samples to draw (default=0, return all samples).
/// \par Example
/// \code
/// /* create a SubsetRandomSampler, will sample from the provided indices */
/// std::string folder_path = "/path/to/image/folder";
/// std::shared_ptr<Dataset> ds =
///     ImageFolder(folder_path, false, std::make_shared<SubsetRandomSampler>(std::vector<int64_t>{2, 7}));
/// \endcode
explicit SubsetRandomSampler(std::vector<int64_t> indices, int64_t num_samples = 0);
/// \brief Destructor.
@ -237,6 +273,14 @@ class WeightedRandomSampler final : public Sampler {
/// \param[in] weights A vector sequence of weights, not necessarily summing up to 1.
/// \param[in] num_samples The number of samples to draw (default=0, return all samples).
/// \param[in] replacement If true, put the sample ID back for the next draw (default=true).
/// \par Example
/// \code
/// /* creates a WeightedRandomSampler that will sample 4 elements with replacement (the default) */
/// std::vector<double> weights = {0.9, 0.8, 0.68, 0.7, 0.71, 0.6, 0.5, 0.4, 0.3, 0.5, 0.2, 0.1};
/// std::shared_ptr<Sampler> sampler = std::make_shared<WeightedRandomSampler>(weights, 4);
/// std::string folder_path = "/path/to/image/folder";
/// std::shared_ptr<Dataset> ds = ImageFolder(folder_path, false, sampler);
/// \endcode
explicit WeightedRandomSampler(std::vector<double> weights, int64_t num_samples = 0, bool replacement = true);
/// \brief Destructor.

View File

@ -38,7 +38,7 @@ class TensorOperation;
namespace vision {
/// \brief AdjustGamma TensorTransform.
/// \notes Apply gamma correction on input image.
/// \note Apply gamma correction on input image.
class AdjustGamma final : public TensorTransform {
public:
/// \brief Constructor.