diff --git a/mindspore/ccsrc/minddata/dataset/include/dataset/config.h b/mindspore/ccsrc/minddata/dataset/include/dataset/config.h index 52b05d3f2da..71b5a816c5c 100644 --- a/mindspore/ccsrc/minddata/dataset/include/dataset/config.h +++ b/mindspore/ccsrc/minddata/dataset/include/dataset/config.h @@ -1,5 +1,5 @@ /** - * Copyright 2020 Huawei Technologies Co., Ltd + * Copyright 2020-2021 Huawei Technologies Co., Ltd * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -72,7 +72,7 @@ int32_t get_callback_timeout(); /// \brief Function to load configuration from a file. /// \param[in] file path of the configuration file to be loaded. -/// \note This api exists because std::string will constrained by ABI compile macro but char don't. +/// \note This API exists because std::string will be constrained by the ABI compile option while char doesn't. bool load(const std::vector &file); /// \brief Function to load configuration from a file. 
diff --git a/mindspore/ccsrc/minddata/dataset/include/dataset/constants.h b/mindspore/ccsrc/minddata/dataset/include/dataset/constants.h index 7601e3d1489..080330384db 100644 --- a/mindspore/ccsrc/minddata/dataset/include/dataset/constants.h +++ b/mindspore/ccsrc/minddata/dataset/include/dataset/constants.h @@ -26,43 +26,43 @@ namespace dataset { using uchar = unsigned char; using dsize_t = int64_t; -// Target devices to perform map operation +/// \brief Target devices to perform map operation enum class MapTargetDevice { kCpu, kGpu, kAscend310 }; -// Possible dataset types for holding the data and client type +/// \brief Possible dataset types for holding the data and client type enum class DatasetType { kUnknown, kArrow, kTf }; -// Possible flavours of Tensor implementations +/// \brief Possible flavours of Tensor implementations enum class TensorImpl { kNone, kFlexible, kCv, kNP }; -// Possible values for shuffle +/// \brief Possible values for shuffle enum class ShuffleMode { kFalse = 0, kFiles = 1, kGlobal = 2, kInfile = 3 }; -// Possible values for Border types +/// \brief Possible values for Border types enum class BorderType { kConstant = 0, kEdge = 1, kReflect = 2, kSymmetric = 3 }; -// Possible values for Image format types in a batch +/// \brief Possible values for Image format types in a batch enum class ImageBatchFormat { kNHWC = 0, kNCHW = 1 }; -// Possible values for Image format types +/// \brief Possible values for Image format types enum class ImageFormat { HWC = 0, CHW = 1, HW = 2 }; -// Possible interpolation modes +/// \brief Possible interpolation modes enum class InterpolationMode { kLinear = 0, kNearestNeighbour = 1, kCubic = 2, kArea = 3, kCubicPil = 4 }; -// Possible JiebaMode modes +/// \brief Possible JiebaMode modes enum class JiebaMode { kMix = 0, kMp = 1, kHmm = 2 }; -// Possible values for SPieceTokenizerOutType +/// \brief Possible values for SPieceTokenizerOutType enum class SPieceTokenizerOutType { kString = 0, kInt = 1 }; -// 
Possible values for SPieceTokenizerLoadType +/// \brief Possible values for SPieceTokenizerLoadType enum class SPieceTokenizerLoadType { kFile = 0, kModel = 1 }; -// Possible values for SentencePieceModel +/// \brief Possible values for SentencePieceModel enum class SentencePieceModel { kUnigram = 0, kBpe = 1, kChar = 2, kWord = 3 }; -// Possible values for NormalizeForm +/// \brief Possible values for NormalizeForm enum class NormalizeForm { kNone = 0, kNfc, @@ -71,7 +71,7 @@ enum class NormalizeForm { kNfkd, }; -// Possible values for Mask +/// \brief Possible values for Mask enum class RelationalOp { kEqual = 0, // == kNotEqual, // != @@ -81,7 +81,7 @@ enum class RelationalOp { kGreaterEqual, // >= }; -// Possible values for SamplingStrategy +/// \brief Possible values for SamplingStrategy enum class SamplingStrategy { kRandom = 0, kEdgeWeight = 1 }; // convenience functions for 32bit int bitmask diff --git a/mindspore/ccsrc/minddata/dataset/include/dataset/datasets.h b/mindspore/ccsrc/minddata/dataset/include/dataset/datasets.h index 729dcbc99be..f65a56db35a 100644 --- a/mindspore/ccsrc/minddata/dataset/include/dataset/datasets.h +++ b/mindspore/ccsrc/minddata/dataset/include/dataset/datasets.h @@ -98,60 +98,60 @@ class Dataset : public std::enable_shared_from_this { /// \brief Gets the dataset size /// \param[in] estimate This is only supported by some of the ops and it's used to speed up the process of getting /// dataset size at the expense of accuracy. - /// \return dataset size. If failed, return -1 + /// \return Dataset size. If failed, return -1. int64_t GetDatasetSize(bool estimate = false); /// \brief Gets the output type - /// \return a vector of DataType. If failed, return an empty vector + /// \return A vector contains output DataType of dataset. If failed, return an empty vector. std::vector GetOutputTypes(); /// \brief Gets the output shape - /// \return a vector of TensorShape. 
If failed, return an empty vector + /// \return A vector contains output TensorShape of dataset. If failed, return an empty vector. std::vector> GetOutputShapes(); /// \brief Gets the batch size - /// \return int64_t + /// \return Batch size configuration of dataset. int64_t GetBatchSize(); /// \brief Gets the repeat count - /// \return int64_t + /// \return Repeat count configuration of dataset. int64_t GetRepeatCount(); /// \brief Gets the number of classes - /// \return number of classes. If failed, return -1 + /// \return Number of classes of dataset. If failed, return -1. int64_t GetNumClasses(); /// \brief Gets the column names - /// \return Names of the columns. If failed, return an empty vector + /// \return A vector contains all column names of dataset. If failed, return an empty vector. std::vector GetColumnNames() { return VectorCharToString(GetColumnNamesCharIF()); } /// \brief Gets the class indexing - /// \return a map of ClassIndexing. If failed, return an empty map + /// \return A map of ClassIndexing of dataset. If failed, return an empty map. std::vector>> GetClassIndexing() { return ClassIndexCharToString(GetClassIndexingCharIF()); } - /// \brief Setter function for runtime number of workers - /// \param[in] num_workers The number of threads in this operator - /// \return Shared pointer to the original object + /// \brief Setter function for runtime number of workers. + /// \param[in] num_workers The number of threads in this operator. + /// \return Shared pointer to the original object. std::shared_ptr SetNumWorkers(int32_t num_workers); - /// \brief Function to create an PullBasedIterator over the Dataset - /// \param[in] columns List of columns to be used to specify the order of columns - /// \return Shared pointer to the Iterator + /// \brief Function to create an PullBasedIterator over the Dataset. + /// \param[in] columns List of columns to be used to specify the order of columns. + /// \return Shared pointer to the Iterator. 
std::shared_ptr CreatePullBasedIterator(std::vector> columns = {}); - /// \brief Function to create an Iterator over the Dataset pipeline - /// \param[in] columns List of columns to be used to specify the order of columns + /// \brief Function to create an Iterator over the Dataset pipeline. + /// \param[in] columns List of columns to be used to specify the order of columns. /// \param[in] num_epochs Number of epochs to run through the pipeline, default -1 which means infinite epochs. - /// An empty row is returned at the end of each epoch - /// \return Shared pointer to the Iterator + /// An empty row is returned at the end of each epoch. + /// \return Shared pointer to the Iterator. std::shared_ptr CreateIterator(std::vector columns = {}, int32_t num_epochs = -1) { return CreateIteratorCharIF(VectorStringToChar(columns), num_epochs); } /// \brief Function to transfer data through a device. - /// \notes If device is Ascend, features of data will be transferred one by one. The limitation + /// \note If device is Ascend, features of data will be transferred one by one. The limitation /// of data transmission per time is 256M. /// \param[in] queue_name Channel name (default="", create new unique name). /// \param[in] device_type Type of device (default="", get from MSContext). @@ -160,7 +160,7 @@ class Dataset : public std::enable_shared_from_this { /// \param[in] send_epoch_end Whether to send end of sequence to device or not (default=true). /// \param[in] total_batches Number of batches to be sent to the device (default=0, all data). /// \param[in] create_data_info_queue Whether to create queue which stores types and shapes - /// of data or not(default=false). + /// of data or not (default=false). /// \return Returns true if no error encountered else false. 
bool DeviceQueue(std::string queue_name = "", std::string device_type = "", int32_t device_id = 0, int32_t num_epochs = -1, bool send_epoch_end = true, int32_t total_batches = 0, @@ -169,36 +169,36 @@ class Dataset : public std::enable_shared_from_this { total_batches, create_data_info_queue); } - /// \brief Function to create a Saver to save the dynamic data processed by the dataset pipeline + /// \brief Function to create a Saver to save the dynamic data processed by the dataset pipeline. /// \note Usage restrictions: - /// 1. Supported dataset formats: 'mindrecord' only + /// 1. Supported dataset formats: 'mindrecord' only. /// 2. To save the samples in order, set dataset's shuffle to false and num_files to 1. /// 3. Before calling the function, do not use batch operator, repeat operator or data augmentation operators /// with random attribute in map operator. /// 4. Mindrecord does not support bool, uint64, multi-dimensional uint8(drop dimension) nor /// multi-dimensional string. - /// \param[in] file_name Path to dataset file - /// \param[in] num_files Number of dataset files (default=1) - /// \param[in] file_type Dataset format (default="mindrecord") - /// \return Returns true if no error encountered else false + /// \param[in] file_name Path to dataset file. + /// \param[in] num_files Number of dataset files (default=1). + /// \param[in] file_type Dataset format (default="mindrecord"). + /// \return Returns true if no error encountered else false. bool Save(std::string dataset_path, int32_t num_files = 1, std::string dataset_type = "mindrecord") { return SaveCharIF(StringToChar(dataset_path), num_files, StringToChar(dataset_type)); } - /// \brief Function to create a BatchDataset - /// \notes Combines batch_size number of consecutive rows into batches - /// \param[in] batch_size The number of rows each batch is created with + /// \brief Function to create a BatchDataset. + /// \note Combines batch_size number of consecutive rows into batches. 
+ /// \param[in] batch_size The number of rows each batch is created with. /// \param[in] drop_remainder Determines whether or not to drop the last possibly incomplete /// batch. If true, and if there are less than batch_size rows /// available to make the last batch, then those rows will - /// be dropped and not propagated to the next node - /// \return Shared pointer to the current BatchDataset + /// be dropped and not propagated to the next node. + /// \return Shared pointer to the current Dataset. std::shared_ptr Batch(int32_t batch_size, bool drop_remainder = false); - /// \brief Function to create a BucketBatchByLengthDataset - /// \notes Bucket elements according to their lengths. Each bucket will be padded and batched when + /// \brief Function to create a BucketBatchByLengthDataset. + /// \note Bucket elements according to their lengths. Each bucket will be padded and batched when /// they are full. - /// \param[in] column_names Columns passed to element_length_function + /// \param[in] column_names Columns passed to element_length_function. /// \param[in] bucket_boundaries A list consisting of the upper boundaries of the buckets. /// Must be strictly increasing. If there are n boundaries, n+1 buckets are created: One bucket for /// [0, bucket_boundaries[0]), one bucket for [bucket_boundaries[i], bucket_boundaries[i+1]) for each @@ -208,7 +208,7 @@ class Dataset : public std::enable_shared_from_this { /// \param[in] element_length_function A function pointer that takes in MSTensorVec and outputs a MSTensorVec. /// The output must contain a single tensor containing a single int32_t. If no value is provided, /// then size of column_names must be 1, and the size of the first dimension of that column will be taken - /// as the length (default=nullptr) + /// as the length (default=nullptr). /// \param[in] pad_info Represents how to batch each column. The key corresponds to the column name, the value must /// be a tuple of 2 elements. 
The first element corresponds to the shape to pad to, and the second element /// corresponds to the value to pad with. If a column is not specified, then that column will be padded to the @@ -220,7 +220,7 @@ class Dataset : public std::enable_shared_from_this { /// an error will occur (default=false). /// \param[in] drop_remainder If true, will drop the last batch for each bucket if it is not a full batch /// (default=false). - /// \return Shared pointer to the current BucketBatchByLengthDataset + /// \return Shared pointer to the current Dataset. std::shared_ptr BucketBatchByLength( const std::vector &column_names, const std::vector &bucket_boundaries, const std::vector &bucket_batch_sizes, @@ -232,16 +232,17 @@ class Dataset : public std::enable_shared_from_this { element_length_function, PadInfoStringToChar(pad_info), pad_to_bucket_boundary, drop_remainder); } - /// \brief Function to create a SentencePieceVocab from source dataset - /// \notes Build a SentencePieceVocab from a dataset. - /// \param[in] col_names Column names to get words from. It can be a vector of column names + /// \brief Function to create a SentencePieceVocab from source dataset. + /// \note Build a SentencePieceVocab from a dataset. + /// \param[in] col_names Column names to get words from. It can be a vector of column names. /// \param[in] vocab_size Vocabulary size. /// \param[in] character_coverage Percentage of characters covered by the model, must be between /// 0.98 and 1.0 Good defaults are: 0.9995 for languages with rich character sets like /// Japanese or Chinese character sets, and 1.0 for other languages with small character sets. /// \param[in] model_type Model type. Choose from unigram (default), bpe, char, or word. /// The input sentence must be pretokenized when using word type. - /// \param[in] params A vector contains more option parameters of sentencepiece library + /// \param[in] params A vector contains more option parameters of sentencepiece library. 
+ /// \return Shared pointer to the SentencePieceVocab. std::shared_ptr BuildSentencePieceVocab( const std::vector &col_names, int32_t vocab_size, float character_coverage, SentencePieceModel model_type, const std::unordered_map ¶ms) { @@ -249,19 +250,19 @@ class Dataset : public std::enable_shared_from_this { UnorderedMapStringToChar(params)); } - /// \brief Function to create a Vocab from source dataset - /// \notes Build a vocab from a dataset. This would collect all the unique words in a dataset and return a vocab - /// which contains top_k most frequent words (if top_k is specified) - /// \param[in] columns Column names to get words from. It can be a vector of column names + /// \brief Function to create a Vocab from source dataset. + /// \note Build a vocab from a dataset. This would collect all the unique words in a dataset and return a vocab + /// which contains top_k most frequent words (if top_k is specified). + /// \param[in] columns Column names to get words from. It can be a vector of column names. /// \param[in] freq_range A tuple of integers (min_frequency, max_frequency). Words within the frequency /// range would be kept. 0 <= min_frequency <= max_frequency <= total_words. min_frequency/max_frequency - /// can be set to default, which corresponds to 0/total_words separately + /// can be set to default, which corresponds to 0/total_words separately. /// \param[in] top_k Number of words to be built into vocab. top_k most frequent words are - /// taken. The top_k is taken after freq_range. If not enough top_k, all words will be taken - /// \param[in] special_tokens A list of strings, each one is a special token + /// taken. The top_k is taken after freq_range. If not enough top_k, all words will be taken. + /// \param[in] special_tokens A list of strings, each one is a special token. 
/// \param[in] special_first Whether special_tokens will be prepended/appended to vocab, If special_tokens - /// is specified and special_first is set to default, special_tokens will be prepended - /// \return Shared pointer to the current Vocab + /// is specified and special_first is set to default, special_tokens will be prepended. + /// \return Shared pointer to the Vocab. std::shared_ptr BuildVocab(const std::vector &columns = {}, const std::pair &freq_range = {0, kDeMaxFreq}, int64_t top_k = kDeMaxTopk, const std::vector &special_tokens = {}, @@ -270,42 +271,42 @@ class Dataset : public std::enable_shared_from_this { special_first); } - /// \brief Function to create a ConcatDataset - /// \notes Concat the datasets in the input - /// \param[in] datasets List of shared pointers to the dataset that should be concatenated together - /// \return Shared pointer to the current ConcatDataset + /// \brief Function to create a ConcatDataset. + /// \note Concat the datasets in the input. + /// \param[in] datasets List of shared pointers to the dataset that should be concatenated together. + /// \return Shared pointer to the current Dataset. std::shared_ptr Concat(const std::vector> &datasets) { std::vector> all_datasets{shared_from_this()}; all_datasets.insert(std::end(all_datasets), std::begin(datasets), std::end(datasets)); return std::make_shared(all_datasets); } - /// \brief Function to filter dataset by predicate - /// \notes If input_columns is not provided or empty, all columns will be used - /// \param[in] predicate Function callable which returns a boolean value. If false then filter the element - /// \param[in] input_columns List of names of the input columns to filter - /// \return Shared pointer to the current FilterNode + /// \brief Function to filter dataset by predicate. + /// \note If input_columns is not provided or empty, all columns will be used. + /// \param[in] predicate Function callable which returns a boolean value. 
If false then filter the element. + /// \param[in] input_columns List of names of the input columns to filter. + /// \return Shared pointer to the current Dataset. std::shared_ptr Filter(std::function predicate, const std::vector &input_columns = {}) { return std::make_shared(shared_from_this(), predicate, VectorStringToChar(input_columns)); } - /// \brief Function to create a MapDataset - /// \notes Applies each operation in operations to this dataset + /// \brief Function to create a MapDataset. + /// \note Applies each operation in operations to this dataset. /// \param[in] operations Vector of raw pointers to TensorTransform objects to be applied on the dataset. Operations - /// are applied in the order they appear in this list + /// are applied in the order they appear in this list. /// \param[in] input_columns Vector of the names of the columns that will be passed to the first /// operation as input. The size of this list must match the number of /// input columns expected by the first operator. The default input_columns - /// is the first column - /// \param[in] output_columns Vector of names assigned to the columns outputted by the last operation - /// This parameter is mandatory if len(input_columns) != len(output_columns) + /// is the first column. + /// \param[in] output_columns Vector of names assigned to the columns outputted by the last operation. + /// This parameter is mandatory if len(input_columns) != len(output_columns). /// The size of this list must match the number of output columns of the /// last operation. The default output_columns will have the same - /// name as the input columns, i.e., the columns will be replaced - /// \param[in] project_columns A list of column names to project - /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). - /// \return Shared pointer to the current MapDataset + /// name as the input columns, i.e., the columns will be replaced. 
+ /// \param[in] project_columns A list of column names to project. + /// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). + /// \return Shared pointer to the current Dataset. std::shared_ptr Map(std::vector operations, const std::vector &input_columns = {}, const std::vector &output_columns = {}, @@ -321,22 +322,22 @@ class Dataset : public std::enable_shared_from_this { callbacks); } - /// \brief Function to create a MapDataset - /// \notes Applies each operation in operations to this dataset + /// \brief Function to create a MapDataset. + /// \note Applies each operation in operations to this dataset. /// \param[in] operations Vector of shared pointers to TensorTransform objects to be applied on the dataset. - /// Operations are applied in the order they appear in this list + /// Operations are applied in the order they appear in this list. /// \param[in] input_columns Vector of the names of the columns that will be passed to the first /// operation as input. The size of this list must match the number of /// input columns expected by the first operator. The default input_columns - /// is the first column - /// \param[in] output_columns Vector of names assigned to the columns outputted by the last operation - /// This parameter is mandatory if len(input_columns) != len(output_columns) + /// is the first column. + /// \param[in] output_columns Vector of names assigned to the columns outputted by the last operation. + /// This parameter is mandatory if len(input_columns) != len(output_columns). /// The size of this list must match the number of output columns of the /// last operation. The default output_columns will have the same - /// name as the input columns, i.e., the columns will be replaced - /// \param[in] project_columns A list of column names to project - /// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). 
- /// \return Shared pointer to the current MapDataset + /// name as the input columns, i.e., the columns will be replaced. + /// \param[in] project_columns A list of column names to project. + /// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). + /// \return Shared pointer to the current Dataset. std::shared_ptr Map(std::vector> operations, const std::vector &input_columns = {}, const std::vector &output_columns = {}, @@ -353,22 +354,22 @@ class Dataset : public std::enable_shared_from_this { callbacks); } - /// \brief Function to create a MapDataset - /// \notes Applies each operation in operations to this dataset + /// \brief Function to create a MapDataset. + /// \note Applies each operation in operations to this dataset. /// \param[in] operations Vector of TensorTransform objects to be applied on the dataset. Operations are applied in - /// the order they appear in this list + /// the order they appear in this list. /// \param[in] input_columns Vector of the names of the columns that will be passed to the first /// operation as input. The size of this list must match the number of /// input columns expected by the first operator. The default input_columns - /// is the first column - /// \param[in] output_columns Vector of names assigned to the columns outputted by the last operation - /// This parameter is mandatory if len(input_columns) != len(output_columns) + /// is the first column. + /// \param[in] output_columns Vector of names assigned to the columns outputted by the last operation. + /// This parameter is mandatory if len(input_columns) != len(output_columns). /// The size of this list must match the number of output columns of the /// last operation. The default output_columns will have the same - /// name as the input columns, i.e., the columns will be replaced - /// \param[in] project_columns A list of column names to project - /// \param[in] cache Tensor cache to use. 
(default=nullptr which means no cache is used). - /// \return Shared pointer to the current MapDataset + /// name as the input columns, i.e., the columns will be replaced. + /// \param[in] project_columns A list of column names to project. + /// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). + /// \return Shared pointer to the current Dataset. std::shared_ptr Map(const std::vector> operations, const std::vector &input_columns = {}, const std::vector &output_columns = {}, @@ -383,59 +384,59 @@ class Dataset : public std::enable_shared_from_this { callbacks); } - /// \brief Function to create a Project Dataset - /// \notes Applies project to the dataset - /// \param[in] columns The name of columns to project - /// \return Shared pointer to the current Dataset + /// \brief Function to create a Project Dataset. + /// \note Applies project to the dataset. + /// \param[in] columns The name of columns to project. + /// \return Shared pointer to the current Dataset. std::shared_ptr Project(const std::vector &columns) { return std::make_shared(shared_from_this(), VectorStringToChar(columns)); } - /// \brief Function to create a Rename Dataset - /// \notes Renames the columns in the input dataset - /// \param[in] input_columns List of the input columns to rename - /// \param[in] output_columns List of the output columns - /// \return Shared pointer to the current Dataset + /// \brief Function to create a Rename Dataset. + /// \note Renames the columns in the input dataset. + /// \param[in] input_columns List of the input columns to rename. + /// \param[in] output_columns List of the output columns. + /// \return Shared pointer to the current Dataset. 
std::shared_ptr Rename(const std::vector &input_columns, const std::vector &output_columns) { return std::make_shared(shared_from_this(), VectorStringToChar(input_columns), VectorStringToChar(output_columns)); } - /// \brief Function to create a RepeatDataset - /// \notes Repeats this dataset count times. Repeat indefinitely if count is -1 - /// \param[in] count Number of times the dataset should be repeated - /// \return Shared pointer to the current Dataset + /// \brief Function to create a RepeatDataset. + /// \note Repeats this dataset count times. Repeat indefinitely if count is -1. + /// \param[in] count Number of times the dataset should be repeated. + /// \return Shared pointer to the current Dataset. /// \note Repeat will return shared pointer to `Dataset` instead of `RepeatDataset` - /// due to a limitation in the current implementation + /// due to a limitation in the current implementation. std::shared_ptr Repeat(int32_t count = -1) { return std::make_shared(shared_from_this(), count); } - /// \brief Function to create a Shuffle Dataset - /// \notes Randomly shuffles the rows of this dataset + /// \brief Function to create a Shuffle Dataset. + /// \note Randomly shuffles the rows of this dataset. /// \param[in] buffer_size The size of the buffer (must be larger than 1) for shuffling - /// \return Shared pointer to the current ShuffleDataset + /// \return Shared pointer to the current Dataset. std::shared_ptr Shuffle(int32_t buffer_size) { return std::make_shared(shared_from_this(), buffer_size); } - /// \brief Function to create a SkipDataset - /// \notes Skips count elements in this dataset. + /// \brief Function to create a SkipDataset. + /// \note Skips count elements in this dataset. /// \param[in] count Number of elements the dataset to be skipped. - /// \return Shared pointer to the current SkipDataset + /// \return Shared pointer to the current Dataset. 
std::shared_ptr Skip(int32_t count) { return std::make_shared(shared_from_this(), count); } - /// \brief Function to create a TakeDataset - /// \notes Takes count elements in this dataset. + /// \brief Function to create a TakeDataset. + /// \note Takes count elements in this dataset. /// \param[in] count Number of elements the dataset to be taken. - /// \return Shared pointer to the current Dataset + /// \return Shared pointer to the current Dataset. std::shared_ptr Take(int32_t count = -1) { return std::make_shared(shared_from_this(), count); } - /// \brief Function to create a Zip Dataset - /// \notes Applies zip to the dataset - /// \param[in] datasets A list of shared pointers to the datasets that we want to zip - /// \return Shared pointer to the current Dataset + /// \brief Function to create a Zip Dataset. + /// \note Applies zip to the dataset. + /// \param[in] datasets A list of shared pointers to the datasets that we want to zip. + /// \return Shared pointer to the current Dataset. std::shared_ptr Zip(const std::vector> &datasets) { std::vector> all_datasets = datasets; all_datasets.push_back(shared_from_this()); @@ -671,15 +672,15 @@ class ZipDataset : public Dataset { ~ZipDataset() = default; }; -/// \brief Function to create a SchemaObj -/// \param[in] schema_file Path of schema file -/// \note This api exists because std::string will constrained by ABI compile macro but char don't. -/// \return Shared pointer to the current schema +/// \brief Function to create a SchemaObj. +/// \param[in] schema_file Path of schema file. +/// \note This API exists because std::string will constrained by ABI compile option while char don't. +/// \return Shared pointer to the current schema. std::shared_ptr SchemaCharIF(const std::vector &schema_file); -/// \brief Function to create a SchemaObj -/// \param[in] schema_file Path of schema file -/// \return Shared pointer to the current schema +/// \brief Function to create a SchemaObj. 
+/// \param[in] schema_file Path of schema file. +/// \return Shared pointer to the current schema. inline std::shared_ptr Schema(const std::string &schema_file = "") { return SchemaCharIF(StringToChar(schema_file)); } @@ -698,18 +699,17 @@ class AlbumDataset : public Dataset { ~AlbumDataset() = default; }; -/// \brief Function to create an AlbumDataset -/// \notes The generated dataset is specified through setting a schema -/// \param[in] dataset_dir Path to the root directory that contains the dataset -/// \param[in] data_schema Path to dataset schema file -/// \param[in] column_names Column names used to specify columns to load, if empty, will read all columns. -/// (default = {}) -/// \param[in] decode the option to decode the images in dataset (default = false) +/// \brief Function to create an AlbumDataset. +/// \note The generated dataset is specified through setting a schema. +/// \param[in] dataset_dir Path to the root directory that contains the dataset. +/// \param[in] data_schema Path to dataset schema file. +/// \param[in] column_names Column names used to specify columns to load, if empty, will read all columns +/// (default = {}). +/// \param[in] decode The option to decode the images in dataset (default = false). /// \param[in] sampler Shared pointer to a sampler object used to choose samples from the dataset. If sampler is not -/// given, -/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// \return Shared pointer to the current Dataset +/// given, a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()). +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \return Shared pointer to the AlbumDataset. 
inline std::shared_ptr Album(const std::string &dataset_dir, const std::string &data_schema, const std::vector &column_names = {}, bool decode = false, const std::shared_ptr &sampler = std::make_shared(), @@ -717,15 +717,15 @@ inline std::shared_ptr Album(const std::string &dataset_dir, const return std::make_shared(StringToChar(dataset_dir), StringToChar(data_schema), VectorStringToChar(column_names), decode, sampler, cache); } -/// \brief Function to create an AlbumDataset -/// \notes The generated dataset is specified through setting a schema -/// \param[in] dataset_dir Path to the root directory that contains the dataset -/// \param[in] data_schema Path to dataset schema file -/// \param[in] column_names Column names used to specify columns to load -/// \param[in] decode the option to decode the images in dataset +/// \brief Function to create an AlbumDataset. +/// \note The generated dataset is specified through setting a schema. +/// \param[in] dataset_dir Path to the root directory that contains the dataset. +/// \param[in] data_schema Path to dataset schema file. +/// \param[in] column_names Column names used to specify columns to load. +/// \param[in] decode The option to decode the images in dataset. /// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset. -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// \return Shared pointer to the current Dataset +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). 
+/// \return Shared pointer to the AlbumDataset. inline std::shared_ptr Album(const std::string &dataset_dir, const std::string &data_schema, const std::vector &column_names, bool decode, const Sampler *sampler, @@ -733,15 +733,15 @@ inline std::shared_ptr Album(const std::string &dataset_dir, const return std::make_shared(StringToChar(dataset_dir), StringToChar(data_schema), VectorStringToChar(column_names), decode, sampler, cache); } -/// \brief Function to create an AlbumDataset -/// \notes The generated dataset is specified through setting a schema -/// \param[in] dataset_dir Path to the root directory that contains the dataset -/// \param[in] data_schema Path to dataset schema file -/// \param[in] column_names Column names used to specify columns to load -/// \param[in] decode the option to decode the images in dataset +/// \brief Function to create an AlbumDataset. +/// \note The generated dataset is specified through setting a schema. +/// \param[in] dataset_dir Path to the root directory that contains the dataset. +/// \param[in] data_schema Path to dataset schema file. +/// \param[in] column_names Column names used to specify columns to load. +/// \param[in] decode The option to decode the images in dataset. /// \param[in] sampler Sampler object used to choose samples from the dataset. -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// \return Shared pointer to the current Dataset +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \return Shared pointer to the AlbumDataset. inline std::shared_ptr Album(const std::string &dataset_dir, const std::string &data_schema, const std::vector &column_names, bool decode, const std::reference_wrapper sampler, @@ -764,18 +764,17 @@ class CelebADataset : public Dataset { ~CelebADataset() = default; }; -/// \brief Function to create a CelebADataset -/// \notes The generated dataset has two columns ['image', 'attr']. 
+/// \brief Function to create a CelebADataset. +/// \note The generated dataset has two columns ['image', 'attr']. /// The type of the image tensor is uint8. The attr tensor is uint32 and one hot type. /// \param[in] dataset_dir Path to the root directory that contains the dataset. /// \param[in] usage One of "all", "train", "valid" or "test" (default = "all"). /// \param[in] sampler Shared pointer to a sampler object used to choose samples from the dataset. If sampler is not -/// given, -/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) +/// given, a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()). /// \param[in] decode Decode the images after reading (default=false). /// \param[in] extensions Set of file extensions to be included in the dataset (default={}). -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// \return Shared pointer to the current Dataset +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \return Shared pointer to the CelebADataset. inline std::shared_ptr CelebA( const std::string &dataset_dir, const std::string &usage = "all", const std::shared_ptr &sampler = std::make_shared(), bool decode = false, @@ -784,16 +783,16 @@ inline std::shared_ptr CelebA( SetStringToChar(extensions), cache); } -/// \brief Function to create a CelebADataset -/// \notes The generated dataset has two columns ['image', 'attr']. +/// \brief Function to create a CelebADataset. +/// \note The generated dataset has two columns ['image', 'attr']. /// The type of the image tensor is uint8. The attr tensor is uint32 and one hot type. /// \param[in] dataset_dir Path to the root directory that contains the dataset. -/// \param[in] usage One of "all", "train", "valid" or "test" +/// \param[in] usage One of "all", "train", "valid" or "test". 
/// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset. /// \param[in] decode Decode the images after reading (default=false). /// \param[in] extensions Set of file extensions to be included in the dataset (default={}). -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// \return Shared pointer to the current Dataset +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \return Shared pointer to the CelebADataset. inline std::shared_ptr CelebA(const std::string &dataset_dir, const std::string &usage, const Sampler *sampler, bool decode = false, const std::set &extensions = {}, @@ -802,16 +801,16 @@ inline std::shared_ptr CelebA(const std::string &dataset_dir, con SetStringToChar(extensions), cache); } -/// \brief Function to create a CelebADataset -/// \notes The generated dataset has two columns ['image', 'attr']. +/// \brief Function to create a CelebADataset. +/// \note The generated dataset has two columns ['image', 'attr']. /// The type of the image tensor is uint8. The attr tensor is uint32 and one hot type. /// \param[in] dataset_dir Path to the root directory that contains the dataset. -/// \param[in] usage One of "all", "train", "valid" or "test" +/// \param[in] usage One of "all", "train", "valid" or "test". /// \param[in] sampler Sampler object used to choose samples from the dataset. /// \param[in] decode Decode the images after reading (default=false). /// \param[in] extensions Set of file extensions to be included in the dataset (default={}). -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// \return Shared pointer to the current Dataset +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \return Shared pointer to the CelebADataset. 
inline std::shared_ptr CelebA(const std::string &dataset_dir, const std::string &usage, const std::reference_wrapper sampler, bool decode = false, const std::set &extensions = {}, @@ -831,15 +830,14 @@ class Cifar10Dataset : public Dataset { ~Cifar10Dataset() = default; }; -/// \brief Function to create a Cifar10 Dataset -/// \notes The generated dataset has two columns ["image", "label"] -/// \param[in] dataset_dir Path to the root directory that contains the dataset -/// \param[in] usage of CIFAR10, can be "train", "test" or "all" (default = "all"). +/// \brief Function to create a Cifar10 Dataset. +/// \note The generated dataset has two columns ["image", "label"]. +/// \param[in] dataset_dir Path to the root directory that contains the dataset. +/// \param[in] usage Part of dataset of CIFAR10, can be "train", "test" or "all" (default = "all"). /// \param[in] sampler Shared pointer to a sampler object used to choose samples from the dataset. If sampler is not -/// given, -/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// \return Shared pointer to the current Dataset +/// given, a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()). +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \return Shared pointer to the Cifar10Dataset. 
inline std::shared_ptr Cifar10( const std::string &dataset_dir, const std::string &usage = "all", const std::shared_ptr &sampler = std::make_shared(), @@ -847,26 +845,26 @@ inline std::shared_ptr Cifar10( return std::make_shared(StringToChar(dataset_dir), StringToChar(usage), sampler, cache); } -/// \brief Function to create a Cifar10 Dataset -/// \notes The generated dataset has two columns ["image", "label"] -/// \param[in] dataset_dir Path to the root directory that contains the dataset -/// \param[in] usage of CIFAR10, can be "train", "test" or "all" +/// \brief Function to create a Cifar10 Dataset. +/// \note The generated dataset has two columns ["image", "label"]. +/// \param[in] dataset_dir Path to the root directory that contains the dataset. +/// \param[in] usage Part of dataset of CIFAR10, can be "train", "test" or "all". /// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset. -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// \return Shared pointer to the current Dataset +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \return Shared pointer to the Cifar10Dataset. inline std::shared_ptr Cifar10(const std::string &dataset_dir, const std::string &usage, const Sampler *sampler, const std::shared_ptr &cache = nullptr) { return std::make_shared(StringToChar(dataset_dir), StringToChar(usage), sampler, cache); } -/// \brief Function to create a Cifar10 Dataset -/// \notes The generated dataset has two columns ["image", "label"] -/// \param[in] dataset_dir Path to the root directory that contains the dataset -/// \param[in] usage of CIFAR10, can be "train", "test" or "all" +/// \brief Function to create a Cifar10 Dataset. +/// \note The generated dataset has two columns ["image", "label"]. +/// \param[in] dataset_dir Path to the root directory that contains the dataset. 
+/// \param[in] usage Part of dataset of CIFAR10, can be "train", "test" or "all". /// \param[in] sampler Sampler object used to choose samples from the dataset. -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// \return Shared pointer to the current Dataset +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \return Shared pointer to the Cifar10Dataset. inline std::shared_ptr Cifar10(const std::string &dataset_dir, const std::string &usage, const std::reference_wrapper sampler, const std::shared_ptr &cache = nullptr) { @@ -884,15 +882,14 @@ class Cifar100Dataset : public Dataset { ~Cifar100Dataset() = default; }; -/// \brief Function to create a Cifar100 Dataset -/// \notes The generated dataset has three columns ["image", "coarse_label", "fine_label"] -/// \param[in] dataset_dir Path to the root directory that contains the dataset -/// \param[in] usage of CIFAR100, can be "train", "test" or "all" (default = "all"). +/// \brief Function to create a Cifar100 Dataset. +/// \note The generated dataset has three columns ["image", "coarse_label", "fine_label"]. +/// \param[in] dataset_dir Path to the root directory that contains the dataset. +/// \param[in] usage Part of dataset of CIFAR100, can be "train", "test" or "all" (default = "all"). /// \param[in] sampler Shared pointer to a sampler object used to choose samples from the dataset. If sampler is not -/// given, -/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// \return Shared pointer to the current Dataset +/// given, a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()). +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \return Shared pointer to the Cifar100Dataset. 
inline std::shared_ptr Cifar100( const std::string &dataset_dir, const std::string &usage = "all", const std::shared_ptr &sampler = std::make_shared(), @@ -900,26 +897,26 @@ inline std::shared_ptr Cifar100( return std::make_shared(StringToChar(dataset_dir), StringToChar(usage), sampler, cache); } -/// \brief Function to create a Cifar100 Dataset -/// \notes The generated dataset has three columns ["image", "coarse_label", "fine_label"] -/// \param[in] dataset_dir Path to the root directory that contains the dataset -/// \param[in] usage of CIFAR100, can be "train", "test" or "all". +/// \brief Function to create a Cifar100 Dataset. +/// \note The generated dataset has three columns ["image", "coarse_label", "fine_label"]. +/// \param[in] dataset_dir Path to the root directory that contains the dataset. +/// \param[in] usage Part of dataset of CIFAR100, can be "train", "test" or "all". /// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset. -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// \return Shared pointer to the current Dataset +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \return Shared pointer to the Cifar100Dataset. inline std::shared_ptr Cifar100(const std::string &dataset_dir, const std::string &usage, const Sampler *sampler, const std::shared_ptr &cache = nullptr) { return std::make_shared(StringToChar(dataset_dir), StringToChar(usage), sampler, cache); } -/// \brief Function to create a Cifar100 Dataset -/// \notes The generated dataset has three columns ["image", "coarse_label", "fine_label"] -/// \param[in] dataset_dir Path to the root directory that contains the dataset -/// \param[in] usage of CIFAR100, can be "train", "test" or "all". +/// \brief Function to create a Cifar100 Dataset. +/// \note The generated dataset has three columns ["image", "coarse_label", "fine_label"]. 
+/// \param[in] dataset_dir Path to the root directory that contains the dataset. +/// \param[in] usage Part of dataset of CIFAR100, can be "train", "test" or "all". /// \param[in] sampler Sampler object used to choose samples from the dataset. -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// \return Shared pointer to the current Dataset +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \return Shared pointer to the Cifar100Dataset. inline std::shared_ptr Cifar100(const std::string &dataset_dir, const std::string &usage, const std::reference_wrapper sampler, const std::shared_ptr &cache = nullptr) { @@ -934,14 +931,14 @@ class CLUEDataset : public Dataset { ~CLUEDataset() = default; }; -/// \brief Function to create a CLUEDataset -/// \notes The generated dataset has a variable number of columns depending on the task and usage +/// \brief Function to create a CLUEDataset. +/// \note The generated dataset has a variable number of columns depending on the task and usage. /// \param[in] dataset_files List of files to be read to search for a pattern of files. The list /// will be sorted in a lexicographical order. /// \param[in] task The kind of task, one of "AFQMC", "TNEWS", "IFLYTEK", "CMNLI", "WSC" and "CSL" (default="AFQMC"). -/// \param[in] usage Be used to "train", "test" or "eval" data (default="train"). -/// \param[in] num_samples The number of samples to be included in the dataset. -/// (Default = 0 means all samples.) +/// \param[in] usage Part of dataset of CLUE, can be "train", "test" or "eval" data (default="train"). +/// \param[in] num_samples The number of samples to be included in the dataset +/// (Default = 0 means all samples). /// \param[in] shuffle The mode for shuffling data every epoch. (Default=ShuffleMode.kGlobal) /// Can be any of: /// ShuffleMode::kFalse - No shuffling is performed. 
@@ -949,9 +946,9 @@ class CLUEDataset : public Dataset { /// ShuffleMode::kGlobal - Shuffle both the files and samples. /// \param[in] num_shards Number of shards that the dataset should be divided into. (Default = 1) /// \param[in] shard_id The shard ID within num_shards. This argument should be -/// specified only when num_shards is also specified. (Default = 0) -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// \return Shared pointer to the current CLUEDataset +/// specified only when num_shards is also specified (Default = 0). +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \return Shared pointer to the CLUEDataset. inline std::shared_ptr CLUE(const std::vector &dataset_files, const std::string &task = "AFQMC", const std::string &usage = "train", int64_t num_samples = 0, ShuffleMode shuffle = ShuffleMode::kGlobal, @@ -975,8 +972,8 @@ class CocoDataset : public Dataset { ~CocoDataset() = default; }; -/// \brief Function to create a CocoDataset -/// \notes The generated dataset has multi-columns : +/// \brief Function to create a CocoDataset. +/// \note The generated dataset has multi-columns : /// - task='Detection', column: [['image', dtype=uint8], ['bbox', dtype=float32], ['category_id', dtype=uint32], /// ['iscrowd', dtype=uint32]]. /// - task='Stuff', column: [['image', dtype=uint8], ['segmentation',dtype=float32], ['iscrowd', dtype=uint32]]. @@ -984,15 +981,14 @@ class CocoDataset : public Dataset { /// ['num_keypoints', dtype=uint32]]. /// - task='Panoptic', column: [['image', dtype=uint8], ['bbox', dtype=float32], ['category_id', dtype=uint32], /// ['iscrowd', dtype=uint32], ['area', dtype=uitn32]]. 
-/// \param[in] dataset_dir Path to the root directory that contains the dataset -/// \param[in] annotation_file Path to the annotation json -/// \param[in] task Set the task type of reading coco data, now support 'Detection'/'Stuff'/'Panoptic'/'Keypoint' -/// \param[in] decode Decode the images after reading +/// \param[in] dataset_dir Path to the root directory that contains the dataset. +/// \param[in] annotation_file Path to the annotation json. +/// \param[in] task Set the task type of reading coco data, now support 'Detection'/'Stuff'/'Panoptic'/'Keypoint'. +/// \param[in] decode Decode the images after reading. /// \param[in] sampler Shared pointer to a sampler object used to choose samples from the dataset. If sampler is not -/// given, -/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// \return Shared pointer to the current Dataset +/// given, a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()). +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \return Shared pointer to the CocoDataset. inline std::shared_ptr Coco(const std::string &dataset_dir, const std::string &annotation_file, const std::string &task = "Detection", const bool &decode = false, const std::shared_ptr &sampler = std::make_shared(), @@ -1001,8 +997,8 @@ inline std::shared_ptr Coco(const std::string &dataset_dir, const s decode, sampler, cache); } -/// \brief Function to create a CocoDataset -/// \notes The generated dataset has multi-columns : +/// \brief Function to create a CocoDataset. +/// \note The generated dataset has multi-columns : /// - task='Detection', column: [['image', dtype=uint8], ['bbox', dtype=float32], ['category_id', dtype=uint32], /// ['iscrowd', dtype=uint32]]. 
/// - task='Stuff', column: [['image', dtype=uint8], ['segmentation',dtype=float32], ['iscrowd', dtype=uint32]]. @@ -1010,13 +1006,13 @@ inline std::shared_ptr Coco(const std::string &dataset_dir, const s /// ['num_keypoints', dtype=uint32]]. /// - task='Panoptic', column: [['image', dtype=uint8], ['bbox', dtype=float32], ['category_id', dtype=uint32], /// ['iscrowd', dtype=uint32], ['area', dtype=uitn32]]. -/// \param[in] dataset_dir Path to the root directory that contains the dataset -/// \param[in] annotation_file Path to the annotation json -/// \param[in] task Set the task type of reading coco data, now support 'Detection'/'Stuff'/'Panoptic'/'Keypoint' -/// \param[in] decode Decode the images after reading -/// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset. -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// \return Shared pointer to the current Dataset +/// \param[in] dataset_dir Path to the root directory that contains the dataset. +/// \param[in] annotation_file Path to the annotation json. +/// \param[in] task Set the task type of reading coco data, now support 'Detection'/'Stuff'/'Panoptic'/'Keypoint'. +/// \param[in] decode Decode the images after reading. +/// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset. +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \return Shared pointer to the CocoDataset. inline std::shared_ptr Coco(const std::string &dataset_dir, const std::string &annotation_file, const std::string &task, const bool &decode, const Sampler *sampler, const std::shared_ptr &cache = nullptr) { @@ -1024,8 +1020,8 @@ inline std::shared_ptr Coco(const std::string &dataset_dir, const s decode, sampler, cache); } -/// \brief Function to create a CocoDataset -/// \notes The generated dataset has multi-columns : +/// \brief Function to create a CocoDataset. 
+/// \note The generated dataset has multi-columns : /// - task='Detection', column: [['image', dtype=uint8], ['bbox', dtype=float32], ['category_id', dtype=uint32], /// ['iscrowd', dtype=uint32]]. /// - task='Stuff', column: [['image', dtype=uint8], ['segmentation',dtype=float32], ['iscrowd', dtype=uint32]]. @@ -1033,13 +1029,13 @@ inline std::shared_ptr Coco(const std::string &dataset_dir, const s /// ['num_keypoints', dtype=uint32]]. /// - task='Panoptic', column: [['image', dtype=uint8], ['bbox', dtype=float32], ['category_id', dtype=uint32], /// ['iscrowd', dtype=uint32], ['area', dtype=uitn32]]. -/// \param[in] dataset_dir Path to the root directory that contains the dataset -/// \param[in] annotation_file Path to the annotation json -/// \param[in] task Set the task type of reading coco data, now support 'Detection'/'Stuff'/'Panoptic'/'Keypoint' -/// \param[in] decode Decode the images after reading +/// \param[in] dataset_dir Path to the root directory that contains the dataset. +/// \param[in] annotation_file Path to the annotation json. +/// \param[in] task Set the task type of reading coco data, now support 'Detection'/'Stuff'/'Panoptic'/'Keypoint'. +/// \param[in] decode Decode the images after reading. /// \param[in] sampler Sampler object used to choose samples from the dataset. -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// \return Shared pointer to the current Dataset +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \return Shared pointer to the CocoDataset. 
inline std::shared_ptr Coco(const std::string &dataset_dir, const std::string &annotation_file, const std::string &task, const bool &decode, const std::reference_wrapper sampler, @@ -1057,8 +1053,8 @@ class CSVDataset : public Dataset { ~CSVDataset() = default; }; -/// \brief Function to create a CSVDataset -/// \notes The generated dataset has a variable number of columns +/// \brief Function to create a CSVDataset. +/// \note The generated dataset has a variable number of columns. /// \param[in] dataset_files List of files to be read to search for a pattern of files. The list /// will be sorted in a lexicographical order. /// \param[in] field_delim A char that indicates the delimiter to separate fields (default=','). @@ -1067,17 +1063,17 @@ class CSVDataset : public Dataset { /// \param[in] column_names List of column names of the dataset (default={}). If this is not provided, infers the /// column_names from the first row of CSV file. /// \param[in] num_samples The number of samples to be included in the dataset. -/// (Default = 0 means all samples.) -/// \param[in] shuffle The mode for shuffling data every epoch. (Default=ShuffleMode::kGlobal) +/// (Default = 0 means all samples). +/// \param[in] shuffle The mode for shuffling data every epoch (Default=ShuffleMode::kGlobal). /// Can be any of: /// ShuffleMode::kFalse - No shuffling is performed. /// ShuffleMode::kFiles - Shuffle files only. /// ShuffleMode::kGlobal - Shuffle both the files and samples. /// \param[in] num_shards Number of shards that the dataset should be divided into. (Default = 1) /// \param[in] shard_id The shard ID within num_shards. This argument should be -/// specified only when num_shards is also specified. (Default = 0) -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// \return Shared pointer to the current Dataset +/// specified only when num_shards is also specified (Default = 0). 
+/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \return Shared pointer to the CSVDataset. inline std::shared_ptr CSV(const std::vector &dataset_files, char field_delim = ',', const std::vector> &column_defaults = {}, const std::vector &column_names = {}, int64_t num_samples = 0, @@ -1106,19 +1102,18 @@ class ImageFolderDataset : public Dataset { ~ImageFolderDataset() = default; }; -/// \brief Function to create an ImageFolderDataset -/// \notes A source dataset that reads images from a tree of directories -/// All images within one folder have the same label +/// \brief Function to create an ImageFolderDataset. +/// \note A source dataset that reads images from a tree of directories. +/// All images within one folder have the same label. /// The generated dataset has two columns ["image", "label"] -/// \param[in] dataset_dir Path to the root directory that contains the dataset -/// \param[in] decode A flag to decode in ImageFolder +/// \param[in] dataset_dir Path to the root directory that contains the dataset. +/// \param[in] decode A flag to decode in ImageFolder. /// \param[in] sampler Shared pointer to a sampler object used to choose samples from the dataset. If sampler is not -/// given, -/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) -/// \param[in] extensions File extensions to be read -/// \param[in] class_indexing a class name to label map -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// \return Shared pointer to the current ImageFolderDataset +/// given, a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()). +/// \param[in] extensions File extensions to be read. +/// \param[in] class_indexing A class name to label map. +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \return Shared pointer to the ImageFolderDataset. 
inline std::shared_ptr ImageFolder( const std::string &dataset_dir, bool decode = false, const std::shared_ptr &sampler = std::make_shared(), @@ -1129,16 +1124,16 @@ inline std::shared_ptr ImageFolder( } /// \brief Function to create an ImageFolderDataset -/// \notes A source dataset that reads images from a tree of directories -/// All images within one folder have the same label -/// The generated dataset has two columns ["image", "label"] -/// \param[in] dataset_dir Path to the root directory that contains the dataset -/// \param[in] decode A flag to decode in ImageFolder +/// \note A source dataset that reads images from a tree of directories. +/// All images within one folder have the same label. +/// The generated dataset has two columns ["image", "label"]. +/// \param[in] dataset_dir Path to the root directory that contains the dataset. +/// \param[in] decode A flag to decode in ImageFolder. /// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset. -/// \param[in] extensions File extensions to be read -/// \param[in] class_indexing a class name to label map -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// \return Shared pointer to the current ImageFolderDataset +/// \param[in] extensions File extensions to be read. +/// \param[in] class_indexing A class name to label map. +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \return Shared pointer to the ImageFolderDataset. 
inline std::shared_ptr ImageFolder(const std::string &dataset_dir, bool decode, const Sampler *sampler, const std::set &extensions = {}, @@ -1148,17 +1143,17 @@ inline std::shared_ptr ImageFolder(const std::string &datase MapStringToChar(class_indexing), cache); } -/// \brief Function to create an ImageFolderDataset -/// \notes A source dataset that reads images from a tree of directories -/// All images within one folder have the same label -/// The generated dataset has two columns ["image", "label"] -/// \param[in] dataset_dir Path to the root directory that contains the dataset -/// \param[in] decode A flag to decode in ImageFolder +/// \brief Function to create an ImageFolderDataset. +/// \note A source dataset that reads images from a tree of directories. +/// All images within one folder have the same label. +/// The generated dataset has two columns ["image", "label"]. +/// \param[in] dataset_dir Path to the root directory that contains the dataset. +/// \param[in] decode A flag to decode in ImageFolder. /// \param[in] sampler Sampler object used to choose samples from the dataset. -/// \param[in] extensions File extensions to be read -/// \param[in] class_indexing a class name to label map -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// \return Shared pointer to the current ImageFolderDataset +/// \param[in] extensions File extensions to be read. +/// \param[in] class_indexing A class name to label map. +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \return Shared pointer to the ImageFolderDataset. 
inline std::shared_ptr ImageFolder(const std::string &dataset_dir, bool decode, const std::reference_wrapper sampler, const std::set &extensions = {}, @@ -1184,18 +1179,17 @@ class ManifestDataset : public Dataset { ~ManifestDataset() = default; }; -/// \brief Function to create a ManifestDataset -/// \notes The generated dataset has two columns ["image", "label"] -/// \param[in] dataset_file The dataset file to be read -/// \param[in] usage Need "train", "eval" or "inference" data (default="train") +/// \brief Function to create a ManifestDataset. +/// \note The generated dataset has two columns ["image", "label"]. +/// \param[in] dataset_file The dataset file to be read. +/// \param[in] usage Part of dataset of ManifestDataset, can be "train", "eval" or "inference" data (default="train"). /// \param[in] sampler Shared pointer to a sampler object used to choose samples from the dataset. If sampler is not -/// given, -/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) +/// given, a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()). /// \param[in] class_indexing A str-to-int mapping from label name to index (default={}, the folder /// names will be sorted alphabetically and each class will be given a unique index starting from 0). /// \param[in] decode Decode the images after reading (default=false). -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// \return Shared pointer to the current ManifestDataset +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \return Shared pointer to the ManifestDataset. 
inline std::shared_ptr Manifest( const std::string &dataset_file, const std::string &usage = "train", const std::shared_ptr &sampler = std::make_shared(), @@ -1205,16 +1199,16 @@ inline std::shared_ptr Manifest( MapStringToChar(class_indexing), decode, cache); } -/// \brief Function to create a ManifestDataset -/// \notes The generated dataset has two columns ["image", "label"] -/// \param[in] dataset_file The dataset file to be read -/// \param[in] usage Need "train", "eval" or "inference" data +/// \brief Function to create a ManifestDataset. +/// \note The generated dataset has two columns ["image", "label"]. +/// \param[in] dataset_file The dataset file to be read. +/// \param[in] usage Part of dataset of ManifestDataset, can be "train", "eval" or "inference" data. /// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset. /// \param[in] class_indexing A str-to-int mapping from label name to index (default={}, the folder /// names will be sorted alphabetically and each class will be given a unique index starting from 0). /// \param[in] decode Decode the images after reading (default=false). -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// \return Shared pointer to the current ManifestDataset +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \return Shared pointer to the ManifestDataset. 
inline std::shared_ptr Manifest(const std::string &dataset_file, const std::string &usage, const Sampler *sampler, const std::map &class_indexing = {}, @@ -1224,16 +1218,16 @@ inline std::shared_ptr Manifest(const std::string &dataset_file MapStringToChar(class_indexing), decode, cache); } -/// \brief Function to create a ManifestDataset -/// \notes The generated dataset has two columns ["image", "label"] -/// \param[in] dataset_file The dataset file to be read -/// \param[in] usage Need "train", "eval" or "inference" data +/// \brief Function to create a ManifestDataset. +/// \note The generated dataset has two columns ["image", "label"]. +/// \param[in] dataset_file The dataset file to be read. +/// \param[in] usage Part of dataset of ManifestDataset, can be "train", "eval" or "inference" data. /// \param[in] sampler Sampler object used to choose samples from the dataset. /// \param[in] class_indexing A str-to-int mapping from label name to index (default={}, the folder /// names will be sorted alphabetically and each class will be given a unique index starting from 0). /// \param[in] decode Decode the images after reading (default=false). -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// \return Shared pointer to the current ManifestDataset +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \return Shared pointer to the ManifestDataset. inline std::shared_ptr Manifest(const std::string &dataset_file, const std::string &usage, const std::reference_wrapper sampler, const std::map &class_indexing = {}, @@ -1275,18 +1269,23 @@ class MindDataDataset : public Dataset { ~MindDataDataset() = default; }; -/// \brief Function to create a MindDataDataset +/// \brief Function to create a MindDataDataset. /// \param[in] dataset_file File name of one component of a mindrecord source. Other files with identical source /// in the same path will be found and loaded automatically. 
-/// \param[in] columns_list List of columns to be read (default={}) +/// \param[in] columns_list List of columns to be read (default={}). /// \param[in] sampler Shared pointer to a sampler object used to choose samples from the dataset. If sampler is not -/// given, -/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()), +/// given, a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()), /// supported sampler list: SubsetRandomSampler, PkSampler, RandomSampler, SequentialSampler, DistributedSampler. /// \param[in] padded_sample Samples will be appended to dataset, where keys are the same as column_list. /// \param[in] num_padded Number of padding samples. Dataset size plus num_padded should be divisible by num_shards. -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// \return Shared pointer to the current MindDataDataset +/// \param[in] shuffle_mode The mode for shuffling data every epoch (Default=ShuffleMode::kGlobal). +/// Can be any of: +/// ShuffleMode::kFalse - No shuffling is performed. +/// ShuffleMode::kFiles - Shuffle files only. +/// ShuffleMode::kGlobal - Shuffle both the files and samples. +/// ShuffleMode::kInfile - Shuffle samples in file. +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \return Shared pointer to the current MindDataDataset. inline std::shared_ptr MindData( const std::string &dataset_file, const std::vector &columns_list = {}, const std::shared_ptr &sampler = std::make_shared(), nlohmann::json *padded_sample = nullptr, @@ -1296,16 +1295,22 @@ inline std::shared_ptr MindData( padded_sample, num_padded, shuffle_mode, cache); } -/// \brief Function to create a MindDataDataset +/// \brief Function to create a MindDataDataset. /// \param[in] dataset_file File name of one component of a mindrecord source. 
Other files with identical source /// in the same path will be found and loaded automatically. -/// \param[in] columns_list List of columns to be read +/// \param[in] columns_list List of columns to be read. /// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset. /// supported sampler list: SubsetRandomSampler, PkSampler, RandomSampler, SequentialSampler, DistributedSampler. /// \param[in] padded_sample Samples will be appended to dataset, where keys are the same as column_list. /// \param[in] num_padded Number of padding samples. Dataset size plus num_padded should be divisible by num_shards. -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// \return Shared pointer to the current MindDataDataset +/// \param[in] shuffle_mode The mode for shuffling data every epoch (Default=ShuffleMode::kGlobal). +/// Can be any of: +/// ShuffleMode::kFalse - No shuffling is performed. +/// ShuffleMode::kFiles - Shuffle files only. +/// ShuffleMode::kGlobal - Shuffle both the files and samples. +/// ShuffleMode::kInfile - Shuffle samples in file. +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \return Shared pointer to the MindDataDataset. inline std::shared_ptr MindData(const std::string &dataset_file, const std::vector &columns_list, const Sampler *sampler, nlohmann::json *padded_sample = nullptr, int64_t num_padded = 0, @@ -1314,17 +1319,22 @@ inline std::shared_ptr MindData(const std::string &dataset_file return std::make_shared(StringToChar(dataset_file), VectorStringToChar(columns_list), sampler, padded_sample, num_padded, shuffle_mode, cache); } - -/// \brief Function to create a MindDataDataset +/// \brief Function to create a MindDataDataset. /// \param[in] dataset_file File name of one component of a mindrecord source. Other files with identical source /// in the same path will be found and loaded automatically. 
-/// \param[in] columns_list List of columns to be read +/// \param[in] columns_list List of columns to be read. /// \param[in] sampler Sampler object used to choose samples from the dataset. /// supported sampler list: SubsetRandomSampler, PkSampler, RandomSampler, SequentialSampler, DistributedSampler. /// \param[in] padded_sample Samples will be appended to dataset, where keys are the same as column_list. /// \param[in] num_padded Number of padding samples. Dataset size plus num_padded should be divisible by num_shards. -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// \return Shared pointer to the current MindDataDataset +/// \param[in] shuffle_mode The mode for shuffling data every epoch (Default=ShuffleMode::kGlobal). +/// Can be any of: +/// ShuffleMode::kFalse - No shuffling is performed. +/// ShuffleMode::kFiles - Shuffle files only. +/// ShuffleMode::kGlobal - Shuffle both the files and samples. +/// ShuffleMode::kInfile - Shuffle samples in file. +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \return Shared pointer to the MindDataDataset. inline std::shared_ptr MindData(const std::string &dataset_file, const std::vector &columns_list, const std::reference_wrapper sampler, @@ -1335,17 +1345,22 @@ inline std::shared_ptr MindData(const std::string &dataset_file padded_sample, num_padded, shuffle_mode, cache); } -/// \brief Function to create a MindDataDataset +/// \brief Function to create a MindDataDataset. /// \param[in] dataset_files List of dataset files to be read directly. -/// \param[in] columns_list List of columns to be read (default={}) +/// \param[in] columns_list List of columns to be read (default={}). /// \param[in] sampler Shared pointer to a sampler object used to choose samples from the dataset.
If sampler is not -/// given, -/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()), +/// given, a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()), /// supported sampler list: SubsetRandomSampler, PkSampler, RandomSampler, SequentialSampler, DistributedSampler. /// \param[in] padded_sample Samples will be appended to dataset, where keys are the same as column_list. /// \param[in] num_padded Number of padding samples. Dataset size plus num_padded should be divisible by num_shards. -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// \return Shared pointer to the current MindDataDataset +/// \param[in] shuffle_mode The mode for shuffling data every epoch (Default=ShuffleMode::kGlobal). +/// Can be any of: +/// ShuffleMode::kFalse - No shuffling is performed. +/// ShuffleMode::kFiles - Shuffle files only. +/// ShuffleMode::kGlobal - Shuffle both the files and samples. +/// ShuffleMode::kInfile - Shuffle samples in file. +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \return Shared pointer to the MindDataDataset. inline std::shared_ptr MindData( const std::vector &dataset_files, const std::vector &columns_list = {}, const std::shared_ptr &sampler = std::make_shared(), nlohmann::json *padded_sample = nullptr, @@ -1355,15 +1371,21 @@ inline std::shared_ptr MindData( padded_sample, num_padded, shuffle_mode, cache); } -/// \brief Function to create a MindDataDataset +/// \brief Function to create a MindDataDataset. /// \param[in] dataset_files List of dataset files to be read directly. -/// \param[in] columns_list List of columns to be read +/// \param[in] columns_list List of columns to be read. /// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset.
/// supported sampler list: SubsetRandomSampler, PkSampler, RandomSampler, SequentialSampler, DistributedSampler. /// \param[in] padded_sample Samples will be appended to dataset, where keys are the same as column_list. /// \param[in] num_padded Number of padding samples. Dataset size plus num_padded should be divisible by num_shards. -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// \return Shared pointer to the current MindDataDataset +/// \param[in] shuffle_mode The mode for shuffling data every epoch (Default=ShuffleMode::kGlobal). +/// Can be any of: +/// ShuffleMode::kFalse - No shuffling is performed. +/// ShuffleMode::kFiles - Shuffle files only. +/// ShuffleMode::kGlobal - Shuffle both the files and samples. +/// ShuffleMode::kInfile - Shuffle samples in file. +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \return Shared pointer to the MindDataDataset. inline std::shared_ptr MindData(const std::vector &dataset_files, const std::vector &columns_list, const Sampler *sampler, nlohmann::json *padded_sample = nullptr, int64_t num_padded = 0, @@ -1373,15 +1395,21 @@ inline std::shared_ptr MindData(const std::vector padded_sample, num_padded, shuffle_mode, cache); } -/// \brief Function to create a MindDataDataset +/// \brief Function to create a MindDataDataset. /// \param[in] dataset_files List of dataset files to be read directly. -/// \param[in] columns_list List of columns to be read +/// \param[in] columns_list List of columns to be read. /// \param[in] sampler Sampler object used to choose samples from the dataset. /// supported sampler list: SubsetRandomSampler, PkSampler, RandomSampler, SequentialSampler, DistributedSampler. /// \param[in] padded_sample Samples will be appended to dataset, where keys are the same as column_list. /// \param[in] num_padded Number of padding samples. Dataset size plus num_padded should be divisible by num_shards. 
-/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// \return Shared pointer to the current MindDataDataset +/// \param[in] shuffle_mode The mode for shuffling data every epoch (Default=ShuffleMode::kGlobal). +/// Can be any of: +/// ShuffleMode::kFalse - No shuffling is performed. +/// ShuffleMode::kFiles - Shuffle files only. +/// ShuffleMode::kGlobal - Shuffle both the files and samples. +/// ShuffleMode::kInfile - Shuffle samples in file. +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \return Shared pointer to the MindDataDataset. inline std::shared_ptr MindData(const std::vector &dataset_files, const std::vector &columns_list, const std::reference_wrapper sampler, @@ -1403,52 +1431,51 @@ class MnistDataset : public Dataset { ~MnistDataset() = default; }; -/// \brief Function to create a MnistDataset -/// \notes The generated dataset has two columns ["image", "label"] -/// \param[in] dataset_dir Path to the root directory that contains the dataset -/// \param[in] usage of MNIST, can be "train", "test" or "all" (default = "all"). +/// \brief Function to create a MnistDataset. +/// \note The generated dataset has two columns ["image", "label"]. +/// \param[in] dataset_dir Path to the root directory that contains the dataset. +/// \param[in] usage Part of dataset of MNIST, can be "train", "test" or "all" (default = "all"). /// \param[in] sampler Shared pointer to a sampler object used to choose samples from the dataset. If sampler is not -/// given, -/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// \return Shared pointer to the current MnistDataset +/// given, a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()). 
+/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \return Shared pointer to the MnistDataset. inline std::shared_ptr Mnist(const std::string &dataset_dir, const std::string &usage = "all", const std::shared_ptr &sampler = std::make_shared(), const std::shared_ptr &cache = nullptr) { return std::make_shared(StringToChar(dataset_dir), StringToChar(usage), sampler, cache); } -/// \brief Function to create a MnistDataset -/// \notes The generated dataset has two columns ["image", "label"] -/// \param[in] dataset_dir Path to the root directory that contains the dataset -/// \param[in] usage of MNIST, can be "train", "test" or "all" +/// \brief Function to create a MnistDataset. +/// \note The generated dataset has two columns ["image", "label"]. +/// \param[in] dataset_dir Path to the root directory that contains the dataset. +/// \param[in] usage Part of dataset of MNIST, can be "train", "test" or "all". /// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset. -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// \return Shared pointer to the current MnistDataset +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \return Shared pointer to the MnistDataset. inline std::shared_ptr Mnist(const std::string &dataset_dir, const std::string &usage, const Sampler *sampler, const std::shared_ptr &cache = nullptr) { return std::make_shared(StringToChar(dataset_dir), StringToChar(usage), sampler, cache); } -/// \brief Function to create a MnistDataset -/// \notes The generated dataset has two columns ["image", "label"] -/// \param[in] dataset_dir Path to the root directory that contains the dataset -/// \param[in] usage of MNIST, can be "train", "test" or "all" +/// \brief Function to create a MnistDataset. +/// \note The generated dataset has two columns ["image", "label"]. 
+/// \param[in] dataset_dir Path to the root directory that contains the dataset. +/// \param[in] usage Part of dataset of MNIST, can be "train", "test" or "all". /// \param[in] sampler Sampler object used to choose samples from the dataset. -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// \return Shared pointer to the current MnistDataset +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \return Shared pointer to the MnistDataset. inline std::shared_ptr Mnist(const std::string &dataset_dir, const std::string &usage, const std::reference_wrapper sampler, const std::shared_ptr &cache = nullptr) { return std::make_shared(StringToChar(dataset_dir), StringToChar(usage), sampler, cache); } -/// \brief Function to create a ConcatDataset -/// \notes Reload "+" operator to concat two datasets -/// \param[in] datasets1 Shared pointer to the first dataset to be concatenated -/// \param[in] datasets2 Shared pointer to the second dataset to be concatenated -/// \return Shared pointer to the current ConcatDataset +/// \brief Function to create a ConcatDataset. +/// \note Reload "+" operator to concat two datasets. +/// \param[in] datasets1 Shared pointer to the first dataset to be concatenated. +/// \param[in] datasets2 Shared pointer to the second dataset to be concatenated. +/// \return Shared pointer to the current Dataset. 
inline std::shared_ptr operator+(const std::shared_ptr &datasets1, const std::shared_ptr &datasets2) { return std::make_shared(std::vector({datasets1, datasets2})); @@ -1465,12 +1492,12 @@ class RandomDataDataset : public Dataset { ~RandomDataDataset() = default; }; -/// \brief Function to create a RandomDataset -/// \param[in] total_rows Number of rows for the dataset to generate (default=0, number of rows is random) -/// \param[in] schema SchemaObj to set column type, data type and data shape -/// \param[in] columns_list List of columns to be read (default={}, read all columns) -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// \return Shared pointer to the current Dataset +/// \brief Function to create a RandomDataset. +/// \param[in] total_rows Number of rows for the dataset to generate (default=0, number of rows is random). +/// \param[in] schema SchemaObj to set column type, data type and data shape. +/// \param[in] columns_list List of columns to be read (default={}, read all columns). +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \return Shared pointer to the RandomDataset. template > std::shared_ptr RandomData(const int32_t &total_rows = 0, const T &schema = nullptr, const std::vector &columns_list = {}, @@ -1494,22 +1521,22 @@ class TextFileDataset : public Dataset { ~TextFileDataset() = default; }; -/// \brief Function to create a TextFileDataset -/// \notes The generated dataset has one column ['text'] +/// \brief Function to create a TextFileDataset. +/// \note The generated dataset has one column ['text']. /// \param[in] dataset_files List of files to be read to search for a pattern of files. The list /// will be sorted in a lexicographical order. -/// \param[in] num_samples The number of samples to be included in the dataset. -/// (Default = 0 means all samples.) -/// \param[in] shuffle The mode for shuffling data every epoch. 
(Default=ShuffleMode.kGlobal) +/// \param[in] num_samples The number of samples to be included in the dataset +/// (Default = 0 means all samples). +/// \param[in] shuffle The mode for shuffling data every epoch (Default=ShuffleMode.kGlobal). /// Can be any of: /// ShuffleMode.kFalse - No shuffling is performed. /// ShuffleMode.kFiles - Shuffle files only. /// ShuffleMode.kGlobal - Shuffle both the files and samples. -/// \param[in] num_shards Number of shards that the dataset should be divided into. (Default = 1) +/// \param[in] num_shards Number of shards that the dataset should be divided into (Default = 1). /// \param[in] shard_id The shard ID within num_shards. This argument should be -/// specified only when num_shards is also specified. (Default = 0) -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// \return Shared pointer to the current TextFileDataset +/// specified only when num_shards is also specified (Default = 0). +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \return Shared pointer to the TextFileDataset. inline std::shared_ptr TextFile(const std::vector &dataset_files, int64_t num_samples = 0, ShuffleMode shuffle = ShuffleMode::kGlobal, int32_t num_shards = 1, int32_t shard_id = 0, @@ -1533,14 +1560,14 @@ class TFRecordDataset : public Dataset { ~TFRecordDataset() = default; }; -/// \brief Function to create a TFRecordDataset +/// \brief Function to create a TFRecordDataset. /// \param[in] dataset_files List of files to be read to search for a pattern of files. The list /// will be sorted in a lexicographical order. /// \param[in] schema SchemaObj or string to schema path. (Default = nullptr, which means that the -/// meta data from the TFData file is considered the schema.) -/// \param[in] columns_list List of columns to be read. (Default = {}, read all columns) -/// \param[in] num_samples The number of samples to be included in the dataset. 
-/// (Default = 0 means all samples.) +/// meta data from the TFData file is considered the schema). +/// \param[in] columns_list List of columns to be read (Default = {}, read all columns). +/// \param[in] num_samples The number of samples to be included in the dataset +/// (Default = 0 means all samples). /// If num_samples is 0 and numRows(parsed from schema) does not exist, read the full dataset; /// If num_samples is 0 and numRows(parsed from schema) is greater than 0, read numRows rows; /// If both num_samples and numRows(parsed from schema) are greater than 0, read num_samples rows. @@ -1549,13 +1576,13 @@ class TFRecordDataset : public Dataset { /// ShuffleMode::kFalse - No shuffling is performed. /// ShuffleMode::kFiles - Shuffle files only. /// ShuffleMode::kGlobal - Shuffle both the files and samples. -/// \param[in] num_shards Number of shards that the dataset should be divided into. (Default = 1) +/// \param[in] num_shards Number of shards that the dataset should be divided into (Default = 1). /// \param[in] shard_id The shard ID within num_shards. This argument should be specified only -/// when num_shards is also specified. (Default = 0) -/// \param[in] shard_equal_rows Get equal rows for all shards. (Default = False, number of rows of -/// each shard may be not equal) -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// \return Shared pointer to the current TFRecordDataset +/// when num_shards is also specified (Default = 0). +/// \param[in] shard_equal_rows Get equal rows for all shards (Default = False, number of rows of +/// each shard may be not equal). +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \return Shared pointer to the TFRecordDataset. 
template > std::shared_ptr TFRecord(const std::vector &dataset_files, const T &schema = nullptr, const std::vector &columns_list = {}, int64_t num_samples = 0, @@ -1601,22 +1628,21 @@ class VOCDataset : public Dataset { ~VOCDataset() = default; }; -/// \brief Function to create a VOCDataset -/// \notes The generated dataset has multi-columns : +/// \brief Function to create a VOCDataset. +/// \note The generated dataset has multi-columns : /// - task='Detection', column: [['image', dtype=uint8], ['bbox', dtype=float32], ['label', dtype=uint32], /// ['difficult', dtype=uint32], ['truncate', dtype=uint32]]. /// - task='Segmentation', column: [['image', dtype=uint8], ['target',dtype=uint8]]. -/// \param[in] dataset_dir Path to the root directory that contains the dataset -/// \param[in] task Set the task type of reading voc data, now only support "Segmentation" or "Detection" +/// \param[in] dataset_dir Path to the root directory that contains the dataset. +/// \param[in] task Set the task type of reading voc data, now only support "Segmentation" or "Detection". /// \param[in] usage The type of data list text file to be read (default = "train"). -/// \param[in] class_indexing A str-to-int mapping from label name to index, only valid in "Detection" task -/// \param[in] decode Decode the images after reading +/// \param[in] class_indexing A str-to-int mapping from label name to index, only valid in "Detection" task. +/// \param[in] decode Decode the images after reading. /// \param[in] sampler Shared pointer to a sampler object used to choose samples from the dataset. If sampler is not -/// given, -/// a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()) -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// \param[in] extra_metadata Flag to add extra meta-data to row. 
(default=false) -/// \return Shared pointer to the current Dataset +/// given, a `RandomSampler` will be used to randomly iterate the entire dataset (default = RandomSampler()). +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \param[in] extra_metadata Flag to add extra meta-data to row (default=false). +/// \return Shared pointer to the VOCDataset. inline std::shared_ptr VOC(const std::string &dataset_dir, const std::string &task = "Segmentation", const std::string &usage = "train", const std::map &class_indexing = {}, bool decode = false, @@ -1627,20 +1653,20 @@ inline std::shared_ptr VOC(const std::string &dataset_dir, const std MapStringToChar(class_indexing), decode, sampler, cache, extra_metadata); } -/// \brief Function to create a VOCDataset -/// \notes The generated dataset has multi-columns : +/// \brief Function to create a VOCDataset. +/// \note The generated dataset has multi-columns : /// - task='Detection', column: [['image', dtype=uint8], ['bbox', dtype=float32], ['label', dtype=uint32], /// ['difficult', dtype=uint32], ['truncate', dtype=uint32]]. /// - task='Segmentation', column: [['image', dtype=uint8], ['target',dtype=uint8]]. -/// \param[in] dataset_dir Path to the root directory that contains the dataset -/// \param[in] task Set the task type of reading voc data, now only support "Segmentation" or "Detection" +/// \param[in] dataset_dir Path to the root directory that contains the dataset. +/// \param[in] task Set the task type of reading voc data, now only support "Segmentation" or "Detection". /// \param[in] usage The type of data list text file to be read. -/// \param[in] class_indexing A str-to-int mapping from label name to index, only valid in "Detection" task -/// \param[in] decode Decode the images after reading +/// \param[in] class_indexing A str-to-int mapping from label name to index, only valid in "Detection" task. +/// \param[in] decode Decode the images after reading.
/// \param[in] sampler Raw pointer to a sampler object used to choose samples from the dataset. -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// \param[in] extra_metadata Flag to add extra meta-data to row. (default=false) -/// \return Shared pointer to the current Dataset +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \param[in] extra_metadata Flag to add extra meta-data to row (default=false). +/// \return Shared pointer to the VOCDataset. inline std::shared_ptr VOC(const std::string &dataset_dir, const std::string &task, const std::string &usage, const std::map &class_indexing, bool decode, const Sampler *sampler, @@ -1650,20 +1676,20 @@ inline std::shared_ptr VOC(const std::string &dataset_dir, const std MapStringToChar(class_indexing), decode, sampler, cache, extra_metadata); } -/// \brief Function to create a VOCDataset -/// \notes The generated dataset has multi-columns : +/// \brief Function to create a VOCDataset. +/// \note The generated dataset has multi-columns : /// - task='Detection', column: [['image', dtype=uint8], ['bbox', dtype=float32], ['label', dtype=uint32], /// ['difficult', dtype=uint32], ['truncate', dtype=uint32]]. /// - task='Segmentation', column: [['image', dtype=uint8], ['target',dtype=uint8]]. -/// \param[in] dataset_dir Path to the root directory that contains the dataset -/// \param[in] task Set the task type of reading voc data, now only support "Segmentation" or "Detection" +/// \param[in] dataset_dir Path to the root directory that contains the dataset. +/// \param[in] task Set the task type of reading voc data, now only support "Segmentation" or "Detection". /// \param[in] usage The type of data list text file to be read. 
-/// \param[in] class_indexing A str-to-int mapping from label name to index, only valid in "Detection" task -/// \param[in] decode Decode the images after reading +/// \param[in] class_indexing A str-to-int mapping from label name to index, only valid in "Detection" task. +/// \param[in] decode Decode the images after reading. /// \param[in] sampler Sampler object used to choose samples from the dataset. -/// \param[in] cache Tensor cache to use. (default=nullptr which means no cache is used). -/// \param[in] extra_metadata Flag to add extra meta-data to row. (default=false) -/// \return Shared pointer to the current Dataset +/// \param[in] cache Tensor cache to use (default=nullptr which means no cache is used). +/// \param[in] extra_metadata Flag to add extra meta-data to row (default=false). +/// \return Shared pointer to the VOCDataset. inline std::shared_ptr VOC(const std::string &dataset_dir, const std::string &task, const std::string &usage, const std::map &class_indexing, bool decode, const std::reference_wrapper sampler, @@ -1679,7 +1705,7 @@ std::shared_ptr CreateDatasetCacheCharIF(session_id_type id, uint6 std::optional num_connections = std::nullopt, std::optional prefetch_sz = std::nullopt); -/// \brief Function the create a cache to be attached to a dataset +/// \brief Function to create a cache to be attached to a dataset. /// \param id A user assigned session id for the current pipeline. /// \param mem_sz Size of the memory set aside for the row caching (default=0 which means unlimited, /// note that it might bring in the risk of running out of memory on the machine). @@ -1698,10 +1724,10 @@ inline std::shared_ptr CreateDatasetCache(session_id_type id, uint prefetch_sz); } -/// \brief Function to create a ZipDataset -/// \notes Applies zip to the dataset -/// \param[in] datasets List of shared pointers to the datasets that we want to zip -/// \return Shared pointer to the current Dataset +/// \brief Function to create a ZipDataset. 
+/// \note Applies zip to the dataset. +/// \param[in] datasets List of shared pointers to the datasets that we want to zip. +/// \return Shared pointer to the ZipDataset. inline std::shared_ptr Zip(const std::vector> &datasets) { return std::make_shared(datasets); } diff --git a/mindspore/ccsrc/minddata/dataset/include/dataset/execute.h b/mindspore/ccsrc/minddata/dataset/include/dataset/execute.h index 45962bd09b6..df37ca0acbe 100644 --- a/mindspore/ccsrc/minddata/dataset/include/dataset/execute.h +++ b/mindspore/ccsrc/minddata/dataset/include/dataset/execute.h @@ -32,38 +32,74 @@ class DeviceResource; // class to run tensor operations in eager mode class Execute { public: - /// \brief Constructor - // FIXME - Temporarily overload Execute to support both TensorOperation and TensorTransform + /// \brief Constructor. + /// \param[in] op TensorOperation to be applied in Eager mode, it accepts op in type of shared pointer. + /// \param[in] deviceType Target device env to perform operation, can be kCPU/kGPU/kAscend310 (default=kCPU). + /// \param[in] device_id Target device id to perform operation, only valid when deviceType=kAscend310 (default=0). explicit Execute(std::shared_ptr op, MapTargetDevice deviceType = MapTargetDevice::kCpu, uint32_t device_id = 0); + + /// \brief Constructor. + /// \param[in] op TensorTransform to be applied in Eager mode, it accepts op in type of shared pointer. + /// \param[in] deviceType Target device env to perform operation, can be kCPU/kGPU/kAscend310 (default=kCPU). + /// \param[in] device_id Target device id to perform operation, only valid when deviceType=kAscend310 (default=0). explicit Execute(std::shared_ptr op, MapTargetDevice deviceType = MapTargetDevice::kCpu, uint32_t device_id = 0); + + /// \brief Constructor. + /// \param[in] op TensorTransform to be applied in Eager mode, it accepts op in type of reference. + /// \param[in] deviceType Target device env to perform operation, can be kCPU/kGPU/kAscend310 (default=kCPU). 
+ /// \param[in] device_id Target device id to perform operation, only valid when deviceType=kAscend310 (default=0). explicit Execute(std::reference_wrapper op, MapTargetDevice deviceType = MapTargetDevice::kCpu, uint32_t device_id = 0); + + /// \brief Constructor. + /// \param[in] op TensorTransform to be applied in Eager mode, it accepts op in type of raw pointer. + /// \param[in] deviceType Target device env to perform operation, can be kCPU/kGPU/kAscend310 (default=kCPU). + /// \param[in] device_id Target device id to perform operation, only valid when deviceType=kAscend310 (default=0). explicit Execute(TensorTransform *op, MapTargetDevice deviceType = MapTargetDevice::kCpu, uint32_t device_id = 0); + /// \brief Constructor. + /// \param[in] ops A vector of TensorOperations to be applied in Eager mode, it accepts op in type of shared pointer. + /// \param[in] deviceType Target device env to perform operation, can be kCPU/kGPU/kAscend310 (default=kCPU). + /// \param[in] device_id Target device id to perform operation, only valid when deviceType=kAscend310 (default=0). explicit Execute(std::vector> ops, MapTargetDevice deviceType = MapTargetDevice::kCpu, uint32_t device_id = 0); + + /// \brief Constructor. + /// \param[in] ops A vector of TensorTransforms to be applied in Eager mode, it accepts op in type of shared pointer. + /// \param[in] deviceType Target device env to perform operation, can be kCPU/kGPU/kAscend310 (default=kCPU). + /// \param[in] device_id Target device id to perform operation, only valid when deviceType=kAscend310 (default=0). explicit Execute(std::vector> ops, MapTargetDevice deviceType = MapTargetDevice::kCpu, uint32_t device_id = 0); + + /// \brief Constructor. + /// \param[in] ops A vector of TensorTransforms to be applied in Eager mode, it accepts op in type of raw pointer. + /// \param[in] deviceType Target device env to perform operation, can be kCPU/kGPU/kAscend310 (default=kCPU). 
+ /// \param[in] device_id Target device id to perform operation, only valid when deviceType=kAscend310 (default=0). explicit Execute(const std::vector> ops, MapTargetDevice deviceType = MapTargetDevice::kCpu, uint32_t device_id = 0); + + /// \brief Constructor. + /// \param[in] ops A vector of TensorTransforms to be applied in Eager mode, it accepts op in type of raw pointer. + /// \param[in] deviceType Target device env to perform operation, can be kCPU/kGPU/kAscend310 (default=kCPU). + /// \param[in] device_id Target device id to perform operation, only valid when deviceType=kAscend310 (default=0). explicit Execute(const std::vector &ops, MapTargetDevice deviceType = MapTargetDevice::kCpu, uint32_t device_id = 0); - /// \brief Destructor + /// \brief Destructor. ~Execute(); - /// \brief callable function to execute the TensorOperation in eager mode - /// \param[in] input Tensor to be transformed - /// \param[out] output Transformed tensor - /// \return Status code + /// \brief Callable function to execute the TensorTransform in eager mode. + /// \param[in] input Tensor to be transformed. + /// \param[out] output Transformed tensor. + /// \return Status error code, returns OK if no error encountered. Status operator()(const mindspore::MSTensor &input, mindspore::MSTensor *output); - /// \brief callable function to execute the TensorOperation in eager mode - /// \param[in] input_tensor_list List of Tensor to be transformed - /// \param[out] out Result tensor after transform - /// \return - Status + /// \brief Callable function to execute the TensorTransform in eager mode. + /// \param[in] input_tensor_list List of Tensor to be transformed. + /// \param[out] out Result tensor after transform. + /// \return Status error code, returns OK if no error encountered. 
Status operator()(const std::vector &input_tensor_list, std::vector *out); Status DeviceMemoryRelease(); diff --git a/mindspore/ccsrc/minddata/dataset/include/dataset/iterator.h b/mindspore/ccsrc/minddata/dataset/include/dataset/iterator.h index 300a347245d..4b041fd843c 100644 --- a/mindspore/ccsrc/minddata/dataset/include/dataset/iterator.h +++ b/mindspore/ccsrc/minddata/dataset/include/dataset/iterator.h @@ -56,13 +56,13 @@ class Iterator { /// \brief Method for building and launching the pipeline. /// \param[in] ops - a vector of DatasetOp in the data pipeline. /// \param[in] num_epochs Number of epochs passed down to EpochCtrlNode, default -1, infinite epochs - /// \return - a Status error code, returns OK if no error encountered. + /// \return Status error code, returns OK if no error encountered. Status BuildAndLaunchTree(std::shared_ptr ds, int32_t num_epochs); /// \brief Function to get the next row from the data pipeline. /// \note Type of return data is a map(with column name). /// \param[out] row - the output tensor row. - /// \return - a Status error code, returns OK if no error encountered. + /// \return Status error code, returns OK if no error encountered. Status GetNextRow(MSTensorMap *row) { MSTensorMapChar row_; row_.clear(); @@ -73,13 +73,13 @@ class Iterator { } // Char interface(CharIF) of GetNextRow - // This api exists because std::string will constrained by ABI compile macro but char don't. + // This API exists because std::string will constrained by ABI compile option while char don't. Status GetNextRowCharIF(MSTensorMapChar *row); /// \brief Function to get the next row from the data pipeline. /// \note Type of return data is a vector(without column name). /// \param[out] row - the output tensor row. - /// \return - a Status error code, returns OK if no error encountered. + /// \return Status error code, returns OK if no error encountered. virtual Status GetNextRow(MSTensorVec *row); /// \brief Function to shut down the data pipeline. 
@@ -127,22 +127,22 @@ class PullIterator : public Iterator { /// \brief Function to get next row from the data pipeline. /// \note Type of return data is a vector(without column name). - /// \param[out] row - the output tensor row. - /// \return Returns true if no error encountered else false. + /// \param[out] row The output tensor row. + /// \return Status error code, returns OK if no error encountered. Status GetNextRow(MSTensorVec *const row) override; /// \brief Function to get specified rows from the data pipeline. /// \note Type of return data is a vector(without column name). /// \note This behavior is subject to change - /// \param[in] num_rows - the number of rows to fetch. - /// \param[out] row - the output tensor row. - /// \return Returns true if no error encountered else false. + /// \param[in] num_rows The number of rows to fetch. + /// \param[out] row The output tensor row. + /// \return Status error code, returns OK if no error encountered. Status GetRows(int32_t num_rows, std::vector *const row); /// \brief Method for building and launching the pipeline. /// \note Consider making this function protected. /// \param[in] ds - The root node that calls the function - /// \return - a Status error code, returns OK if no error encountered. + /// \return Status error code, returns OK if no error encountered. Status BuildAndLaunchTree(std::shared_ptr ds); private: diff --git a/mindspore/ccsrc/minddata/dataset/include/dataset/samplers.h b/mindspore/ccsrc/minddata/dataset/include/dataset/samplers.h index d507ba299cc..753e042759f 100644 --- a/mindspore/ccsrc/minddata/dataset/include/dataset/samplers.h +++ b/mindspore/ccsrc/minddata/dataset/include/dataset/samplers.h @@ -67,19 +67,19 @@ class Sampler : std::enable_shared_from_this { }; /// \brief A class to represent a Distributed Sampler in the data pipeline. -/// \notes A Sampler that accesses a shard of the dataset. +/// \note A Sampler that accesses a shard of the dataset. 
class DistributedSampler final : public Sampler { friend std::shared_ptr SelectSampler(int64_t, bool, int32_t, int32_t); public: /// \brief Constructor - /// \param[in] num_shards - Number of shards to divide the dataset into. - /// \param[in] shard_id - Shard ID of the current shard within num_shards. - /// \param[in] shuffle - If true, the indices are shuffled. - /// \param[in] num_samples - The number of samples to draw (default to all elements). - /// \param[in] seed - The seed in use when shuffle is true. - /// \param[in] offset - The starting position where access to elements in the dataset begins. - /// \param[in] even_dist - If true, each shard would return the same number of rows (default to true). + /// \param[in] num_shards Number of shards to divide the dataset into. + /// \param[in] shard_id Shard ID of the current shard within num_shards. + /// \param[in] shuffle If true, the indices are shuffled (default=true). + /// \param[in] num_samples The number of samples to draw (default=0, return all samples). + /// \param[in] seed The seed in use when shuffle is true (default=1). + /// \param[in] offset The starting position where access to elements in the dataset begins (default=-1). + /// \param[in] even_dist If true, each shard would return the same number of rows (default=true). /// If false the total rows returned by all the shards would not have overlap. explicit DistributedSampler(int64_t num_shards, int64_t shard_id, bool shuffle = true, int64_t num_samples = 0, uint32_t seed = 1, int64_t offset = -1, bool even_dist = true); @@ -102,16 +102,16 @@ class DistributedSampler final : public Sampler { }; /// \brief A class to represent a PK Sampler in the data pipeline. -/// \notes Samples K elements for each P class in the dataset. +/// \note Samples K elements for each P class in the dataset. /// This will sample all classes. 
class PKSampler final : public Sampler { friend std::shared_ptr SelectSampler(int64_t, bool, int32_t, int32_t); public: /// \brief Constructor - /// \param[in] num_val - Number of elements to sample for each class. - /// \param[in] shuffle - If true, the class IDs are shuffled. - /// \param[in] num_samples - The number of samples to draw (default to all elements). + /// \param[in] num_val Number of elements to sample for each class. + /// \param[in] shuffle If true, the class IDs are shuffled (default=false). + /// \param[in] num_samples The number of samples to draw (default=0, return all samples). explicit PKSampler(int64_t num_val, bool shuffle = false, int64_t num_samples = 0); /// \brief Destructor. @@ -129,14 +129,14 @@ class PKSampler final : public Sampler { }; /// \brief A class to represent a Random Sampler in the data pipeline. -/// \notes Samples the elements randomly. +/// \note Samples the elements randomly. class RandomSampler final : public Sampler { friend std::shared_ptr SelectSampler(int64_t, bool, int32_t, int32_t); public: /// \brief Constructor - /// \param[in] replacement - If true, put the sample ID back for the next draw. - /// \param[in] num_samples - The number of samples to draw (default to all elements). + /// \param[in] replacement If true, put the sample ID back for the next draw (default=false). + /// \param[in] num_samples The number of samples to draw (default=0, return all samples). explicit RandomSampler(bool replacement = false, int64_t num_samples = 0); /// \brief Destructor. @@ -153,14 +153,14 @@ class RandomSampler final : public Sampler { }; /// \brief A class to represent a Sequential Sampler in the data pipeline. -/// \notes Samples the dataset elements sequentially, same as not having a sampler. +/// \note Samples the dataset elements sequentially, same as not having a sampler. 
class SequentialSampler final : public Sampler { friend std::shared_ptr SelectSampler(int64_t, bool, int32_t, int32_t); public: /// \brief Constructor - /// \param[in] start_index - Index to start sampling at (default to start at first id). - /// \param[in] num_samples - The number of samples to draw (default to all elements). + /// \param[in] start_index Index to start sampling at (default=0, start at first id). + /// \param[in] num_samples The number of samples to draw (default=0, return all samples). explicit SequentialSampler(int64_t start_index = 0, int64_t num_samples = 0); /// \brief Destructor. @@ -177,14 +177,14 @@ class SequentialSampler final : public Sampler { }; /// \brief A class to represent a Subset Sampler in the data pipeline. -/// \notes Samples the elements from a sequence of indices. +/// \note Samples the elements from a sequence of indices. class SubsetSampler : public Sampler { friend std::shared_ptr SelectSampler(int64_t, bool, int32_t, int32_t); public: /// \brief Constructor - /// \param[in] indices - A vector sequence of indices. - /// \param[in] num_samples - The number of samples to draw (default to all elements). + /// \param[in] indices A vector sequence of indices. + /// \param[in] num_samples The number of samples to draw (default=0, return all samples). explicit SubsetSampler(std::vector indices, int64_t num_samples = 0); /// \brief Destructor. @@ -200,14 +200,14 @@ class SubsetSampler : public Sampler { }; /// \brief A class to represent a Subset Random Sampler in the data pipeline. -/// \notes Samples the elements randomly from a sequence of indices. +/// \note Samples the elements randomly from a sequence of indices. class SubsetRandomSampler final : public SubsetSampler { friend std::shared_ptr SelectSampler(int64_t, bool, int32_t, int32_t); public: /// \brief Constructor - /// \param[in] indices - A vector sequence of indices. - /// \param[in] num_samples - The number of samples to draw (default to all elements). 
+ /// \param[in] indices A vector sequence of indices. + /// \param[in] num_samples The number of samples to draw (default=0, return all samples). explicit SubsetRandomSampler(std::vector indices, int64_t num_samples = 0); /// \brief Destructor. @@ -220,16 +220,16 @@ class SubsetRandomSampler final : public SubsetSampler { }; /// \brief A class to represent a Weighted Random Sampler in the data pipeline. -/// \notes Samples the elements from [0, len(weights) - 1] randomly with the given +/// \note Samples the elements from [0, len(weights) - 1] randomly with the given /// weights (probabilities). class WeightedRandomSampler final : public Sampler { friend std::shared_ptr SelectSampler(int64_t, bool, int32_t, int32_t); public: /// \brief Constructor - /// \param[in] weights - A vector sequence of weights, not necessarily summing up to 1. - /// \param[in] num_samples - The number of samples to draw (default to all elements). - /// \param[in] replacement - If true, put the sample ID back for the next draw. + /// \param[in] weights A vector sequence of weights, not necessarily summing up to 1. + /// \param[in] num_samples The number of samples to draw (default=0, return all samples). + /// \param[in] replacement If true, put the sample ID back for the next draw (default=true). explicit WeightedRandomSampler(std::vector weights, int64_t num_samples = 0, bool replacement = true); /// \brief Destructor. diff --git a/mindspore/ccsrc/minddata/dataset/include/dataset/text.h b/mindspore/ccsrc/minddata/dataset/include/dataset/text.h index 78ba4633f0d..842807dc202 100644 --- a/mindspore/ccsrc/minddata/dataset/include/dataset/text.h +++ b/mindspore/ccsrc/minddata/dataset/include/dataset/text.h @@ -40,11 +40,11 @@ namespace text { #ifndef _WIN32 /// \brief Tokenize a scalar tensor of UTF-8 string by specific rules. -/// \notes BasicTokenizer is not supported on Windows platform yet. +/// \note BasicTokenizer is not supported on Windows platform yet. 
class BasicTokenizer final : public TensorTransform { public: /// \brief Constructor. - /// \param[in] lower_case If true, apply CaseFold, NormalizeUTF8(NFD mode), RegexReplace operation on input text to + /// \param[in] lower_case If true, apply CaseFold, NormalizeUTF8 (NFD mode), RegexReplace operation on input text to /// fold the text to lower case and strip accents characters. If false, only apply /// NormalizeUTF8('normalization_form' mode) operation on input text (default=false). /// \param[in] keep_whitespace If true, the whitespace will be kept in out tokens (default=false). @@ -71,7 +71,7 @@ class BasicTokenizer final : public TensorTransform { }; /// \brief Tokenizer used for Bert text process. -/// \notes BertTokenizer is not supported on Windows platform yet. +/// \note BertTokenizer is not supported on Windows platform yet. class BertTokenizer final : public TensorTransform { public: /// \brief Constructor. @@ -80,7 +80,7 @@ class BertTokenizer final : public TensorTransform { /// \param[in] max_bytes_per_token Tokens exceeding this length will not be further split (default=100). /// \param[in] unknown_token When a token cannot be found, return the token directly if 'unknown_token' is an empty /// string, else return the string specified(default='[UNK]'). - /// \param[in] lower_case If true, apply CaseFold, NormalizeUTF8(NFD mode), RegexReplace operation on input text to + /// \param[in] lower_case If true, apply CaseFold, NormalizeUTF8 (NFD mode), RegexReplace operation on input text to /// fold the text to lower case and strip accents characters. If false, only apply /// NormalizeUTF8('normalization_form' mode) operation on input text (default=false). /// \param[in] keep_whitespace If true, the whitespace will be kept in out tokens (default=false). @@ -127,13 +127,13 @@ class CaseFold final : public TensorTransform { protected: /// \brief Function to convert TensorTransform object into a TensorOperation object. 
- //// \return Shared pointer to TensorOperation object. + /// \return Shared pointer to TensorOperation object. std::shared_ptr Parse() override; }; #endif /// \brief Tokenize Chinese string into words based on dictionary. -/// \notes The integrity of the HMMSEgment algorithm and MPSegment algorithm files must be confirmed. +/// \note The integrity of the HMMSEgment algorithm and MPSegment algorithm files must be confirmed. class JiebaTokenizer final : public TensorTransform { public: /// \brief Constructor. @@ -161,10 +161,12 @@ class JiebaTokenizer final : public TensorTransform { /// The added word will not be written into the built-in dictionary on disk. /// \param[in] freq The frequency of the word to be added. The higher the frequency, /// the better chance the word will be tokenized (default=None, use default frequency). + /// \return Status error code, returns OK if no error encountered. Status AddWord(const std::string &word, int64_t freq = 0) { return AddWordChar(StringToChar(word), freq); } /// \brief Add user defined dictionary of word-freq pairs to JiebaTokenizer's dictionary. /// \param[in] user_dict Vector of word-freq pairs to be added to JiebaTokenizer's dictionary. + /// \return Status error code, returns OK if no error encountered. Status AddDict(const std::vector> &user_dict) { return AddDictChar(PairStringInt64ToPairCharInt64(user_dict)); } @@ -173,6 +175,7 @@ class JiebaTokenizer final : public TensorTransform { /// Only valid word-freq pairs in user provided file will be added into the dictionary. /// Rows containing invalid input will be ignored, no error nor warning Status is returned. /// \param[in] file_path Path to the dictionary which includes user defined word-freq pairs. + /// \return Status error code, returns OK if no error encountered. 
Status AddDict(const std::string &file_path) { return AddDictChar(StringToChar(file_path)); } protected: @@ -322,7 +325,7 @@ class RegexTokenizer final : public TensorTransform { /// \brief Constructor. /// \param[in] delim_pattern The pattern of regex delimiters. /// \param[in] keep_delim_pattern The string matched by 'delim_pattern' can be kept as a token if it can be - /// matched by 'keep_delim_pattern'. The default value is an empty string ("") + /// matched by 'keep_delim_pattern'. The default value is an empty string (""). /// which means that delimiters will not be kept as an output token (default=""). /// \param[in] with_offsets Whether or not output offsets of tokens (default=false). explicit RegexTokenizer(std::string delim_pattern, std::string keep_delim_pattern = "", bool with_offsets = false) diff --git a/mindspore/ccsrc/minddata/dataset/include/dataset/transforms.h b/mindspore/ccsrc/minddata/dataset/include/dataset/transforms.h index 14791e8f2a5..a7d4eab3007 100644 --- a/mindspore/ccsrc/minddata/dataset/include/dataset/transforms.h +++ b/mindspore/ccsrc/minddata/dataset/include/dataset/transforms.h @@ -128,7 +128,7 @@ class SliceOption { namespace transforms { /// \brief Compose Op. -/// \notes Compose a list of transforms into a single transform. +/// \note Compose a list of transforms into a single transform. class Compose final : public TensorTransform { public: /// \brief Constructor. @@ -155,7 +155,7 @@ class Compose final : public TensorTransform { }; /// \brief Concatenate Op. -/// \notes Tensor operation that concatenates all columns into a single tensor. +/// \note Tensor operation that concatenates all columns into a single tensor. class Concatenate final : public TensorTransform { public: /// \brief Constructor. @@ -178,7 +178,7 @@ class Concatenate final : public TensorTransform { }; /// \brief Duplicate Op. -/// \notes Duplicate the input tensor to a new output tensor. +/// \note Duplicate the input tensor to a new output tensor. 
/// The input tensor is carried over to the output list. class Duplicate final : public TensorTransform { public: @@ -195,7 +195,7 @@ class Duplicate final : public TensorTransform { }; /// \brief Fill Op. -/// \notes Tensor operation to fill all elements in the tensor with the specified value. +/// \note Tensor operation to fill all elements in the tensor with the specified value. /// The output tensor will have the same shape and type as the input tensor. class Fill final : public TensorTransform { public: @@ -219,7 +219,7 @@ class Fill final : public TensorTransform { }; /// \brief Mask Op. -/// \notes Mask content of the input tensor with the given predicate. +/// \note Mask content of the input tensor with the given predicate. /// Any element of the tensor that matches the predicate will be evaluated to True, otherwise False. class Mask final : public TensorTransform { public: @@ -246,7 +246,7 @@ class Mask final : public TensorTransform { }; /// \brief OneHot Op. -/// \notes Convert the labels into OneHot format. +/// \note Convert the labels into OneHot format. class OneHot final : public TensorTransform { public: /// \brief Constructor. @@ -267,7 +267,7 @@ class OneHot final : public TensorTransform { }; /// \brief PadEnd Op. -/// \notes Pad input tensor according to pad_shape, need to have same rank. +/// \note Pad input tensor according to pad_shape, need to have same rank. class PadEnd final : public TensorTransform { public: /// \brief Constructor. @@ -291,20 +291,20 @@ class PadEnd final : public TensorTransform { }; /// \brief RandomApply Op. -/// \notes Randomly perform a series of transforms with a given probability. +/// \note Randomly perform a series of transforms with a given probability. class RandomApply final : public TensorTransform { public: /// \brief Constructor. /// \param[in] transforms A vector of raw pointers to TensorTransform objects to be applied. 
- /// \param[in] prob The probability to apply the transformation list (default=0.5) + /// \param[in] prob The probability to apply the transformation list (default=0.5). explicit RandomApply(const std::vector &transforms, double prob = 0.5); /// \brief Constructor. /// \param[in] transforms A vector of shared pointers to TensorTransform objects to be applied. - /// \param[in] prob The probability to apply the transformation list (default=0.5) + /// \param[in] prob The probability to apply the transformation list (default=0.5). explicit RandomApply(const std::vector> &transforms, double prob = 0.5); /// \brief Constructor. /// \param[in] transforms A vector of TensorTransform objects to be applied. - /// \param[in] prob The probability to apply the transformation list (default=0.5) + /// \param[in] prob The probability to apply the transformation list (default=0.5). explicit RandomApply(const std::vector> &transforms, double prob = 0.5); /// \brief Destructor @@ -321,7 +321,7 @@ class RandomApply final : public TensorTransform { }; /// \brief RandomChoice Op. -/// \notes Randomly selects one transform from a list of transforms to perform operation. +/// \note Randomly selects one transform from a list of transforms to perform operation. class RandomChoice final : public TensorTransform { public: /// \brief Constructor. @@ -348,7 +348,7 @@ class RandomChoice final : public TensorTransform { }; /// \brief Slice Op. -/// \notes Slice operation to extract a tensor out using the given n slices. +/// \note Slice operation to extract a tensor out using the given n slices. /// The functionality of Slice is similar to NumPy's indexing feature. /// (Currently only rank-1 tensors are supported). class Slice final : public TensorTransform { @@ -371,7 +371,7 @@ class Slice final : public TensorTransform { }; /// \brief TypeCast Op. -/// \notes Tensor operation to cast to a given MindSpore data type. +/// \note Tensor operation to cast to a given MindSpore data type. 
class TypeCast final : public TensorTransform { public: /// \brief Constructor. @@ -392,7 +392,7 @@ class TypeCast final : public TensorTransform { }; /// \brief Unique Op. -/// \notes Return an output tensor containing all the unique elements of the input tensor in +/// \note Return an output tensor containing all the unique elements of the input tensor in /// the same order that they occur in the input tensor. class Unique final : public TensorTransform { public: diff --git a/mindspore/ccsrc/minddata/dataset/include/dataset/vision.h b/mindspore/ccsrc/minddata/dataset/include/dataset/vision.h index 4aa0f210703..30de2576005 100644 --- a/mindspore/ccsrc/minddata/dataset/include/dataset/vision.h +++ b/mindspore/ccsrc/minddata/dataset/include/dataset/vision.h @@ -37,7 +37,7 @@ class TensorOperation; // Transform operations for performing computer vision. namespace vision { /// \brief AutoContrast TensorTransform. -/// \notes Apply automatic contrast on input image. +/// \note Apply automatic contrast on input image. class AutoContrast final : public TensorTransform { public: /// \brief Constructor. @@ -59,7 +59,7 @@ class AutoContrast final : public TensorTransform { }; /// \brief BoundingBoxAugment TensorTransform. -/// \notes Apply a given image transform on a random selection of bounding box regions of a given image. +/// \note Apply a given image transform on a random selection of bounding box regions of a given image. class BoundingBoxAugment final : public TensorTransform { public: /// \brief Constructor. @@ -90,15 +90,15 @@ class BoundingBoxAugment final : public TensorTransform { std::shared_ptr data_; }; -/// \brief Constructor to apply CutMix on a batch of images -/// \notes Masks a random section of each image with the corresponding part of another randomly -/// selected image in that batch +/// \brief Constructor to apply CutMix on a batch of images. 
+/// \note Masks a random section of each image with the corresponding part of another randomly +/// selected image in that batch. class CutMixBatch final : public TensorTransform { public: /// \brief Constructor. - /// \param[in] image_batch_format The format of the batch - /// \param[in] alpha The hyperparameter of beta distribution (default = 1.0) - /// \param[in] prob The probability by which CutMix is applied to each image (default = 1.0) + /// \param[in] image_batch_format The format of the batch. + /// \param[in] alpha The hyperparameter of beta distribution (default = 1.0). + /// \param[in] prob The probability by which CutMix is applied to each image (default = 1.0). explicit CutMixBatch(ImageBatchFormat image_batch_format, float alpha = 1.0, float prob = 1.0); /// \brief Destructor. @@ -114,13 +114,13 @@ class CutMixBatch final : public TensorTransform { std::shared_ptr data_; }; -/// \brief CutOut TensorOp -/// \notes Randomly cut (mask) out a given number of square patches from the input image +/// \brief CutOut TensorOp. +/// \note Randomly cut (mask) out a given number of square patches from the input image. class CutOut final : public TensorTransform { public: /// \brief Constructor. - /// \param[in] length Integer representing the side length of each square patch - /// \param[in] num_patches Integer representing the number of patches to be cut out of an image + /// \param[in] length Integer representing the side length of each square patch. + /// \param[in] num_patches Integer representing the number of patches to be cut out of an image. explicit CutOut(int32_t length, int32_t num_patches = 1); /// \brief Destructor. @@ -137,7 +137,7 @@ class CutOut final : public TensorTransform { }; /// \brief Equalize TensorTransform. -/// \notes Apply histogram equalization on input image. +/// \note Apply histogram equalization on input image. class Equalize final : public TensorTransform { public: /// \brief Constructor. 
@@ -153,7 +153,7 @@ class Equalize final : public TensorTransform { }; /// \brief HwcToChw TensorTransform. -/// \notes Transpose the input image; shape (H, W, C) to shape (C, H, W). +/// \note Transpose the input image; shape (H, W, C) to shape (C, H, W). class HWC2CHW final : public TensorTransform { public: /// \brief Constructor. @@ -169,7 +169,7 @@ class HWC2CHW final : public TensorTransform { }; /// \brief Invert TensorTransform. -/// \notes Apply invert on input image in RGB mode. +/// \note Apply invert on input image in RGB mode. class Invert final : public TensorTransform { public: /// \brief Constructor. @@ -185,12 +185,12 @@ class Invert final : public TensorTransform { }; /// \brief MixUpBatch TensorTransform. -/// \notes Apply MixUp transformation on an input batch of images and labels. The labels must be in +/// \note Apply MixUp transformation on an input batch of images and labels. The labels must be in /// one-hot format and Batch must be called before calling this function. class MixUpBatch final : public TensorTransform { public: /// \brief Constructor. - /// \param[in] alpha hyperparameter of beta distribution (default = 1.0) + /// \param[in] alpha hyperparameter of beta distribution (default = 1.0). explicit MixUpBatch(float alpha = 1); /// \brief Destructor. @@ -207,7 +207,7 @@ class MixUpBatch final : public TensorTransform { }; /// \brief NormalizePad TensorTransform. -/// \notes Normalize the input image with respect to mean and standard deviation and pad an extra +/// \note Normalize the input image with respect to mean and standard deviation and pad an extra /// channel with value zero. class NormalizePad final : public TensorTransform { public: @@ -215,9 +215,9 @@ class NormalizePad final : public TensorTransform { /// \param[in] mean A vector of mean values for each channel, w.r.t channel order. /// The mean values must be in range [0.0, 255.0]. /// \param[in] std A vector of standard deviations for each channel, w.r.t. channel order. 
- /// The standard deviation values must be in range (0.0, 255.0] + /// The standard deviation values must be in range (0.0, 255.0]. /// \param[in] dtype The output datatype of Tensor. - /// The standard deviation values must be "float32" or "float16"(default = "float32") + /// The standard deviation values must be "float32" or "float16"(default = "float32"). explicit NormalizePad(const std::vector &mean, const std::vector &std, const std::string &dtype = "float32") : NormalizePad(mean, std, StringToChar(dtype)) {} @@ -237,8 +237,8 @@ class NormalizePad final : public TensorTransform { std::shared_ptr data_; }; -/// \brief Pad TensorOp -/// \notes Pads the image according to padding parameters +/// \brief Pad TensorOp. +/// \note Pads the image according to padding parameters. class Pad final : public TensorTransform { public: /// \brief Constructor. @@ -251,7 +251,7 @@ class Pad final : public TensorTransform { /// \param[in] fill_value A vector representing the pixel intensity of the borders if the padding_mode is /// BorderType.kConstant. If 1 value is provided, it is used for all RGB channels. If 3 values are provided, /// it is used to fill R, G, B channels respectively. - /// \param[in] padding_mode The method of padding (default=BorderType.kConstant) + /// \param[in] padding_mode The method of padding (default=BorderType.kConstant). /// Can be any of /// [BorderType.kConstant, BorderType.kEdge, BorderType.kReflect, BorderType.kSymmetric] /// - BorderType.kConstant, means it fills the border with constant values @@ -276,12 +276,12 @@ class Pad final : public TensorTransform { /// \brief Blends an image with its grayscale version with random weights /// t and 1 - t generated from a given range. If the range is trivial -/// then the weights are determinate and t equals the bound of the interval +/// then the weights are determinate and t equals the bound of the interval. class RandomColor final : public TensorTransform { public: /// \brief Constructor. 
- /// \param[in] t_lb Lower bound on the range of random weights - /// \param[in] t_lb Upper bound on the range of random weights + /// \param[in] t_lb Lower bound on the range of random weights. + /// \param[in] t_ub Upper bound on the range of random weights. explicit RandomColor(float t_lb, float t_ub); /// \brief Destructor. @@ -298,19 +298,19 @@ class RandomColor final : public TensorTransform { }; /// \brief RandomColorAdjust TensorTransform. -/// \brief Randomly adjust the brightness, contrast, saturation, and hue of the input image +/// \note Randomly adjust the brightness, contrast, saturation, and hue of the input image. class RandomColorAdjust final : public TensorTransform { public: /// \brief Constructor. /// \param[in] brightness Brightness adjustment factor. Must be a vector of one or two values - /// if it's a vector of two values it needs to be in the form of [min, max]. Default value is {1, 1} + /// if it's a vector of two values it needs to be in the form of [min, max] (Default={1, 1}). /// \param[in] contrast Contrast adjustment factor. Must be a vector of one or two values - /// if it's a vector of two values it needs to be in the form of [min, max]. Default value is {1, 1} + /// if it's a vector of two values it needs to be in the form of [min, max] (Default={1, 1}). /// \param[in] saturation Saturation adjustment factor. Must be a vector of one or two values - /// if it's a vector of two values it needs to be in the form of [min, max]. Default value is {1, 1} + /// if it's a vector of two values it needs to be in the form of [min, max] (Default={1, 1}). /// \param[in] hue Brightness adjustment factor. Must be a vector of one or two values /// if it's a vector of two values it must be in the form of [min, max] where -0.5 <= min <= max <= 0.5 - /// Default value is {0, 0} + /// (Default={0, 0}).
explicit RandomColorAdjust(std::vector brightness = {1.0, 1.0}, std::vector contrast = {1.0, 1.0}, std::vector saturation = {1.0, 1.0}, std::vector hue = {0.0, 0.0}); @@ -328,7 +328,7 @@ class RandomColorAdjust final : public TensorTransform { }; /// \brief RandomCrop TensorTransform. -/// \notes Crop the input image at a random location. +/// \note Crop the input image at a random location. class RandomCrop final : public TensorTransform { public: /// \brief Constructor. @@ -364,7 +364,7 @@ class RandomCrop final : public TensorTransform { }; /// \brief RandomCropDecodeResize TensorTransform. -/// \notes Equivalent to RandomResizedCrop, but crops before decodes. +/// \note Equivalent to RandomResizedCrop, but crops before decodes. class RandomCropDecodeResize final : public TensorTransform { public: /// \brief Constructor. @@ -372,10 +372,10 @@ class RandomCropDecodeResize final : public TensorTransform { /// If size is a single value, a square crop of size (size, size) is returned. /// If size has 2 values, it should be (height, width). /// \param[in] scale Range [min, max) of respective size of the - /// original size to be cropped (default=(0.08, 1.0)) + /// original size to be cropped (default=(0.08, 1.0)). /// \param[in] ratio Range [min, max) of aspect ratio to be - /// cropped (default=(3. / 4., 4. / 3.)) - /// \param[in] interpolation An enum for the mode of interpolation + /// cropped (default=(3. / 4., 4. / 3.)). + /// \param[in] interpolation An enum for the mode of interpolation. /// \param[in] The maximum number of attempts to propose a valid crop_area (default=10). /// If exceeded, fall back to use center_crop instead. explicit RandomCropDecodeResize(std::vector size, std::vector scale = {0.08, 1.0}, @@ -397,7 +397,7 @@ class RandomCropDecodeResize final : public TensorTransform { }; /// \brief RandomCropWithBBox TensorTransform. -/// \notes Crop the input image at a random location and adjust bounding boxes accordingly. 
+/// \note Crop the input image at a random location and adjust bounding boxes accordingly. /// If cropped area is out of bbox, the return bbox will be empty. class RandomCropWithBBox final : public TensorTransform { public: @@ -436,7 +436,7 @@ class RandomCropWithBBox final : public TensorTransform { }; /// \brief RandomHorizontalFlip TensorTransform. -/// \notes Tensor operation to perform random horizontal flip. +/// \note Tensor operation to perform random horizontal flip. class RandomHorizontalFlip final : public TensorTransform { public: /// \brief Constructor. @@ -457,7 +457,7 @@ class RandomHorizontalFlip final : public TensorTransform { }; /// \brief RandomHorizontalFlipWithBBox TensorTransform. -/// \notes Flip the input image horizontally, randomly with a given probability and adjust bounding boxes accordingly. +/// \note Flip the input image horizontally, randomly with a given probability and adjust bounding boxes accordingly. class RandomHorizontalFlipWithBBox final : public TensorTransform { public: /// \brief Constructor. @@ -478,11 +478,11 @@ class RandomHorizontalFlipWithBBox final : public TensorTransform { }; /// \brief RandomPosterize TensorTransform. -/// \notes Tensor operation to perform random posterize. +/// \note Tensor operation to perform random posterize. class RandomPosterize final : public TensorTransform { public: /// \brief Constructor. - /// \param[in] bit_range - uint8_t vector representing the minimum and maximum bit in range. (Default={4, 8}) + /// \param[in] bit_range - uint8_t vector representing the minimum and maximum bit in range (Default={4, 8}). explicit RandomPosterize(const std::vector &bit_range = {4, 8}); /// \brief Destructor. @@ -499,13 +499,13 @@ class RandomPosterize final : public TensorTransform { }; /// \brief RandomResize TensorTransform. -/// \notes Resize the input image using a randomly selected interpolation mode. +/// \note Resize the input image using a randomly selected interpolation mode. 
// the same image aspect ratio. If size has 2 values, it should be (height, width). class RandomResize final : public TensorTransform { public: /// \brief Constructor. /// \param[in] size A vector representing the output size of the resized image. - /// If size is a single value, the smaller edge of the image will be resized to this value with + /// If size is a single value, the smaller edge of the image will be resized to this value with the same image aspect ratio. explicit RandomResize(std::vector size); /// \brief Destructor. @@ -522,7 +522,7 @@ class RandomResize final : public TensorTransform { }; /// \brief RandomResizeWithBBox TensorTransform. -/// \notes Resize the input image using a randomly selected interpolation mode and adjust +/// \note Resize the input image using a randomly selected interpolation mode and adjust /// bounding boxes accordingly. class RandomResizeWithBBox final : public TensorTransform { public: @@ -546,7 +546,7 @@ class RandomResizeWithBBox final : public TensorTransform { }; /// \brief RandomResizedCrop TensorTransform. -/// \notes Crop the input image to a random size and aspect ratio. +/// \note Crop the input image to a random size and aspect ratio. class RandomResizedCrop final : public TensorTransform { public: /// \brief Constructor. /// \param[in] size A vector representing the output size of the resized image. /// If size is a single value, a square crop of size (size, size) is returned. /// If size has 2 values, it should be (height, width). /// \param[in] scale Range [min, max) of respective size of the original - /// size to be cropped (default=(0.08, 1.0)) + /// size to be cropped (default=(0.08, 1.0)). /// \param[in] ratio Range [min, max) of aspect ratio to be cropped /// (default=(3. / 4., 4. / 3.)).
- /// \param[in] interpolation Image interpolation mode (default=InterpolationMode::kLinear) - /// \param[in] max_attempts The maximum number of attempts to propose a valid + /// \param[in] interpolation Image interpolation mode (default=InterpolationMode::kLinear). + /// \param[in] max_attempts The maximum number of attempts to propose a valid /// crop_area (default=10). If exceeded, fall back to use center_crop instead. explicit RandomResizedCrop(std::vector size, std::vector scale = {0.08, 1.0}, std::vector ratio = {3. / 4., 4. / 3.}, @@ -578,7 +578,7 @@ class RandomResizedCrop final : public TensorTransform { }; /// \brief RandomResizedCropWithBBox TensorTransform. -/// \notes Crop the input image to a random size and aspect ratio. +/// \note Crop the input image to a random size and aspect ratio. /// If cropped area is out of bbox, the return bbox will be empty. class RandomResizedCropWithBBox final : public TensorTransform { public: @@ -587,10 +587,10 @@ class RandomResizedCropWithBBox final : public TensorTransform { /// If size is a single value, a square crop of size (size, size) is returned. /// If size has 2 values, it should be (height, width). /// \param[in] scale Range [min, max) of respective size of the original - /// size to be cropped (default=(0.08, 1.0)) + /// size to be cropped (default=(0.08, 1.0)). /// \param[in] ratio Range [min, max) of aspect ratio to be cropped /// (default=(3. / 4., 4. / 3.)). - /// \param[in] interpolation Image interpolation mode (default=InterpolationMode::kLinear) + /// \param[in] interpolation Image interpolation mode (default=InterpolationMode::kLinear). /// \param[in] max_attempts The maximum number of attempts to propose a valid /// crop_area (default=10). If exceeded, fall back to use center_crop instead.
RandomResizedCropWithBBox(std::vector size, std::vector scale = {0.08, 1.0}, @@ -610,16 +610,16 @@ class RandomResizedCropWithBBox final : public TensorTransform { std::shared_ptr data_; }; -/// \brief RandomRotation TensorOp -/// \notes Rotates the image according to parameters +/// \brief RandomRotation TensorOp. +/// \note Rotates the image according to parameters. class RandomRotation final : public TensorTransform { public: /// \brief Constructor. - /// \param[in] degrees A float vector of size, representing the starting and ending degree - /// \param[in] resample An enum for the mode of interpolation - /// \param[in] expand A boolean representing whether the image is expanded after rotation + /// \param[in] degrees A float vector of size 2, representing the starting and ending degree. + /// \param[in] resample An enum for the mode of interpolation. + /// \param[in] expand A boolean representing whether the image is expanded after rotation. /// \param[in] center A float vector of size 2, representing the x and y center of rotation. - /// \param[in] fill_value A vector representing the value to fill the area outside the transform + /// \param[in] fill_value A vector representing the value to fill the area outside the transform /// in the output image. If 1 value is provided, it is used for all RGB channels. /// If 3 values are provided, it is used to fill R, G, B channels respectively. RandomRotation(std::vector degrees, InterpolationMode resample = InterpolationMode::kNearestNeighbour, @@ -640,23 +640,23 @@ class RandomRotation final : public TensorTransform { }; /// \brief RandomSelectSubpolicy TensorTransform. -/// \notes Choose a random sub-policy from a list to be applied on the input image. A sub-policy is a list of tuples +/// \note Choose a random sub-policy from a list to be applied on the input image. A sub-policy is a list of tuples /// (op, prob), where op is a TensorTransform operation and prob is the probability that this op will be applied.
/// Once a sub-policy is selected, each op within the sub-policy with be applied in sequence according to its /// probability. class RandomSelectSubpolicy final : public TensorTransform { public: /// \brief Constructor. - /// \param[in] policy Vector of sub-policies to choose from, in which the TensorTransform objects are raw pointers + /// \param[in] policy Vector of sub-policies to choose from, in which the TensorTransform objects are raw pointers. explicit RandomSelectSubpolicy(const std::vector>> &policy); /// \brief Constructor. - /// \param[in] policy Vector of sub-policies to choose from, in which the TensorTransform objects are shared pointers + /// \param[in] policy Vector of sub-policies to choose from, in which the TensorTransform objects are shared pointers. explicit RandomSelectSubpolicy( const std::vector, double>>> &policy); /// \brief Constructor. - /// \param[in] policy Vector of sub-policies to choose from, in which the TensorTransform objects are object pointers + /// \param[in] policy Vector of sub-policies to choose from, in which the TensorTransform objects are object pointers. explicit RandomSelectSubpolicy( const std::vector, double>>> &policy); @@ -674,11 +674,11 @@ class RandomSelectSubpolicy final : public TensorTransform { }; /// \brief RandomSharpness TensorTransform. -/// \notes Tensor operation to perform random sharpness. +/// \note Tensor operation to perform random sharpness. class RandomSharpness final : public TensorTransform { public: /// \brief Constructor. - /// \param[in] degrees A float vector of size 2, representing the starting and ending degree to uniformly + /// \param[in] degrees A float vector of size 2, representing the starting and ending degree to uniformly /// sample from, to select a degree to adjust sharpness. explicit RandomSharpness(std::vector degrees = {0.1, 1.9});
-/// \notes Invert pixels randomly within specified range. If min=max, it is a single fixed magnitude operation -/// to inverts all pixel above that threshold +/// \note Invert pixels randomly within specified range. If min=max, it is a single fixed magnitude operation +/// to invert all pixels above that threshold. class RandomSolarize final : public TensorTransform { public: /// \brief Constructor. @@ -718,7 +718,7 @@ class RandomSolarize final : public TensorTransform { }; /// \brief RandomVerticalFlip TensorTransform. -/// \notes Tensor operation to perform random vertical flip. +/// \note Tensor operation to perform random vertical flip. class RandomVerticalFlip final : public TensorTransform { public: /// \brief Constructor. @@ -739,7 +739,7 @@ class RandomVerticalFlip final : public TensorTransform { }; /// \brief RandomVerticalFlipWithBBox TensorTransform. -/// \notes Flip the input image vertically, randomly with a given probability and adjust bounding boxes accordingly. +/// \note Flip the input image vertically, randomly with a given probability and adjust bounding boxes accordingly. class RandomVerticalFlipWithBBox final : public TensorTransform { public: /// \brief Constructor. @@ -760,7 +760,7 @@ class RandomVerticalFlipWithBBox final : public TensorTransform { }; /// \brief RescaleOperation TensorTransform. -/// \notes Tensor operation to rescale the input image. +/// \note Tensor operation to rescale the input image. class Rescale final : public TensorTransform { public: /// \brief Constructor. @@ -782,7 +782,7 @@ class Rescale final : public TensorTransform { }; /// \brief ResizeWithBBox TensorTransform. -/// \notes Resize the input image to the given size and adjust bounding boxes accordingly. +/// \note Resize the input image to the given size and adjust bounding boxes accordingly. class ResizeWithBBox final : public TensorTransform { public: /// \brief Constructor.
@@ -806,7 +806,7 @@ class ResizeWithBBox final : public TensorTransform { }; /// \brief RgbaToBgr TensorTransform. -/// \notes Changes the input 4 channel RGBA tensor to 3 channel BGR. +/// \note Changes the input 4 channel RGBA tensor to 3 channel BGR. class RGBA2BGR final : public TensorTransform { public: /// \brief Constructor. @@ -822,7 +822,7 @@ class RGBA2BGR final : public TensorTransform { }; /// \brief RgbaToRgb TensorTransform. -/// \notes Changes the input 4 channel RGBA tensor to 3 channel RGB. +/// \note Changes the input 4 channel RGBA tensor to 3 channel RGB. class RGBA2RGB final : public TensorTransform { public: /// \brief Constructor. @@ -838,7 +838,7 @@ class RGBA2RGB final : public TensorTransform { }; /// \brief SoftDvppDecodeRandomCropResizeJpeg TensorTransform. -/// \notes Tensor operation to decode, random crop and resize JPEG image using the simulation algorithm of +/// \note Tensor operation to decode, random crop and resize JPEG image using the simulation algorithm of /// Ascend series chip DVPP module. The usage scenario is consistent with SoftDvppDecodeResizeJpeg. /// The input image size should be in range [32*32, 8192*8192]. /// The zoom-out and zoom-in multiples of the image length and width should in the range [1/32, 16]. @@ -872,7 +872,7 @@ class SoftDvppDecodeRandomCropResizeJpeg final : public TensorTransform { }; /// \brief SoftDvppDecodeResizeJpeg TensorTransform. -/// \notes Tensor operation to decode and resize JPEG image using the simulation algorithm of Ascend series +/// \note Tensor operation to decode and resize JPEG image using the simulation algorithm of Ascend series /// chip DVPP module. 
It is recommended to use this algorithm in the following scenarios: /// When training, the DVPP of the Ascend chip is not used, /// and the DVPP of the Ascend chip is used during inference, @@ -901,8 +901,8 @@ class SoftDvppDecodeResizeJpeg final : public TensorTransform { std::shared_ptr data_; }; -/// \brief SwapRedBlue TensorOp -/// \notes Swaps the red and blue channels in image +/// \brief SwapRedBlue TensorOp. +/// \note Swaps the red and blue channels in image. class SwapRedBlue final : public TensorTransform { public: /// \brief Constructor. @@ -918,7 +918,7 @@ class SwapRedBlue final : public TensorTransform { }; /// \brief UniformAugment TensorTransform. -/// \notes Tensor operation to perform randomly selected augmentation. +/// \note Tensor operation to perform randomly selected augmentation. class UniformAugment final : public TensorTransform { public: /// \brief Constructor. diff --git a/mindspore/ccsrc/minddata/dataset/include/dataset/vision_lite.h b/mindspore/ccsrc/minddata/dataset/include/dataset/vision_lite.h index 5abc3d35d28..07e730206ff 100644 --- a/mindspore/ccsrc/minddata/dataset/include/dataset/vision_lite.h +++ b/mindspore/ccsrc/minddata/dataset/include/dataset/vision_lite.h @@ -36,16 +36,16 @@ namespace vision { class RotateOperation; /// \brief Affine TensorTransform. -/// \notes Apply affine transform on input image. +/// \note Apply affine transform on input image. class Affine final : public TensorTransform { public: /// \brief Constructor. - /// \param[in] degrees The degrees to rotate the image by - /// \param[in] translation The value representing vertical and horizontal translation (default = {0.0, 0.0}) + /// \param[in] degrees The degrees to rotate the image by. + /// \param[in] translation The value representing vertical and horizontal translation (default = {0.0, 0.0}). /// The first value represent the x axis translation while the second represents y axis translation. 
- /// \param[in] scale The scaling factor for the image (default = 0.0) - /// \param[in] shear A float vector of size 2, representing the shear degrees (default = {0.0, 0.0}) - /// \param[in] interpolation An enum for the mode of interpolation + /// \param[in] scale The scaling factor for the image (default = 0.0). + /// \param[in] shear A float vector of size 2, representing the shear degrees (default = {0.0, 0.0}). + /// \param[in] interpolation An enum for the mode of interpolation. /// \param[in] fill_value A vector representing the value to fill the area outside the transform /// in the output image. If 1 value is provided, it is used for all RGB channels. /// If 3 values are provided, it is used to fill R, G, B channels respectively. @@ -67,7 +67,7 @@ class Affine final : public TensorTransform { }; /// \brief CenterCrop TensorTransform. -/// \notes Crops the input image at the center to the given size. +/// \note Crops the input image at the center to the given size. class CenterCrop final : public TensorTransform { public: /// \brief Constructor. @@ -92,7 +92,7 @@ class CenterCrop final : public TensorTransform { }; /// \brief RGB2GRAY TensorTransform. -/// \notes Convert RGB image or color image to grayscale image +/// \note Convert RGB image or color image to grayscale image. class RGB2GRAY final : public TensorTransform { public: /// \brief Constructor. @@ -108,11 +108,11 @@ class RGB2GRAY final : public TensorTransform { }; /// \brief Crop TensorTransform. -/// \notes Crop an image based on location and crop size +/// \note Crop an image based on location and crop size. class Crop final : public TensorTransform { public: /// \brief Constructor. - /// \param[in] coordinates Starting location of crop. Must be a vector of two values, in the form of {x_coor, y_coor} + /// \param[in] coordinates Starting location of crop. Must be a vector of two values, in the form of {x_coor, y_coor}. /// \param[in] size Size of the cropped area. 
/// If size is a single value, a square crop of size (size, size) is returned. /// If size has 2 values, it should be (height, width). @@ -132,7 +132,7 @@ class Crop final : public TensorTransform { }; /// \brief Decode TensorTransform. -/// \notes Decode the input image in RGB mode. +/// \note Decode the input image in RGB mode. class Decode final : public TensorTransform { public: /// \brief Constructor. @@ -155,14 +155,14 @@ class Decode final : public TensorTransform { }; /// \brief Normalize TensorTransform. -/// \notes Normalize the input image with respect to mean and standard deviation. +/// \note Normalize the input image with respect to mean and standard deviation. class Normalize final : public TensorTransform { public: /// \brief Constructor. /// \param[in] mean A vector of mean values for each channel, w.r.t channel order. /// The mean values must be in range [0.0, 255.0]. /// \param[in] std A vector of standard deviations for each channel, w.r.t. channel order. - /// The standard deviation values must be in range (0.0, 255.0] + /// The standard deviation values must be in range (0.0, 255.0]. Normalize(std::vector mean, std::vector std); /// \brief Destructor. @@ -181,21 +181,21 @@ class Normalize final : public TensorTransform { }; /// \brief RandomAffine TensorTransform. -/// \notes Applies a Random Affine transformation on input image in RGB or Greyscale mode. +/// \note Applies a Random Affine transformation on input image in RGB or Greyscale mode. class RandomAffine final : public TensorTransform { public: /// \brief Constructor. - /// \param[in] degrees A float vector of size 2, representing the starting and ending degree + /// \param[in] degrees A float vector of size 2, representing the starting and ending degree. /// \param[in] translate_range A float vector of size 2 or 4, representing percentages of translation on x and y axes. 
- /// if size is 2, (min_dx, max_dx, 0, 0) - /// if size is 4, (min_dx, max_dx, min_dy, max_dy) - /// all values are in range [-1, 1] + /// If size is 2, (min_dx, max_dx, 0, 0), + /// if size is 4, (min_dx, max_dx, min_dy, max_dy), + /// all values are in range [-1, 1]. /// \param[in] scale_range A float vector of size 2, representing the starting and ending scales in the range. /// \param[in] shear_ranges A float vector of size 2 or 4, representing the starting and ending shear degrees /// vertically and horizontally. - /// if size is 2, (min_shear_x, max_shear_x, 0, 0) - /// if size is 4, (min_shear_x, max_shear_x, min_shear_y, max_shear_y) - /// \param[in] interpolation An enum for the mode of interpolation + /// If size is 2, (min_shear_x, max_shear_x, 0, 0), + /// if size is 4, (min_shear_x, max_shear_x, min_shear_y, max_shear_y). + /// \param[in] interpolation An enum for the mode of interpolation. /// \param[in] fill_value A vector representing the value to fill the area outside the transform /// in the output image. If 1 value is provided, it is used for all RGB channels. /// If 3 values are provided, it is used to fill R, G, B channels respectively. @@ -219,14 +219,14 @@ }; /// \brief Resize TensorTransform. -/// \notes Resize the input image to the given size. +/// \note Resize the input image to the given size. class Resize final : public TensorTransform { public: /// \brief Constructor. /// \param[in] size A vector representing the output size of the resized image. /// If size is a single value, the image will be resized to this value with /// the same image aspect ratio. If size has 2 values, it should be (height, width). - /// \param[in] interpolation An enum for the mode of interpolation + /// \param[in] interpolation An enum for the mode of interpolation. explicit Resize(std::vector size, InterpolationMode interpolation = InterpolationMode::kLinear); /// \brief Destructor.
@@ -245,7 +245,7 @@ class Resize final : public TensorTransform { }; /// \brief ResizePreserveAR TensorTransform. -/// \notes Keep the original picture ratio and fill the rest. +/// \note Keep the original picture ratio and fill the rest. class ResizePreserveAR final : public TensorTransform { public: /// \brief Constructor. @@ -268,7 +268,7 @@ class ResizePreserveAR final : public TensorTransform { }; /// \brief Rotate TensorTransform. -/// \notes Rotate the input image using a specified angle id. +/// \note Rotate the input image using a specified angle id. class Rotate final : public TensorTransform { public: /// \brief Constructor.