diff --git a/mindspore/mindrecord/__init__.py b/mindspore/mindrecord/__init__.py index ee23b68cb66..cb04b93d8b6 100644 --- a/mindspore/mindrecord/__init__.py +++ b/mindspore/mindrecord/__init__.py @@ -13,12 +13,12 @@ # limitations under the License. # ============================================================================== """ -Introduction to mindrecord: +Introduction of mindrecord: Mindrecord is a module to implement reading, writing, search and converting for MindSpore format dataset. Users could load(modify) mindrecord data through FileReader(FileWriter). Users could also -convert other format dataset to mindrecord data through +convert other format datasets to mindrecord data through corresponding sub-module. """ diff --git a/mindspore/mindrecord/filereader.py b/mindspore/mindrecord/filereader.py index 5c38bbc23d4..5869f87de53 100644 --- a/mindspore/mindrecord/filereader.py +++ b/mindspore/mindrecord/filereader.py @@ -29,10 +29,10 @@ class FileReader: Class to read MindRecord File series. Args: - file_name (str, list[str]): One of MindRecord File or file list. + file_name (str, list[str]): One of MindRecord File or a file list. num_consumer(int, optional): Number of consumer threads which load data to memory (default=4). - It should not be smaller than 1 or larger than the number of CPU. - columns (list[str], optional): List of fields which corresponding data would be read (default=None). + It should not be smaller than 1 or larger than the number of CPUs. + columns (list[str], optional): A list of fields where corresponding data would be read (default=None). operator(int, optional): Reserved parameter for operators (default=None). Raises: @@ -72,7 +72,7 @@ class FileReader: Yield a batch of data according to columns at a time. Yields: - dict: keys is the same as columns. + dictionary: keys are the same as columns. Raises: MRMUnsupportedSchemaError: If schema is invalid. diff --git a/mindspore/mindrecord/filewriter.py b/mindspore/mindrecord/filewriter.py index c2807bb6549..cfc9b3ec1e6 100644 --- a/mindspore/mindrecord/filewriter.py +++ b/mindspore/mindrecord/filewriter.py @@ -39,11 +39,11 @@ class FileWriter: Args: file_name (str): File name of MindRecord File. - shard_num (int, optional): Number of MindRecord File (default=1). + shard_num (int, optional): The Number of MindRecord File (default=1). It should be between [1, 1000]. Raises: - ParamValueError: If file_name or shard_num is invalid. + ParamValueError: If `file_name` or `shard_num` is invalid. """ def __init__(self, file_name, shard_num=1): check_filename(file_name) @@ -88,7 +88,7 @@ class FileWriter: Raises: ParamValueError: If file_name is invalid. - FileNameError: If path contains invalid character. + FileNameError: If path contains invalid characters. MRMOpenError: If failed to open MindRecord File. MRMOpenForAppendError: If failed to open file for appending data. """ @@ -111,14 +111,14 @@ class FileWriter: def add_schema(self, content, desc=None): """ - Returns a schema id if added schema successfully, or raise exception. + Return a schema id if schema is added successfully, or raise an exception. Args: - content (dict): Dict of user defined schema. + content (dict): Dictionary of user defined schema. desc (str, optional): String of schema description (default=None). Returns: - int, schema id. + An integer, schema id. Raises: MRMInvalidSchemaError: If schema is invalid. @@ -145,7 +145,7 @@ class FileWriter: ParamTypeError: If index field is invalid. MRMDefineIndexError: If index field is not primitive type. MRMAddIndexError: If failed to add index field. - MRMGetMetaError: If the schema is not set or get meta failed. + MRMGetMetaError: If the schema is not set or failed to get meta. """ if not index_fields or not isinstance(index_fields, list): raise ParamTypeError('index_fields', 'list') @@ -205,7 +205,7 @@ class FileWriter: def open_and_set_header(self): """ - Open writer and set header + Open writer and set header. """ if not self._writer.is_open: @@ -245,7 +245,7 @@ class FileWriter: """ Set the size of header which contains shard information, schema information, \ page meta information, etc. The larger the header, the more training data \ - a single mindrecord file can store. + a single Mindrecord file can store. Args: header_size (int): Size of header, between 16KB and 128MB. @@ -278,7 +278,7 @@ class FileWriter: def commit(self): """ - Flush data to disk and generate the corresponding db files. + Flush data to disk and generate the corresponding database files. Returns: MSRStatus, SUCCESS or FAILED. diff --git a/mindspore/mindrecord/mindpage.py b/mindspore/mindrecord/mindpage.py index 6b88ba85a6e..43811728471 100644 --- a/mindspore/mindrecord/mindpage.py +++ b/mindspore/mindrecord/mindpage.py @@ -28,12 +28,12 @@ class MindPage: Class to read MindRecord File series in pagination. Args: - file_name (str): One of MindRecord File or file list. - num_consumer(int, optional): Number of consumer threads which load data to memory (default=4). - It should not be smaller than 1 or larger than the number of CPU. + file_name (str): One of MindRecord File or a file list. + num_consumer(int, optional): The number of consumer threads which load data to memory (default=4). + It should not be smaller than 1 or larger than the number of CPUs. Raises: - ParamValueError: If file_name, num_consumer or columns is invalid. + ParamValueError: If `file_name`, `num_consumer` or columns is invalid. MRMInitSegmentError: If failed to initialize ShardSegment. """ def __init__(self, file_name, num_consumer=4): @@ -97,7 +97,7 @@ class MindPage: @property def category_field(self): - """Getter function for category field""" + """Getter function for category fields.""" return self._category_field @category_field.setter @@ -127,7 +127,7 @@ class MindPage: Query by category id in pagination. Args: - category_id (int): Category id, referred to the return of read_category_info. + category_id (int): Category id, referred to the return of `read_category_info`. page (int): Index of page. num_row (int): Number of rows in a page. @@ -153,7 +153,7 @@ class MindPage: Args: category_name (str): String of category field's value, - referred to the return of read_category_info. + referred to the return of `read_category_info`. page (int): Index of page. num_row (int): Number of row in a page. diff --git a/mindspore/mindrecord/tools/cifar100_to_mr.py b/mindspore/mindrecord/tools/cifar100_to_mr.py index fa2b94cd7dc..1c5f7458d87 100644 --- a/mindspore/mindrecord/tools/cifar100_to_mr.py +++ b/mindspore/mindrecord/tools/cifar100_to_mr.py @@ -35,7 +35,7 @@ __all__ = ['Cifar100ToMR'] class Cifar100ToMR: """ - Class is for transformation from cifar100 to MindRecord. + A class to transform from cifar100 to MindRecord. Args: source (str): the cifar100 directory to be transformed. @@ -71,10 +71,10 @@ class Cifar100ToMR: Executes transformation from cifar100 to MindRecord. Args: - fields (list[str]): list of index field, ie. ["fine_label", "coarse_label"]. + fields (list[str]): A list of index field, e.g.["fine_label", "coarse_label"]. Returns: - SUCCESS/FAILED, whether successfully written into MindRecord. + SUCCESS or FAILED, whether cifar100 is successfully transformed to MindRecord. """ if fields and not isinstance(fields, list): raise ValueError("The parameter fields should be None or list") diff --git a/mindspore/mindrecord/tools/cifar10_to_mr.py b/mindspore/mindrecord/tools/cifar10_to_mr.py index ff704249fd6..118d56ca991 100644 --- a/mindspore/mindrecord/tools/cifar10_to_mr.py +++ b/mindspore/mindrecord/tools/cifar10_to_mr.py @@ -34,7 +34,7 @@ __all__ = ['Cifar10ToMR'] class Cifar10ToMR: """ - Class is for transformation from cifar10 to MindRecord. + A class to transform from cifar10 to MindRecord. Args: source (str): the cifar10 directory to be transformed. @@ -70,10 +70,10 @@ class Cifar10ToMR: Executes transformation from cifar10 to MindRecord. Args: - fields (list[str], optional): list of index fields, ie. ["label"] (default=None). + fields (list[str], optional): A list of index fields, e.g.["label"] (default=None). Returns: - SUCCESS/FAILED, whether successfully written into MindRecord. + SUCCESS or FAILED, whether cifar10 is successfully transformed to MindRecord. """ if fields and not isinstance(fields, list): raise ValueError("The parameter fields should be None or list") diff --git a/mindspore/mindrecord/tools/csv_to_mr.py b/mindspore/mindrecord/tools/csv_to_mr.py index 574d3dd30f9..5b2fa5ec9dd 100644 --- a/mindspore/mindrecord/tools/csv_to_mr.py +++ b/mindspore/mindrecord/tools/csv_to_mr.py @@ -31,17 +31,17 @@ __all__ = ['CsvToMR'] class CsvToMR: """ - Class is for transformation from csv to MindRecord. + A class to transform from csv to MindRecord. Args: source (str): the file path of csv. destination (str): the MindRecord file path to transform into. - columns_list(list[str], optional): List of columns to be read(default=None). + columns_list(list[str], optional): A list of columns to be read(default=None). partition_number (int, optional): partition size (default=1). Raises: - ValueError: If source, destination, partition_number is invalid. - RuntimeError: If columns_list is invalid. + ValueError: If `source`, `destination`, `partition_number` is invalid. + RuntimeError: If `columns_list` is invalid. """ def __init__(self, source, destination, columns_list=None, partition_number=1): @@ -121,7 +121,7 @@ class CsvToMR: Executes transformation from csv to MindRecord. Returns: - SUCCESS/FAILED, whether successfully written into MindRecord. + SUCCESS or FAILED, whether csv is successfully transformed to MindRecord. """ if not os.path.exists(self.source): raise IOError("Csv file {} do not exist.".format(self.source)) diff --git a/mindspore/mindrecord/tools/imagenet_to_mr.py b/mindspore/mindrecord/tools/imagenet_to_mr.py index 411985d2a89..b7f8d145463 100644 --- a/mindspore/mindrecord/tools/imagenet_to_mr.py +++ b/mindspore/mindrecord/tools/imagenet_to_mr.py @@ -28,11 +28,10 @@ __all__ = ['ImageNetToMR'] class ImageNetToMR: """ - Class is for transformation from imagenet to MindRecord. + A class to transform from imagenet to MindRecord. Args: - map_file (str): the map file which indicates label. - the map file content should like this: + map_file (str): the map file that indicates label. The map file content should be like this: .. code-block:: @@ -41,12 +40,12 @@ class ImageNetToMR: n02110185 2 n02096294 3 - image_dir (str): image directory contains n02119789, n02100735, n02110185, n02096294 dir. + image_dir (str): image directory contains n02119789, n02100735, n02110185 and n02096294 directory. destination (str): the MindRecord file path to transform into. partition_number (int, optional): partition size (default=1). Raises: - ValueError: If map_file, image_dir or destination is invalid. + ValueError: If `map_file`, `image_dir` or `destination` is invalid. """ def __init__(self, map_file, image_dir, destination, partition_number=1): check_filename(map_file) @@ -123,7 +122,7 @@ class ImageNetToMR: Executes transformation from imagenet to MindRecord. Returns: - SUCCESS/FAILED, whether successfully written into MindRecord. + SUCCESS or FAILED, whether imagenet is successfully transformed to MindRecord. """ t0_total = time.time() diff --git a/mindspore/mindrecord/tools/mnist_to_mr.py b/mindspore/mindrecord/tools/mnist_to_mr.py index 9014143a128..5c3d5783fe4 100644 --- a/mindspore/mindrecord/tools/mnist_to_mr.py +++ b/mindspore/mindrecord/tools/mnist_to_mr.py @@ -34,17 +34,17 @@ __all__ = ['MnistToMR'] class MnistToMR: """ - Class is for transformation from Mnist to MindRecord. + A class to transform from Mnist to MindRecord. Args: - source (str): directory which contains t10k-images-idx3-ubyte.gz, - train-images-idx3-ubyte.gz, t10k-labels-idx1-ubyte.gz, - train-labels-idx1-ubyte.gz. + source (str): directory that contains t10k-images-idx3-ubyte.gz, + train-images-idx3-ubyte.gz, t10k-labels-idx1-ubyte.gz + and train-labels-idx1-ubyte.gz. destination (str): the MindRecord file directory to transform into. partition_number (int, optional): partition size (default=1). Raises: - ValueError: If source/destination/partition_number is invalid. + ValueError: If `source`, `destination`, `partition_number` is invalid. """ def __init__(self, source, destination, partition_number=1): @@ -173,7 +173,7 @@ class MnistToMR: Executes transformation from Mnist test part to MindRecord. Returns: - SUCCESS/FAILED, whether successfully written into MindRecord. + SUCCESS or FAILED, whether Mnist is successfully transformed to MindRecord. """ t0_total = time.time() diff --git a/mindspore/mindrecord/tools/tfrecord_to_mr.py b/mindspore/mindrecord/tools/tfrecord_to_mr.py index ce5b8400f01..2df5da29fec 100644 --- a/mindspore/mindrecord/tools/tfrecord_to_mr.py +++ b/mindspore/mindrecord/tools/tfrecord_to_mr.py @@ -99,25 +99,25 @@ def _cast_name(key): class TFRecordToMR: """ - Class is for tranformation from TFRecord to MindRecord. + A class to transform from TFRecord to MindRecord. Args: source (str): the TFRecord file to be transformed. destination (str): the MindRecord file path to tranform into. - feature_dict (dict): a dictionary that states the feature type, i.e. + feature_dict (dict): a dictionary that states the feature type, e.g. feature_dict = {"xxxx": tf.io.FixedLenFeature([], tf.string), \ "yyyy": tf.io.FixedLenFeature([], tf.int64)} - **Follow case which uses VarLenFeature not support** + **Follow case which uses VarLenFeature is not supported.** feature_dict = {"context": {"xxxx": tf.io.FixedLenFeature([], tf.string), \ "yyyy": tf.io.VarLenFeature(tf.int64)}, \ "sequence": {"zzzz": tf.io.FixedLenSequenceFeature([], tf.float32)}} - bytes_fields (list, optional): the bytes fields which are in feature_dict and can be images bytes. + bytes_fields (list, optional): the bytes fields which are in `feature_dict` and can be images bytes. Raises: ValueError: If parameter is invalid. - Exception: when tensorflow module not found or version is not correct. + Exception: when tensorflow module is not found or version is not correct. """ def __init__(self, source, destination, feature_dict, bytes_fields=None): if not tf: @@ -211,7 +211,7 @@ class TFRecordToMR: ms_dict[cast_key] = float(val.numpy()) def tfrecord_iterator(self): - """Yield a dict with key to be fields in schema, and value to be data.""" + """Yield a dictionary whose keys are fields in schema.""" dataset = tf.data.TFRecordDataset(self.source) dataset = dataset.map(self._parse_record) iterator = dataset.__iter__() @@ -237,10 +237,10 @@ class TFRecordToMR: def run(self): """ - Executes transform from TFRecord to MindRecord. + Execute transformation from TFRecord to MindRecord. Returns: - SUCCESS/FAILED, whether successfuly written into MindRecord. + SUCCESS or FAILED, whether TFRecord is successfuly transformed to MindRecord. """ writer = FileWriter(self.destination) logger.info("Transformed MindRecord schema is: {}, TFRecord feature dict is: {}"