From 0df7e0edf8e10d680142ac48b3038e237006f1ee Mon Sep 17 00:00:00 2001 From: liyong Date: Sat, 26 Mar 2022 16:37:40 +0800 Subject: [PATCH] fix chn api doc --- .../mindspore.dataset.OBSMindDataset.rst | 5 ++--- .../mindspore.mindrecord.Cifar100ToMR.rst | 12 +++++------ .../mindspore.mindrecord.Cifar10ToMR.rst | 10 +++++----- .../mindspore.mindrecord.CsvToMR.rst | 14 ++++++++----- .../mindspore.mindrecord.FileWriter.rst | 14 ++++++------- .../mindspore.mindrecord.ImageNetToMR.rst | 12 +++++++---- .../mindspore.mindrecord.MindPage.rst | 2 +- .../mindspore.mindrecord.MnistToMR.rst | 8 ++++++-- .../mindspore.mindrecord.TFRecordToMR.rst | 15 +++++++++----- docs/api/api_python/mindspore.mindrecord.rst | 2 +- .../engine/obs/obs_mindrecord_dataset.py | 2 +- .../python/mindspore/mindrecord/mindpage.py | 2 +- .../mindrecord/tools/cifar100_to_mr.py | 11 +++++----- .../mindrecord/tools/cifar10_to_mr.py | 14 ++++++++----- .../mindspore/mindrecord/tools/csv_to_mr.py | 14 ++++++++----- .../mindrecord/tools/imagenet_to_mr.py | 17 ++++++++++------ .../mindspore/mindrecord/tools/mnist_to_mr.py | 15 +++++++++----- .../mindrecord/tools/tfrecord_to_mr.py | 20 ++++++++++++------- 18 files changed, 115 insertions(+), 74 deletions(-) diff --git a/docs/api/api_python/dataset/mindspore.dataset.OBSMindDataset.rst b/docs/api/api_python/dataset/mindspore.dataset.OBSMindDataset.rst index b3dee6204c5..25cdd810b10 100644 --- a/docs/api/api_python/dataset/mindspore.dataset.OBSMindDataset.rst +++ b/docs/api/api_python/dataset/mindspore.dataset.OBSMindDataset.rst @@ -1,15 +1,14 @@ mindspore.dataset.OBSMindDataset ================================ -.. py:class:: mindspore.dataset.OBSMindDataset(dataset_files, server, ak, sk, sync_obs_path, columns_list=None, - shuffle=Shuffle.GLOBAL, num_shards=None, shard_id=None, shard_equal_rows=True) +.. 
py:class:: mindspore.dataset.OBSMindDataset(dataset_files, server, ak, sk, sync_obs_path, columns_list=None, shuffle=Shuffle.GLOBAL, num_shards=None, shard_id=None, shard_equal_rows=True) 读取和解析存放在OBS上的MindRecord格式数据集。生成的数据集的列名和列类型取决于MindRecord文件中的保存的列名与类型。 **参数:** - **dataset_files** (list[str]) - OBS上MindRecord格式数据集文件的路径列表,每个文件的路径前缀为s3://。 - - **server** (str) - 连接OBS的服务地址。可包含协议类型、域名、端口号。示例: <https://your-endpoint:9000> 。 + - **server** (str) - 连接OBS的服务地址。可包含协议类型、域名、端口号。示例:<https://your-endpoint:9000>。 - **ak** (str) - 访问密钥中的AK。 - **sk** (str) - 访问密钥中的SK。 - **sync_obs_path** (str) - 用于同步操作的OBS路径,用户需要提前创建,目录路径的前缀为s3://。 diff --git a/docs/api/api_python/mindrecord/mindspore.mindrecord.Cifar100ToMR.rst b/docs/api/api_python/mindrecord/mindspore.mindrecord.Cifar100ToMR.rst index ec33db3cae4..8bd1d341cce 100644 --- a/docs/api/api_python/mindrecord/mindspore.mindrecord.Cifar100ToMR.rst +++ b/docs/api/api_python/mindrecord/mindspore.mindrecord.Cifar100ToMR.rst @@ -8,12 +8,12 @@ **参数:** - - **source** (str) - 待转换的CIFAR-100数据集文件的目录路径。 - - **destination** (str) - 转换生成的MindRecord文件路径。 + - **source** (str) - 待转换的CIFAR-100数据集文件所在目录的路径。 + - **destination** (str) - 转换生成的MindRecord文件路径,需提前创建目录并且目录下不能存在同名文件。 **异常:** - - **ValueError** - `source` 或 `destination` 无效。 + - **ValueError** - 参数 `source` 或 `destination` 无效。 .. py:method:: run(fields=None) @@ -27,12 +27,12 @@ **返回:** - MSRStatus,CIFAR-100数据集是否成功转换为MindRecord格式数据集。 + MSRStatus,SUCCESS或FAILED。 ..
py:method:: transform(fields=None) - :func:`mindspore.mindrecord.Cifar100ToMR.run` 函数的包装函数来保证异常时正常退出。 + :func:`mindspore.mindrecord.Cifar100ToMR.run` 的包装函数来保证异常时正常退出。 **参数:** @@ -41,4 +41,4 @@ **返回:** - MSRStatus,CIFAR-100数据集是否成功转换为MindRecord格式数据集。 + MSRStatus,SUCCESS或FAILED。 diff --git a/docs/api/api_python/mindrecord/mindspore.mindrecord.Cifar10ToMR.rst b/docs/api/api_python/mindrecord/mindspore.mindrecord.Cifar10ToMR.rst index 2982a25f264..833c4f82257 100644 --- a/docs/api/api_python/mindrecord/mindspore.mindrecord.Cifar10ToMR.rst +++ b/docs/api/api_python/mindrecord/mindspore.mindrecord.Cifar10ToMR.rst @@ -8,8 +8,8 @@ **参数:** - - **source** (str) - 待转换的CIFAR-10数据集文件的目录路径。 - - **destination** (str) - 转换生成的MindRecord文件路径。 + - **source** (str) - 待转换的CIFAR-10数据集文件所在目录的路径。 + - **destination** (str) - 转换生成的MindRecord文件路径,需提前创建目录并且目录下不能存在同名文件。 **异常:** @@ -27,12 +27,12 @@ **返回:** - MSRStatus,CIFAR-10数据集是否成功转换为MindRecord格式数据集。 + MSRStatus,SUCCESS或FAILED。 .. py:method:: transform(fields=None) - :func:`mindspore.mindrecord.Cifar10ToMR.run` 函数的包装函数来保证异常时正常退出。 + :func:`mindspore.mindrecord.Cifar10ToMR.run` 的包装函数来保证异常时正常退出。 **参数:** @@ -41,5 +41,5 @@ **返回:** - MSRStatus,CIFAR-10数据集是否成功转换为MindRecord格式数据集。 + MSRStatus,SUCCESS或FAILED。 diff --git a/docs/api/api_python/mindrecord/mindspore.mindrecord.CsvToMR.rst b/docs/api/api_python/mindrecord/mindspore.mindrecord.CsvToMR.rst index 81ea4405962..6f6c2313b0e 100644 --- a/docs/api/api_python/mindrecord/mindspore.mindrecord.CsvToMR.rst +++ b/docs/api/api_python/mindrecord/mindspore.mindrecord.CsvToMR.rst @@ -9,14 +9,14 @@ **参数:** - **source** (str) - 待转换的CSV文件路径。 - - **destination** (str) - 转换生成的MindRecord文件路径。 + - **destination** (str) - 转换生成的MindRecord文件路径,需提前创建目录并且目录下不能存在同名文件。 - **columns_list** (list[str],可选) - CSV中待读取数据列的列表。默认值:None,读取所有的数据列。 - **partition_number** (int,可选) - 生成MindRecord的文件个数。默认值:1。 **异常:** - - **ValueError** - `source` 、`destination` 、`partition_number` 无效。 - - **RuntimeError** - `columns_list` 无效。 + - 
**ValueError** - 参数 `source` 、`destination` 、`partition_number` 无效。 + - **RuntimeError** - 参数 `columns_list` 无效。 .. py:method:: run() @@ -25,9 +25,13 @@ **返回:** - MSRStatus,CSV数据集是否成功转换为MindRecord格式数据集。 + MSRStatus,SUCCESS或FAILED。 .. py:method:: transform() - :func:`mindspore.mindrecord.CsvToMR.run` 函数的包装函数来保证异常时正常退出。 + :func:`mindspore.mindrecord.CsvToMR.run` 的包装函数来保证异常时正常退出。 + + **返回:** + + MSRStatus,SUCCESS或FAILED。 diff --git a/docs/api/api_python/mindrecord/mindspore.mindrecord.FileWriter.rst b/docs/api/api_python/mindrecord/mindspore.mindrecord.FileWriter.rst index 057610b70a9..21391239d8b 100644 --- a/docs/api/api_python/mindrecord/mindspore.mindrecord.FileWriter.rst +++ b/docs/api/api_python/mindrecord/mindspore.mindrecord.FileWriter.rst @@ -19,12 +19,12 @@ .. py:method:: add_index(index_fields) - 指定schema中的字段作为索引来加速MindRecord文件的读取。schema可以通过 `add_schema` 通过来添加。 + 指定schema中的字段作为索引来加速MindRecord文件的读取。schema可以通过 `add_schema` 来添加。 .. note:: - 索引字段应为Primitive类型,例如 `int` 、`float` 、`str` 。 - 如果不调用该函数,则默认将schema中所有的Primitive类型的字段设置为索引。 - 请参考类的示例:`mindspore.mindrecord.FileWriter`。 + 请参考类的示例 :class:`mindspore.mindrecord.FileWriter` 。 **参数:** @@ -47,7 +47,7 @@ 增加描述用户自定义数据的schema。 .. note:: - 请参考类的示例:`mindspore.mindrecord.FileWriter`。 + 请参考类的示例 :class:`mindspore.mindrecord.FileWriter` 。 **参数:** @@ -70,7 +70,7 @@ 将内存中的数据同步到磁盘,并生成相应的数据库文件。 .. note:: - 请参考类的示例:`mindspore.mindrecord.FileWriter`。 + 请参考类的示例 :class:`mindspore.mindrecord.FileWriter` 。 **返回:** @@ -126,7 +126,7 @@ **参数:** - - **header_size** (int) - header大小,可设置范围为16*1024(16KB)和128*1024*1024(128MB)。 + - **header_size** (int) - header大小,可设置范围为16*1024(16KB)到128*1024*1024(128MB)。 **返回:** @@ -144,7 +144,7 @@ **参数:** - - **page_size** (int) - page大小,可设置范围为32*1024(32KB)和256*1024*1024(256MB)。 + - **page_size** (int) - page大小,可设置范围为32*1024(32KB)到256*1024*1024(256MB)。 **返回:** @@ -161,7 +161,7 @@ 根据schema校验用户自定义数据后,将数据转换为一系列连续的MindRecord格式的数据集文件。 .. 
note:: - 请参考类的示例:`mindspore.mindrecord.FileWriter`。 + 请参考类的示例 :class:`mindspore.mindrecord.FileWriter` 。 **参数:** diff --git a/docs/api/api_python/mindrecord/mindspore.mindrecord.ImageNetToMR.rst b/docs/api/api_python/mindrecord/mindspore.mindrecord.ImageNetToMR.rst index 91a00f14b45..326c5ea1a3f 100644 --- a/docs/api/api_python/mindrecord/mindspore.mindrecord.ImageNetToMR.rst +++ b/docs/api/api_python/mindrecord/mindspore.mindrecord.ImageNetToMR.rst @@ -18,12 +18,12 @@ n02096294 3 - **image_dir** (str) - ImageNet数据集的目录路径,目录中包含类似n02119789、n02100735、n02110185和n02096294的子目录。 - - **destination** (str) - 转换生成的MindRecord文件路径 + - **destination** (str) - 转换生成的MindRecord文件路径,需提前创建目录并且目录下不能存在同名文件。 - **partition_number** (int,可选) - 生成MindRecord的文件个数。默认值:1。 **异常:** - - **ValueError** - `map_file` 、`image_dir` 或 `destination` 无效。 + - **ValueError** - 参数 `map_file` 、`image_dir` 或 `destination` 无效。 .. py:method:: run() @@ -31,9 +31,13 @@ **返回:** - MSRStatus,ImageNet数据集是否成功转换为MindRecord格式数据集。 + MSRStatus,SUCCESS或FAILED。 .. 
py:method:: transform() - :func:`mindspore.mindrecord.ImageNetToMR.run` 函数的包装函数来保证异常时正常退出。 + :func:`mindspore.mindrecord.ImageNetToMR.run` 的包装函数来保证异常时正常退出。 + + **返回:** + + MSRStatus,SUCCESS或FAILED。 diff --git a/docs/api/api_python/mindrecord/mindspore.mindrecord.MindPage.rst b/docs/api/api_python/mindrecord/mindspore.mindrecord.MindPage.rst index 56dfcbc79a1..ade24b33124 100644 --- a/docs/api/api_python/mindrecord/mindspore.mindrecord.MindPage.rst +++ b/docs/api/api_python/mindrecord/mindspore.mindrecord.MindPage.rst @@ -5,7 +5,7 @@ **参数:** - - **file_name** (str) - MindRecord格式的数据集文件或文件列表。 + - **file_name** (Union[str, list[str]]) - MindRecord格式的数据集文件或文件列表。 - **num_consumer** (int,可选) - 加载数据的并发数。默认值:4。不应小于1或大于处理器的核数。 **异常:** diff --git a/docs/api/api_python/mindrecord/mindspore.mindrecord.MnistToMR.rst b/docs/api/api_python/mindrecord/mindspore.mindrecord.MnistToMR.rst index e3a4f1b6371..fcef5ec2351 100644 --- a/docs/api/api_python/mindrecord/mindspore.mindrecord.MnistToMR.rst +++ b/docs/api/api_python/mindrecord/mindspore.mindrecord.MnistToMR.rst @@ -6,12 +6,12 @@ **参数:** - **source** (str) - 包含t10k-images-idx3-ubyte.gz、train-images-idx3-ubyte.gz、t10k-labels-idx1-ubyte.gz和train-labels-idx1-ubyte.gz数据集文件的目录路径。 - - **destination** (str) - 转换生成的MindRecord文件路径。 + - **destination** (str) - 转换生成的MindRecord文件路径,需提前创建目录并且目录下不能存在同名文件。 - **partition_number** (int,可选) - 生成MindRecord的文件个数。默认值:1。 **异常:** - - **ValueError** - `source` 、 `destination` 、 `partition_number` 无效。 + - **ValueError** - 参数 `source` 、 `destination` 、 `partition_number` 无效。 .. py:method:: run() @@ -26,3 +26,7 @@ .. 
py:method:: transform() :func:`mindspore.mindrecord.MnistToMR.run` 函数的包装函数来保证异常时正常退出。 + + **返回:** + + MSRStatus,SUCCESS或FAILED。 diff --git a/docs/api/api_python/mindrecord/mindspore.mindrecord.TFRecordToMR.rst b/docs/api/api_python/mindrecord/mindspore.mindrecord.TFRecordToMR.rst index 18e08573f9d..aec5549e56d 100644 --- a/docs/api/api_python/mindrecord/mindspore.mindrecord.TFRecordToMR.rst +++ b/docs/api/api_python/mindrecord/mindspore.mindrecord.TFRecordToMR.rst @@ -9,9 +9,10 @@ **参数:** - **source** (str) - 待转换的TFRecord文件路径。 - - **destination** (str) - 转换生成的MindRecord文件路径。 - - **feature_dict** (dict) - TFRecord的feature类别的字典,不支持 `VarLenFeature` 类别。 - - **bytes_fields** (list,可选) - `feature_dict` 中的字节字段,可以为字节类型的图像字段。 + - **destination** (str) - 转换生成的MindRecord文件路径,需提前创建目录并且目录下不能存在同名文件。 + - **feature_dict** (dict[str, `FixedLenFeature <https://www.tensorflow.org/api_docs/python/tf/io/FixedLenFeature>`_ ]) - TFRecord的feature类别的字典, + 不支持 `VarLenFeature <https://www.tensorflow.org/api_docs/python/tf/io/VarLenFeature>`_ 类别。 + - **bytes_fields** (list[str],可选) - `feature_dict` 中的字节字段,可以为字节类型的图像字段。 **异常:** @@ -25,7 +26,7 @@ **返回:** - MSRStatus,TFRecord格式的数据集是否成功转换为MindRecord格式数据集。 + MSRStatus,SUCCESS或FAILED。 .. py:method:: tfrecord_iterator() @@ -48,4 +49,8 @@ .. py:method:: transform() - :func:`mindspore.mindrecord.TFRecordToMR.run` 函数的包装函数来保证异常时正常退出。 + :func:`mindspore.mindrecord.TFRecordToMR.run` 的包装函数来保证异常时正常退出。 + + **返回:** + + MSRStatus,SUCCESS或FAILED。 diff --git a/docs/api/api_python/mindspore.mindrecord.rst b/docs/api/api_python/mindspore.mindrecord.rst index 3a6420e7703..02a4ff6f6da 100644 --- a/docs/api/api_python/mindspore.mindrecord.rst +++ b/docs/api/api_python/mindspore.mindrecord.rst @@ -22,4 +22,4 @@ mindspore.mindrecord .. include:: mindrecord/mindspore.mindrecord.TFRecordToMR.rst ..
automodule:: mindspore.mindrecord - :members: \ No newline at end of file + :members: diff --git a/mindspore/python/mindspore/dataset/engine/obs/obs_mindrecord_dataset.py b/mindspore/python/mindspore/dataset/engine/obs/obs_mindrecord_dataset.py index cbd48739143..4289e47abf0 100644 --- a/mindspore/python/mindspore/dataset/engine/obs/obs_mindrecord_dataset.py +++ b/mindspore/python/mindspore/dataset/engine/obs/obs_mindrecord_dataset.py @@ -170,7 +170,7 @@ def _download_work(shard_id, current_idx, local_path, cache, q): used_disk = get_used_disk_per() while used_disk > float(config.DISK_THRESHOLD): logger.info("[{} FUNCTION] Used disk space is {}%, and the disk threshold is {}%.".format( - sys._getframe().f_code.co_name, used_disk*100, config.DISK_THRESHOLD*100)) # pylint: disable=W0212 + sys._getframe().f_code.co_name, used_disk*100, float(config.DISK_THRESHOLD)*100)) # pylint: disable=W0212 retry_cnt = 0 has_deleted = _delete_candidate_datasets( current_idx.value, idx, cache, q, local_path) diff --git a/mindspore/python/mindspore/mindrecord/mindpage.py b/mindspore/python/mindspore/mindrecord/mindpage.py index 4cbf036480d..765257987ca 100644 --- a/mindspore/python/mindspore/mindrecord/mindpage.py +++ b/mindspore/python/mindspore/mindrecord/mindpage.py @@ -29,7 +29,7 @@ class MindPage: Class to read MindRecord files in pagination. Args: - file_name (str): One of MindRecord files or a file list. + file_name (Union[str, list[str]]): One of MindRecord files or a file list. num_consumer(int, optional): The number of reader workers which load data. Default: 4. It should not be smaller than 1 or larger than the number of processor cores. 
diff --git a/mindspore/python/mindspore/mindrecord/tools/cifar100_to_mr.py b/mindspore/python/mindspore/mindrecord/tools/cifar100_to_mr.py index ffeffad4e80..d7f13415028 100644 --- a/mindspore/python/mindspore/mindrecord/tools/cifar100_to_mr.py +++ b/mindspore/python/mindspore/mindrecord/tools/cifar100_to_mr.py @@ -43,8 +43,9 @@ class Cifar100ToMR: www.mindspore.cn/docs/programming_guide/en/master/dataset_conversion.html#converting-the-cifar-10-dataset>`_. Args: - source (str): the cifar100 directory to be transformed. - destination (str): the MindRecord file path to transform into. + source (str): The cifar100 directory to be transformed. + destination (str): MindRecord file path to transform into, ensure that no file with the same name + exists in the directory. Raises: ValueError: If source or destination is invalid. @@ -80,7 +81,7 @@ class Cifar100ToMR: fields (list[str]): A list of index field, e.g.["fine_label", "coarse_label"]. Default: None. Returns: - MSRStatus, whether cifar100 is successfully transformed to MindRecord. + MSRStatus, SUCCESS or FAILED. """ if fields and not isinstance(fields, list): raise ValueError("The parameter fields should be None or list") @@ -119,7 +120,7 @@ class Cifar100ToMR: fields (list[str]): A list of index field, e.g.["fine_label", "coarse_label"]. Default: None. Returns: - MSRStatus, whether cifar100 is successfully transformed to MindRecord. + MSRStatus, SUCCESS or FAILED. """ t = ExceptionThread(target=self.run, kwargs={'fields': fields}) @@ -171,7 +172,7 @@ def _generate_mindrecord(file_name, raw_data, fields, schema_desc): schema_desc (str): String of schema description. Returns: - MSRStatus, whether successfully written into MindRecord. + MSRStatus, SUCCESS or FAILED. 
""" schema = {"id": {"type": "int64"}, "fine_label": {"type": "int64"}, "coarse_label": {"type": "int64"}, "data": {"type": "bytes"}} diff --git a/mindspore/python/mindspore/mindrecord/tools/cifar10_to_mr.py b/mindspore/python/mindspore/mindrecord/tools/cifar10_to_mr.py index 5935fee668b..a94670c3b37 100644 --- a/mindspore/python/mindspore/mindrecord/tools/cifar10_to_mr.py +++ b/mindspore/python/mindspore/mindrecord/tools/cifar10_to_mr.py @@ -43,8 +43,9 @@ class Cifar10ToMR: www.mindspore.cn/docs/programming_guide/en/master/dataset_conversion.html#converting-the-cifar-10-dataset>`_. Args: - source (str): the cifar10 directory to be transformed. - destination (str): the MindRecord file path to transform into. + source (str): The cifar10 directory to be transformed. + destination (str): MindRecord file path to transform into, ensure that no file with the same name + exists in the directory. Raises: ValueError: If source or destination is invalid. @@ -80,8 +81,9 @@ class Cifar10ToMR: fields (list[str], optional): A list of index fields. Default: None. Returns: - MSRStatus, whether cifar10 is successfully transformed to MindRecord. + MSRStatus, SUCCESS or FAILED. """ + if fields and not isinstance(fields, list): raise ValueError("The parameter fields should be None or list") @@ -115,7 +117,7 @@ class Cifar10ToMR: fields (list[str], optional): A list of index fields. Default: None. Returns: - MSRStatus, whether cifar10 is successfully transformed to MindRecord. + MSRStatus, SUCCESS or FAILED. """ t = ExceptionThread(target=self.run, kwargs={'fields': fields}) @@ -138,6 +140,7 @@ def _construct_raw_data(images, labels): Returns: list[dict], data dictionary constructed from cifar10. """ + if not cv2: raise ModuleNotFoundError("opencv-python module not found, please use pip install it.") @@ -164,8 +167,9 @@ def _generate_mindrecord(file_name, raw_data, fields, schema_desc): schema_desc (str): String of schema description. 
Returns: - MSRStatus, whether successfully written into MindRecord. + MSRStatus, SUCCESS or FAILED. """ + schema = {"id": {"type": "int64"}, "label": {"type": "int64"}, "data": {"type": "bytes"}} diff --git a/mindspore/python/mindspore/mindrecord/tools/csv_to_mr.py b/mindspore/python/mindspore/mindrecord/tools/csv_to_mr.py index 9d3579bbe60..2e15c34237d 100644 --- a/mindspore/python/mindspore/mindrecord/tools/csv_to_mr.py +++ b/mindspore/python/mindspore/mindrecord/tools/csv_to_mr.py @@ -39,10 +39,11 @@ class CsvToMR: www.mindspore.cn/docs/programming_guide/en/master/dataset_conversion.html#converting-csv-dataset>`_. Args: - source (str): the file path of csv. - destination (str): the MindRecord file path to transform into. + source (str): The file path of csv. + destination (str): The MindRecord file path to transform into, ensure that no file with the same name + exists in the directory. columns_list(list[str], optional): A list of columns to be read. Default: None. - partition_number (int, optional): partition size, Default: 1. + partition_number (int, optional): The partition size. Default: 1. Raises: ValueError: If `source`, `destination`, `partition_number` is invalid. @@ -130,7 +131,7 @@ class CsvToMR: Execute transformation from csv to MindRecord. Returns: - MSRStatus, whether csv is successfully transformed to MindRecord. + MSRStatus, SUCCESS or FAILED. """ if not os.path.exists(self.source): raise IOError("Csv file {} do not exist.".format(self.source)) @@ -178,7 +179,10 @@ class CsvToMR: def transform(self): """ - Encapsulate the run function to exit normally + Encapsulate the run function to exit normally. + + Returns: + MSRStatus, SUCCESS or FAILED.
""" t = ExceptionThread(target=self.run) diff --git a/mindspore/python/mindspore/mindrecord/tools/imagenet_to_mr.py b/mindspore/python/mindspore/mindrecord/tools/imagenet_to_mr.py index b5bf3ba1525..b5ecd512aca 100644 --- a/mindspore/python/mindspore/mindrecord/tools/imagenet_to_mr.py +++ b/mindspore/python/mindspore/mindrecord/tools/imagenet_to_mr.py @@ -35,7 +35,7 @@ class ImageNetToMR: www.mindspore.cn/docs/programming_guide/en/master/dataset_conversion.html#converting-the-imagenet-dataset>`_. Args: - map_file (str): the map file that indicates label. The map file content should be like this: + map_file (str): The map file that indicates label. The map file content should be like this: .. code-block:: @@ -44,9 +44,10 @@ class ImageNetToMR: n02110185 2 n02096294 3 - image_dir (str): image directory contains n02119789, n02100735, n02110185 and n02096294 directory. - destination (str): the MindRecord file path to transform into. - partition_number (int, optional): partition size. Default: 1. + image_dir (str): Image directory contains n02119789, n02100735, n02110185 and n02096294 directory. + destination (str): MindRecord file path to transform into, ensure that no file with the same name + exists in the directory. + partition_number (int, optional): The partition size. Default: 1. Raises: ValueError: If `map_file`, `image_dir` or `destination` is invalid. @@ -129,8 +130,9 @@ class ImageNetToMR: Execute transformation from imagenet to MindRecord. Returns: - MSRStatus, whether imagenet is successfully transformed to MindRecord. + MSRStatus, SUCCESS or FAILED. """ + t0_total = time.time() imagenet_schema_json = {"label": {"type": "int32"}, @@ -179,7 +181,10 @@ class ImageNetToMR: def transform(self): """ - Encapsulate the run function to exit normally + Encapsulate the run function to exit normally. + + Returns: + MSRStatus, SUCCESS or FAILED. 
""" t = ExceptionThread(target=self.run) diff --git a/mindspore/python/mindspore/mindrecord/tools/mnist_to_mr.py b/mindspore/python/mindspore/mindrecord/tools/mnist_to_mr.py index 3a341c9ae29..dc0b1b406ae 100644 --- a/mindspore/python/mindspore/mindrecord/tools/mnist_to_mr.py +++ b/mindspore/python/mindspore/mindrecord/tools/mnist_to_mr.py @@ -38,11 +38,12 @@ class MnistToMR: A class to transform from Mnist to MindRecord. Args: - source (str): directory that contains t10k-images-idx3-ubyte.gz, + source (str): Directory that contains t10k-images-idx3-ubyte.gz, train-images-idx3-ubyte.gz, t10k-labels-idx1-ubyte.gz and train-labels-idx1-ubyte.gz. - destination (str): the MindRecord file directory to transform into. - partition_number (int, optional): partition size. Default: 1. + destination (str): MindRecord file path to transform into, ensure that no file with the same name + exists in the directory. + partition_number (int, optional): The partition size. Default: 1. Raises: ValueError: If `source`, `destination`, `partition_number` is invalid. @@ -225,8 +226,9 @@ class MnistToMR: Execute transformation from Mnist to MindRecord. Returns: - MSRStatus, whether successfully written into MindRecord. + MSRStatus, SUCCESS or FAILED. """ + if not cv2: raise ModuleNotFoundError("opencv-python module not found, please use pip install it.") @@ -239,7 +241,10 @@ class MnistToMR: def transform(self): """ - Encapsulate the run function to exit normally + Encapsulate the run function to exit normally. + + Returns: + MSRStatus, SUCCESS or FAILED. 
""" t = ExceptionThread(target=self.run) diff --git a/mindspore/python/mindspore/mindrecord/tools/tfrecord_to_mr.py b/mindspore/python/mindspore/mindrecord/tools/tfrecord_to_mr.py index 92c85fee123..b4074a69e33 100644 --- a/mindspore/python/mindspore/mindrecord/tools/tfrecord_to_mr.py +++ b/mindspore/python/mindspore/mindrecord/tools/tfrecord_to_mr.py @@ -72,11 +72,14 @@ class TFRecordToMR: www.mindspore.cn/docs/programming_guide/en/master/dataset_conversion.html#converting-tfrecord-dataset>`_. Args: - source (str): the TFRecord file to be transformed. - destination (str): the MindRecord file path to transform into. - feature_dict (dict): a dictionary that states the feature type, and - `VarLenFeature` is not supported. - bytes_fields (list, optional): the bytes fields which are in `feature_dict` and can be images bytes. + source (str): TFRecord file to be transformed. + destination (str): MindRecord file path to transform into, ensure that no file with the same name + exists in the directory. + feature_dict (dict[str, `FixedLenFeature + `_ ]): Dictionary that states + the feature type, and `VarLenFeature `_ + is not supported. + bytes_fields (list[str], optional): The bytes fields which are in `feature_dict` and can be images bytes. Default: None. Raises: @@ -282,7 +285,7 @@ class TFRecordToMR: Execute transformation from TFRecord to MindRecord. Returns: - MSRStatus, whether TFRecord is successfully transformed to MindRecord. + MSRStatus, SUCCESS or FAILED. """ writer = FileWriter(self.destination) logger.info("Transformed MindRecord schema is: {}, TFRecord feature dict is: {}" @@ -313,7 +316,10 @@ class TFRecordToMR: def transform(self): """ - Encapsulate the run function to exit normally + Encapsulate the run function to exit normally. + + Returns: + MSRStatus, SUCCESS or FAILED. """ t = ExceptionThread(target=self.run)