fix mindrecord examples

2021-10-09 10:06:52 +08:00 · 2021-10-09 10:06:52 +08:00 · 192b6b5499
parent 2c738757c3
commit 192b6b5499
1 changed files with 53 additions and 5 deletions
--- a/mindspore/mindrecord/filewriter.py
+++ b/mindspore/mindrecord/filewriter.py
@ -46,6 +46,22 @@ class FileWriter:

    Raises:
        ParamValueError: If `file_name` or `shard_num` is invalid.
+
+    Examples:
+        >>> from mindspore.mindrecord import FileWriter
+        >>> schema_json = {"file_name": {"type": "string"}, "label": {"type": "int32"}, "data": {"type": "bytes"}}
+        >>> indexes = ["file_name", "label"]
+        >>> data = [{"file_name": "1.jpg", "label": 0,
+        ...         "data": b"\x10c\xb3w\xa8\xee$o&<q\x8c\x8e(\xa2\x90\x90\x96\xbc\xb1\x1e\xd4QER\x13?\xff\xd9"},
+        ...         {"file_name": "2.jpg", "label": 56,
+        ...         "data": b"\xe6\xda\xd1\xae\x07\xb8>\xd4\x00\xf8\x129\x15\xd9\xf2q\xc0\xa2\x91YFUO\x1dsE1\x1ep"},
+        ...         {"file_name": "3.jpg", "label": 99,
+        ...         "data": b"\xaf\xafU<\xb8|6\xbd}\xc1\x99[\xeaj+\x8f\x84\xd3\xcc\xa0,i\xbb\xb9-\xcdz\xecp{T\xb1\xdb"}]
+        >>> writer = FileWriter(file_name="test.mindrecord", shard_num=1)
+        >>> writer.add_schema(schema_json, "test_schema")
+        >>> writer.add_index(indexes)
+        >>> writer.write_raw_data(data)
+        >>> writer.commit()
    """

    def __init__(self, file_name, shard_num=1):
@ -94,7 +110,13 @@ class FileWriter:
            FileNameError: If path contains invalid characters.
            MRMOpenError: If failed to open MindRecord file.
            MRMOpenForAppendError: If failed to open file for appending data.
+
+        Examples:
+            >>> write_append = FileWriter.open_for_append("test.mindrecord")
+            >>> write_append.write_raw_data(data)
+            >>> write_append.commit()
        """
+
        check_filename(file_name)
        # construct ShardHeader
        reader = ShardReader()
@ -116,6 +138,9 @@ class FileWriter:
        """
        The schema is added to describe the raw data to be written.

+        Note:
+            Please refer to the Examples of class: `mindspore.mindrecord.FileWriter`.
+
        Args:
            content (dict): Dictionary of schema content.
            desc (str, optional): String of schema description. Default: None.
@ -143,6 +168,8 @@ class FileWriter:
            If the function is not called, the fields of the primitive type
            in schema are set as indexes by default.

+            Please refer to the Examples of class: `mindspore.mindrecord.FileWriter`.
+
        Args:
            index_fields (list[str]): fields from schema.

@ -231,9 +258,12 @@ class FileWriter:

    def write_raw_data(self, raw_data, parallel_writer=False):
        """
-        Convert raw data into a seried of consecutive MindRecord \
+        Convert raw data into a series of consecutive MindRecord \
        files after the raw data is verified against the schema.

+        Note:
+            Please refer to the Examples of class: `mindspore.mindrecord.FileWriter`.
+
        Args:
           raw_data (list[dict]): List of raw data.
           parallel_writer (bool, optional): Write raw data in parallel if it is True. Default: False.
@ -264,11 +294,14 @@ class FileWriter:
        """
        Set the size of header which contains shard information, schema information, \
        page meta information, etc. The larger a header, the more data \
-        the MindRecord file can store.
+        the MindRecord file can store. If the size of header is larger than \
+        the default size (16MB), users need to call this API to set a proper size.
+

        Args:
            header_size (int): Size of header, between 16*1024(16KB) and
-                128*1024*1024(128MB).
+                128*1024*1024(128MB). Default: 16MB.
+

        Returns:
            MSRStatus, SUCCESS or FAILED.
@ -276,6 +309,11 @@ class FileWriter:
        Raises:
            MRMInvalidHeaderSizeError: If failed to set header size.

+        Examples:
+            >>> from mindspore.mindrecord import FileWriter
+            >>> writer = FileWriter(file_name="test.mindrecord", shard_num=1)
+            >>> writer.set_header_size(1 << 25) # 32MB
+
        """
        return self._writer.set_header_size(header_size)

@ -283,17 +321,24 @@ class FileWriter:
        """
        Set the size of page that represents the area where data is stored, \
        and the areas are divided into two types: raw page and blob page. \
-        The larger a page, the more data the page can store.
+        The larger a page, the more data the page can store. If the size of \
+        a sample is larger than the default size (32MB), users need to call this API \
+        to set a proper size.

        Args:
           page_size (int): Size of page, between 32*1024(32KB) and
-               256*1024*1024(256MB).
+               256*1024*1024(256MB). Default: 32MB.

        Returns:
            MSRStatus, SUCCESS or FAILED.

        Raises:
            MRMInvalidPageSizeError: If failed to set page size.
+
+        Examples:
+            >>> from mindspore.mindrecord import FileWriter
+            >>> writer = FileWriter(file_name="test.mindrecord", shard_num=1)
+            >>> writer.set_page_size(1 << 26) # 64MB
        """
        return self._writer.set_page_size(page_size)

@ -301,6 +346,9 @@ class FileWriter:
        """
        Flush data in memory to disk and generate the corresponding database files.

+        Note:
+            Please refer to the Examples of class: `mindspore.mindrecord.FileWriter`.
+
        Returns:
            MSRStatus, SUCCESS or FAILED.