fix mindrecord examples

This commit is contained in:
liyong 2021-10-09 10:06:52 +08:00
parent 2c738757c3
commit 192b6b5499
1 changed files with 53 additions and 5 deletions

View File

@ -46,6 +46,22 @@ class FileWriter:
Raises:
ParamValueError: If `file_name` or `shard_num` is invalid.
Examples:
>>> from mindspore.mindrecord import FileWriter
>>> schema_json = {"file_name": {"type": "string"}, "label": {"type": "int32"}, "data": {"type": "bytes"}}
>>> indexes = ["file_name", "label"]
>>> data = [{"file_name": "1.jpg", "label": 0,
... "data": b"\x10c\xb3w\xa8\xee$o&<q\x8c\x8e(\xa2\x90\x90\x96\xbc\xb1\x1e\xd4QER\x13?\xff\xd9"},
... {"file_name": "2.jpg", "label": 56,
... "data": b"\xe6\xda\xd1\xae\x07\xb8>\xd4\x00\xf8\x129\x15\xd9\xf2q\xc0\xa2\x91YFUO\x1dsE1\x1ep"},
... {"file_name": "3.jpg", "label": 99,
... "data": b"\xaf\xafU<\xb8|6\xbd}\xc1\x99[\xeaj+\x8f\x84\xd3\xcc\xa0,i\xbb\xb9-\xcdz\xecp{T\xb1\xdb"}]
>>> writer = FileWriter(file_name="test.mindrecord", shard_num=1)
>>> writer.add_schema(schema_json, "test_schema")
>>> writer.add_index(indexes)
>>> writer.write_raw_data(data)
>>> writer.commit()
"""
def __init__(self, file_name, shard_num=1):
@ -94,7 +110,13 @@ class FileWriter:
FileNameError: If path contains invalid characters.
MRMOpenError: If failed to open MindRecord file.
MRMOpenForAppendError: If failed to open file for appending data.
Examples:
>>> write_append = FileWriter.open_for_append("test.mindrecord")
>>> write_append.write_raw_data(data)
>>> write_append.commit()
"""
check_filename(file_name)
# construct ShardHeader
reader = ShardReader()
@ -116,6 +138,9 @@ class FileWriter:
"""
The schema is added to describe the raw data to be written.
Note:
Please refer to the Examples of class: `mindspore.mindrecord.FileWriter`.
Args:
content (dict): Dictionary of schema content.
desc (str, optional): String of schema description. Default: None.
@ -143,6 +168,8 @@ class FileWriter:
If the function is not called, the fields of the primitive type
in schema are set as indexes by default.
Please refer to the Examples of class: `mindspore.mindrecord.FileWriter`.
Args:
index_fields (list[str]): fields from schema.
@ -231,9 +258,12 @@ class FileWriter:
def write_raw_data(self, raw_data, parallel_writer=False):
"""
Convert raw data into a seried of consecutive MindRecord \
Convert raw data into a series of consecutive MindRecord \
files after the raw data is verified against the schema.
Note:
Please refer to the Examples of class: `mindspore.mindrecord.FileWriter`.
Args:
raw_data (list[dict]): List of raw data.
parallel_writer (bool, optional): Write raw data in parallel if it is True. Default: False.
@ -264,11 +294,14 @@ class FileWriter:
"""
Set the size of header which contains shard information, schema information, \
page meta information, etc. The larger a header, the more data \
the MindRecord file can store.
the MindRecord file can store. If the size of header is larger than \
the default size (16MB), users need to call this API to set a proper size.
Args:
header_size (int): Size of header, between 16*1024(16KB) and
128*1024*1024(128MB).
128*1024*1024(128MB). Default: 16MB.
Returns:
MSRStatus, SUCCESS or FAILED.
@ -276,6 +309,11 @@ class FileWriter:
Raises:
MRMInvalidHeaderSizeError: If failed to set header size.
Examples:
>>> from mindspore.mindrecord import FileWriter
>>> writer = FileWriter(file_name="test.mindrecord", shard_num=1)
>>> writer.set_header_size(1 << 25) # 32MB
"""
return self._writer.set_header_size(header_size)
@ -283,17 +321,24 @@ class FileWriter:
"""
Set the size of page that represents the area where data is stored, \
and the areas are divided into two types: raw page and blob page. \
The larger a page, the more data the page can store.
The larger a page, the more data the page can store. If the size of \
a sample is larger than the default size (32MB), users need to call this API \
to set a proper size.
Args:
page_size (int): Size of page, between 32*1024(32KB) and
256*1024*1024(256MB).
256*1024*1024(256MB). Default: 32MB.
Returns:
MSRStatus, SUCCESS or FAILED.
Raises:
MRMInvalidPageSizeError: If failed to set page size.
Examples:
>>> from mindspore.mindrecord import FileWriter
>>> writer = FileWriter(file_name="test.mindrecord", shard_num=1)
>>> writer.set_page_size(1 << 26) # 64MB
"""
return self._writer.set_page_size(page_size)
@ -301,6 +346,9 @@ class FileWriter:
"""
Flush data in memory to disk and generate the corresponding database files.
Note:
Please refer to the Examples of class: `mindspore.mindrecord.FileWriter`.
Returns:
MSRStatus, SUCCESS or FAILED.