!3318 [MD] add pydoc for save ops
Merge pull request !3318 from liyong126/fix_save_doc
This commit is contained in:
commit
7b54fd8304
|
@ -410,6 +410,7 @@ Status DEPipeline::SaveDataset(const std::vector<std::string> &file_names, const
|
||||||
std::vector<std::string> index_fields;
|
std::vector<std::string> index_fields;
|
||||||
s = FetchMetaFromTensorRow(column_name_id_map, row, &mr_json, &index_fields);
|
s = FetchMetaFromTensorRow(column_name_id_map, row, &mr_json, &index_fields);
|
||||||
RETURN_IF_NOT_OK(s);
|
RETURN_IF_NOT_OK(s);
|
||||||
|
MS_LOG(DEBUG) << "Schema of saved mindrecord: " << mr_json.dump();
|
||||||
if (mindrecord::SUCCESS !=
|
if (mindrecord::SUCCESS !=
|
||||||
mindrecord::ShardHeader::initialize(&mr_header, mr_json, index_fields, blob_fields, mr_schema_id)) {
|
mindrecord::ShardHeader::initialize(&mr_header, mr_json, index_fields, blob_fields, mr_schema_id)) {
|
||||||
RETURN_STATUS_UNEXPECTED("Error: failed to initialize ShardHeader.");
|
RETURN_STATUS_UNEXPECTED("Error: failed to initialize ShardHeader.");
|
||||||
|
@ -569,6 +570,7 @@ Status DEPipeline::FetchMetaFromTensorRow(const std::unordered_map<std::string,
|
||||||
if (column_name_id_map.empty()) {
|
if (column_name_id_map.empty()) {
|
||||||
RETURN_STATUS_UNEXPECTED("Error: column not found.");
|
RETURN_STATUS_UNEXPECTED("Error: column not found.");
|
||||||
}
|
}
|
||||||
|
json dataset_schema;
|
||||||
for (auto &col : column_name_id_map) {
|
for (auto &col : column_name_id_map) {
|
||||||
auto idx = col.second;
|
auto idx = col.second;
|
||||||
auto column_name = col.first;
|
auto column_name = col.first;
|
||||||
|
@ -580,6 +582,7 @@ Status DEPipeline::FetchMetaFromTensorRow(const std::unordered_map<std::string,
|
||||||
auto shapes = column_shape.AsVector();
|
auto shapes = column_shape.AsVector();
|
||||||
std::vector<int> mr_shape(shapes.begin(), shapes.end());
|
std::vector<int> mr_shape(shapes.begin(), shapes.end());
|
||||||
std::string el = column_type.ToString();
|
std::string el = column_type.ToString();
|
||||||
|
dataset_schema[column_name] = el;
|
||||||
if (mindrecord::kTypesMap.find(el) == mindrecord::kTypesMap.end()) {
|
if (mindrecord::kTypesMap.find(el) == mindrecord::kTypesMap.end()) {
|
||||||
std::string err_msg("Error: can not support data type: " + el);
|
std::string err_msg("Error: can not support data type: " + el);
|
||||||
RETURN_STATUS_UNEXPECTED(err_msg);
|
RETURN_STATUS_UNEXPECTED(err_msg);
|
||||||
|
@ -605,6 +608,7 @@ Status DEPipeline::FetchMetaFromTensorRow(const std::unordered_map<std::string,
|
||||||
if (mr_type == "bytes" || !mr_shape.empty()) continue;
|
if (mr_type == "bytes" || !mr_shape.empty()) continue;
|
||||||
index_fields->emplace_back(column_name); // candidate of index fields
|
index_fields->emplace_back(column_name); // candidate of index fields
|
||||||
}
|
}
|
||||||
|
MS_LOG(DEBUG) << "Schema of dataset: " << dataset_schema.dump();
|
||||||
return Status::OK();
|
return Status::OK();
|
||||||
}
|
}
|
||||||
Status DEPipeline::BuildMindrecordSamplerChain(const py::handle &handle,
|
Status DEPipeline::BuildMindrecordSamplerChain(const py::handle &handle,
|
||||||
|
|
|
@ -1042,12 +1042,61 @@ class Dataset:
|
||||||
"""
|
"""
|
||||||
Save the dynamic data processed by dataset pipeline as common dataset format, support: mindrecord.
|
Save the dynamic data processed by dataset pipeline as common dataset format, support: mindrecord.
|
||||||
|
|
||||||
|
Implicit type casting is performed when saving data as mindrecord. The table below shows how each type is cast.
|
||||||
|
|
||||||
|
.. list-table:: Implicit Type Casting of Saving as mindrecord
|
||||||
|
:widths: 25 25 50
|
||||||
|
:header-rows: 1
|
||||||
|
|
||||||
|
* - type in 'dataset'
|
||||||
|
- type in 'mindrecord'
|
||||||
|
- detail
|
||||||
|
* - DE_BOOL
|
||||||
|
- None
|
||||||
|
- Not supported
|
||||||
|
* - DE_INT8
|
||||||
|
- int32
|
||||||
|
-
|
||||||
|
* - DE_UINT8
|
||||||
|
- bytes(1D uint8)
|
||||||
|
- Drop dimension
|
||||||
|
* - DE_INT16
|
||||||
|
- int32
|
||||||
|
-
|
||||||
|
* - DE_UINT16
|
||||||
|
- int32
|
||||||
|
-
|
||||||
|
* - DE_INT32
|
||||||
|
- int32
|
||||||
|
-
|
||||||
|
* - DE_UINT32
|
||||||
|
- int64
|
||||||
|
-
|
||||||
|
* - DE_INT64
|
||||||
|
- int64
|
||||||
|
-
|
||||||
|
* - DE_UINT64
|
||||||
|
- None
|
||||||
|
- Not supported
|
||||||
|
* - DE_FLOAT16
|
||||||
|
- float32
|
||||||
|
-
|
||||||
|
* - DE_FLOAT32
|
||||||
|
- float32
|
||||||
|
-
|
||||||
|
* - DE_FLOAT64
|
||||||
|
- float64
|
||||||
|
-
|
||||||
|
* - DE_STRING
|
||||||
|
- string
|
||||||
|
- Multi-dimensional DE_STRING is not supported
|
||||||
|
|
||||||
Note:
|
Note:
|
||||||
1. To save the samples in order, should set dataset's shuffle false and num_files 1.
|
1. To save the samples in order, should set dataset's shuffle false and num_files 1.
|
||||||
2. Before call the function, do not use batch, repeat operator or data augmentation operators
|
2. Before call the function, do not use batch, repeat operator or data augmentation operators
|
||||||
with random attribute in map operator.
|
with random attribute in map operator.
|
||||||
3. Mindreocrd do not support np.uint64, multi-dimensional np.uint8(drop dimension) and
|
3. Mindrecord does not support DE_UINT64, multi-dimensional DE_UINT8 (drop dimension) and
|
||||||
multi-dimensional string.
|
multi-dimensional DE_STRING.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
file_name (str): Path to dataset file.
|
file_name (str): Path to dataset file.
|
||||||
|
|
Loading…
Reference in New Issue