!3318 [MD] add pydoc for save ops

Merge pull request !3318 from liyong126/fix_save_doc
2020-07-23 10:40:24 +08:00 · 2020-07-23 10:40:24 +08:00 · 7b54fd8304
parent 322c24e6c5 43e5db3d42
commit 7b54fd8304
2 changed files with 55 additions and 2 deletions
--- a/mindspore/ccsrc/minddata/dataset/api/de_pipeline.cc
+++ b/mindspore/ccsrc/minddata/dataset/api/de_pipeline.cc
@ -410,6 +410,7 @@ Status DEPipeline::SaveDataset(const std::vector<std::string> &file_names, const
      std::vector<std::string> index_fields;
      s = FetchMetaFromTensorRow(column_name_id_map, row, &mr_json, &index_fields);
      RETURN_IF_NOT_OK(s);
      MS_LOG(DEBUG) << "Schema of saved mindrecord: " << mr_json.dump();
      if (mindrecord::SUCCESS !=
          mindrecord::ShardHeader::initialize(&mr_header, mr_json, index_fields, blob_fields, mr_schema_id)) {
        RETURN_STATUS_UNEXPECTED("Error: failed to initialize ShardHeader.");
@ -569,6 +570,7 @@ Status DEPipeline::FetchMetaFromTensorRow(const std::unordered_map<std::string,
  if (column_name_id_map.empty()) {
    RETURN_STATUS_UNEXPECTED("Error: column not found.");
  }
  json dataset_schema;
  for (auto &col : column_name_id_map) {
    auto idx = col.second;
    auto column_name = col.first;
@ -580,6 +582,7 @@ Status DEPipeline::FetchMetaFromTensorRow(const std::unordered_map<std::string,
    auto shapes = column_shape.AsVector();
    std::vector<int> mr_shape(shapes.begin(), shapes.end());
    std::string el = column_type.ToString();
    dataset_schema[column_name] = el;
    if (mindrecord::kTypesMap.find(el) == mindrecord::kTypesMap.end()) {
      std::string err_msg("Error: can not support data type: " + el);
      RETURN_STATUS_UNEXPECTED(err_msg);
@ -605,6 +608,7 @@ Status DEPipeline::FetchMetaFromTensorRow(const std::unordered_map<std::string,
    if (mr_type == "bytes" || !mr_shape.empty()) continue;
    index_fields->emplace_back(column_name);  // candidate of index fields
  }
  MS_LOG(DEBUG) << "Schema of dataset: " << dataset_schema.dump();
  return Status::OK();
 }
 Status DEPipeline::BuildMindrecordSamplerChain(const py::handle &handle,
--- a/mindspore/dataset/engine/datasets.py
+++ b/mindspore/dataset/engine/datasets.py
@ -1042,12 +1042,61 @@ class Dataset:
        """
        Save the dynamic data processed by the dataset pipeline in a common dataset format. Supported format: mindrecord.
        Implicit type casting is applied when saving data as mindrecord. The table below shows how each type is cast.
        .. list-table:: Implicit Type Casting of Saving as mindrecord
           :widths: 25 25 50
           :header-rows: 1
           * - type in 'dataset'
             - type in 'mindrecord'
             - detail
           * - DE_BOOL
             - None
             - Not supported
           * - DE_INT8
             - int32
             -
           * - DE_UINT8
             - bytes(1D uint8)
             - Drop dimension
           * - DE_INT16
             - int32
             -
           * - DE_UINT16
             - int32
             -
           * - DE_INT32
             - int32
             -
           * - DE_UINT32
             - int64
             -
           * - DE_INT64
             - int64
             -
           * - DE_UINT64
             - None
             - Not supported
           * - DE_FLOAT16
             - float32
             -
           * - DE_FLOAT32
             - float32
             -
           * - DE_FLOAT64
             - float64
             -
           * - DE_STRING
             - string
             - Multi-dimensional DE_STRING is not supported
        Note:
            1. To save the samples in order, set the dataset's shuffle to False and num_files to 1.
            2. Before calling this function, do not use the batch or repeat operators, or data augmentation
               operators with random attributes in the map operator.
-            3. Mindreocrd do not support np.uint64, multi-dimensional np.uint8(drop dimension) and
+            3. Mindrecord does not support DE_UINT64, multi-dimensional DE_UINT8(drop dimension) and
-               multi-dimensional string.
+               multi-dimensional DE_STRING.
        Args:
            file_name (str): Path to dataset file.