forked from mindspore-Ecosystem/mindspore
!19979 dataset: modify some error msg
Merge pull request !19979 from ms_yan/err_msg
commit 93af13f332
@@ -251,7 +251,7 @@ Status SaveToDisk::Save() {
   auto mr_writer = std::make_unique<mindrecord::ShardWriter>();
   std::vector<std::string> blob_fields;
   if (mindrecord::SUCCESS != mindrecord::ShardWriter::initialize(&mr_writer, file_names)) {
-    RETURN_STATUS_UNEXPECTED("Error: failed to initialize ShardWriter.");
+    RETURN_STATUS_UNEXPECTED("Error: failed to initialize ShardWriter, please check above `ERROR` level message.");
   }

   std::unordered_map<std::string, int32_t> column_name_id_map;
@@ -15,6 +15,7 @@
  */
 #include "minddata/dataset/engine/datasetops/rename_op.h"

+#include <set>
 #include <vector>
 #include <unordered_map>

@@ -52,6 +53,7 @@ Status RenameOp::ComputeColMap() {
   std::unordered_map<std::string, int32_t> new_col_name_id_map = {};
   // parameter for input check
   size_t found = 0;
+  std::set<std::string> new_col_name;

   // iterate over all the pairs and if there is a name match with rename, rename the column and add it to new map
   // by doing it this way we recreate a new ColNameIdMap and allow for switching
@@ -67,12 +69,27 @@ Status RenameOp::ComputeColMap() {
       found += 1;
       int index = std::distance(in_columns_.begin(), it);
       MS_LOG(DEBUG) << "Rename operator index found " << index << " value " << id << ".";
+      if (new_col_name.find(out_columns_[index]) != new_col_name.end()) {
+        std::string err_msg(
+          "rename operation does not support rename one column name into another already exist column name, existed"
+          " column name is: " +
+          out_columns_[index] + ".");
+        RETURN_STATUS_UNEXPECTED(err_msg);
+      }
       new_col_name_id_map[out_columns_[index]] = id;
+      new_col_name.insert(out_columns_[index]);
     } else {
       // not found
+      if (new_col_name.find(name) != new_col_name.end()) {
+        std::string err_msg(
+          "rename operation does not support rename one column name into another already exist column name, existed"
+          " column name is: " +
+          name + ".");
+        RETURN_STATUS_UNEXPECTED(err_msg);
+      }
       MS_LOG(DEBUG) << "Rename operator index not found: " << id << " is the column id.";
       new_col_name_id_map[name] = id;
+      new_col_name.insert(name);
     }
   }
   // only checks number of renamed columns have been found, this input check doesn't check everything
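For illustration, a minimal Python-level sketch of the situation the new duplicate-name check rejects; the dataset, column names, and values below are made up, and the standard mindspore.dataset API is assumed:

import mindspore.dataset as ds

# Two columns; renaming "col_a" onto the already existing name "col_b"
# should now fail with the "already exist column name" error when the
# pipeline is executed.
data = {"col_a": [1, 2, 3], "col_b": [4, 5, 6]}
dataset = ds.NumpySlicesDataset(data, shuffle=False)
dataset = dataset.rename(input_columns=["col_a"], output_columns=["col_b"])
for row in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):
    print(row)  # expected to raise RuntimeError carrying the message above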
@@ -816,7 +816,7 @@ class Dataset:
             count (int): Number of elements in the dataset to be skipped.

         Returns:
-            SkipDataset, dataset skipped.
+            SkipDataset, dataset that containing rows like origin rows subtract skipped rows.

         Examples:
             >>> # dataset is an instance of Dataset object.
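A quick sketch of the documented behaviour (not part of the commit; the 5-row dataset is hypothetical):

import mindspore.dataset as ds

# 5 rows minus 2 skipped rows leaves 3 rows.
dataset = ds.NumpySlicesDataset({"col": [0, 1, 2, 3, 4]}, shuffle=False)
dataset = dataset.skip(2)
rows = [row["col"] for row in dataset.create_dict_iterator(num_epochs=1, output_numpy=True)]
print(len(rows))  # expected: 3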
@@ -1711,8 +1711,11 @@ class Dataset:
                 (isinstance(num_batch, int) and num_batch <= 0):
             # throwing exception, disable all sync_wait in pipeline
             self.disable_sync()
-            raise RuntimeError("Sync_update batch size can only be positive, got : {}.".format(num_batch))
+            raise RuntimeError("Sync_update batch size can only be positive integer, got : {}.".format(num_batch))
         notifiers_dict = self.get_sync_notifiers()
+        if not isinstance(condition_name, str):
+            raise TypeError("Argument condition_name with value {} is not of type str, but got {}."
+                            .format(condition_name, type(condition_name)))
         if condition_name not in notifiers_dict:
             # throwing exception, disable all sync_wait in pipeline
             self.disable_sync()
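A hedged sketch of how the tightened sync_update checks surface to callers; the condition name "cb" and the tiny pipeline are hypothetical:

import mindspore.dataset as ds

dataset = ds.NumpySlicesDataset({"col": [0, 1, 2, 3]}, shuffle=False)
dataset = dataset.sync_wait(condition_name="cb", num_batch=1)
dataset = dataset.batch(2)

# A non-integer or non-positive num_batch still raises the RuntimeError above,
# and a non-str condition_name is now reported as a TypeError up front rather
# than falling through to the later condition_name lookup.
dataset.sync_update(condition_name="cb", num_batch=0)  # expected: RuntimeError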
@@ -696,7 +696,7 @@ def check_skip(method):
         [count], _ = parse_user_args(method, *args, **kwargs)

         type_check(count, (int,), "count")
-        check_value(count, (-1, INT32_MAX), "count")
+        check_value(count, (0, INT32_MAX), "count")

         return method(self, *args, **kwargs)

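With the interval narrowed from (-1, INT32_MAX) to (0, INT32_MAX), a negative count is now rejected by this validator as a ValueError instead of passing the Python check and failing later in the C++ skip op (compare the updated test_skip_exception_1 below). A hedged sketch:

import mindspore.dataset as ds

dataset = ds.NumpySlicesDataset({"col": [0, 1, 2]}, shuffle=False)
try:
    dataset = dataset.skip(-1)
except ValueError as e:
    print(e)  # expected to mention the required interval for count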
@@ -711,7 +711,8 @@ def check_take(method):
         [count], _ = parse_user_args(method, *args, **kwargs)
         type_check(count, (int,), "count")
         if (count <= 0 and count != -1) or count > INT32_MAX:
-            raise ValueError("count should be either -1 or positive integer.")
+            raise ValueError("count should be either -1 or within the required interval of ({}, {}], got {}."
+                             .format(0, INT32_MAX, count))

         return method(self, *args, **kwargs)

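The set of accepted take counts is unchanged (-1 or a positive integer up to INT32_MAX); only the message wording changes, which is what test_take_19 below now asserts. A minimal sketch mirroring that test, with a made-up input dataset:

import pytest
import mindspore.dataset as ds

dataset = ds.NumpySlicesDataset({"col": [0, 1, 2, 3]}, shuffle=False)
with pytest.raises(ValueError) as info:
    dataset = dataset.batch(2)
    dataset = dataset.take(0)  # 0 is still invalid; use -1 or a positive count
assert "within the required interval" in str(info.value)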
@@ -17,7 +17,6 @@ import pytest

 import mindspore.dataset as ds
 import mindspore.dataset.vision.c_transforms as vision
-from mindspore import log as logger


 DATA_DIR_TF2 = ["../data/dataset/test_tf_file_3_images/train-0000-of-0001.data"]
@@ -208,9 +207,8 @@ def test_skip_exception_1():
         for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True):
             num_iter += 1

-    except RuntimeError as e:
-        logger.info("Got an exception in DE: {}".format(str(e)))
-        assert "skip_count should not be negative, skip_count: -1" in str(e)
+    except ValueError as e:
+        assert "Input count is not within the required interval" in str(e)


 def test_skip_exception_2():
@@ -351,7 +351,7 @@ def test_take_19():

         data1 = data1.batch(2)
         data1 = data1.take(0)
-    assert "positive integer" in str(info.value)
+    assert "within the required interval" in str(info.value)

 if __name__ == '__main__':
     test_take_01()