!8688 dataset: modify the error log msg in python layer to be more acceptable

From: @ms_yan
Reviewed-by: 
Signed-off-by:
mindspore-ci-bot 2020-11-18 21:43:48 +08:00 committed by Gitee
commit 8e1ce841b4
21 changed files with 206 additions and 192 deletions


@ -186,7 +186,7 @@ class WaitedDSCallback(Callback, DSCallback):
success = self.epoch_event.wait(timeout=ds.config.get_callback_timeout())
self.epoch_event.clear()
if not success:
raise RuntimeError(f"ds_epoch_begin timed out after {ds.config.get_callback_timeout()} second(s)")
raise RuntimeError(f"ds_epoch_begin timed out after {ds.config.get_callback_timeout()} second(s).")
# by the time this thread wakes up, self.epoch_run_context is already available
self.sync_epoch_begin(self.epoch_run_context, ds_run_context)
@ -212,7 +212,7 @@ class WaitedDSCallback(Callback, DSCallback):
success = self.step_event.wait(timeout=ds.config.get_callback_timeout())
self.step_event.clear()
if not success:
raise RuntimeError(f"ds_step_begin timed out after {ds.config.get_callback_timeout()} second(s)")
raise RuntimeError(f"ds_step_begin timed out after {ds.config.get_callback_timeout()} second(s).")
# by the time this thread wakes up, self.step_run_context is already available
self.sync_step_begin(self.step_run_context, ds_run_context)
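For reference, the timeout both messages interpolate comes from the global dataset config; a minimal sketch of widening it before training (the value 120 is illustrative):

import mindspore.dataset as ds

# Widen the window ds_epoch_begin/ds_step_begin wait before timing out.
ds.config.set_callback_timeout(120)
assert ds.config.get_callback_timeout() == 120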


@ -122,7 +122,7 @@ def check_pos_float64(value, arg_name=""):
def check_valid_detype(type_):
if type_ not in valid_detype:
raise ValueError("Unknown column type")
raise TypeError("Unknown column type.")
return True
@ -146,10 +146,10 @@ def check_columns(columns, name):
type_check(columns, (list, str), name)
if isinstance(columns, str):
if not columns:
raise ValueError("{0} should not be an empty str".format(name))
raise ValueError("{0} should not be an empty str.".format(name))
elif isinstance(columns, list):
if not columns:
raise ValueError("{0} should not be empty".format(name))
raise ValueError("{0} should not be empty.".format(name))
for i, column_name in enumerate(columns):
if not column_name:
raise ValueError("{0}[{1}] should not be empty.".format(name, i))
@ -250,10 +250,10 @@ def check_filename(path):
forbidden_symbols = set(r'\/:*?"<>|`&\';')
if set(filename) & forbidden_symbols:
raise ValueError(r"filename should not contains \/:*?\"<>|`&;\'")
raise ValueError(r"filename should not contain \/:*?\"<>|`&;\'")
if filename.startswith(' ') or filename.endswith(' '):
raise ValueError("filename should not start/end with space")
raise ValueError("filename should not start/end with space.")
return True
@ -374,4 +374,4 @@ def check_gnn_list_or_ndarray(param, param_name):
def check_tensor_op(param, param_name):
"""check whether param is a tensor op or a callable Python function"""
if not isinstance(param, cde.TensorOp) and not callable(param):
raise TypeError("{0} is not a c_transform op (TensorOp) nor a callable pyfunc.".format(param_name))
raise TypeError("{0} is neither a c_transform op (TensorOp) nor a callable pyfunc.".format(param_name))
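This reworded message surfaces through the public transform APIs; a hedged sketch that triggers it, mirroring the updated tests near the end of this change:

import mindspore.common.dtype as mstype
import mindspore.dataset.transforms.c_transforms as c_transforms

# A plain int is neither a TensorOp nor callable, so validation now raises:
# TypeError: op_list[0] is neither a c_transform op (TensorOp) nor a callable pyfunc.
c_transforms.Compose([1, c_transforms.TypeCast(mstype.int32)])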


@ -117,11 +117,11 @@ def get_num_rows(num_rows, num_shards):
ValueError: If num_shards is invalid (<= 0).
"""
if num_rows < 0:
raise ValueError("num_rows is invalid (< 0)")
raise ValueError("num_rows is invalid, less than 0.")
if num_shards is not None:
if num_shards <= 0:
raise ValueError("num_shards is invalid (<= 0)")
raise ValueError("num_shards is invalid, less than or equal to 0.")
if num_rows % num_shards == 0:
num_rows = num_rows // num_shards
else:
@ -869,7 +869,7 @@ class Dataset:
elif isinstance(datasets, Dataset):
datasets = (self, datasets)
else:
raise TypeError("The zip function %s type error!" % (datasets))
raise TypeError("Invalid datasets, expected Dataset object or tuple of Dataset, but got %s!" % (datasets))
return ZipDataset(datasets)
@check_concat
@ -902,7 +902,7 @@ class Dataset:
elif isinstance(datasets, list):
datasets = [self] + datasets
else:
raise TypeError("The concat_dataset function %s type error!" % (datasets))
raise TypeError("Invalid datasets, expected Dataset object or list of Dataset, but got %s!" % (datasets))
return ConcatDataset(datasets)
@check_rename
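A hedged usage sketch of the type contracts the two rewritten messages describe (dataset contents and column names are illustrative):

import mindspore.dataset as ds

data1 = ds.NumpySlicesDataset({"col1": [1, 2, 3]})
data2 = ds.NumpySlicesDataset({"col2": [4, 5, 6]})

zipped = data1.zip(data2)        # a single Dataset is accepted
merged = data1.concat([data2])   # concat also accepts a list of Datasets
# data1.zip("oops") would now raise:
# TypeError: Invalid datasets, expected Dataset object or tuple of Dataset, but got oops!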
@ -1055,7 +1055,7 @@ class Dataset:
raise TypeError("Please set device_type in context")
if device_type not in ('Ascend', 'GPU', 'CPU'):
raise ValueError("Only support CPU, Ascend, GPU")
raise ValueError("Only CPU, Ascend or GPU device type is supported.")
def get_distribution(output_dataset):
dev_id = 0
@ -1072,7 +1072,7 @@ class Dataset:
return "", dev_id
if not output_dataset.children:
raise RuntimeError("Unknown output_dataset: {}".format(type(output_dataset)))
raise RuntimeError("Unknown output_dataset: {}.".format(type(output_dataset)))
input_dataset = output_dataset.children[0]
return get_distribution(input_dataset)
@ -1084,9 +1084,9 @@ class Dataset:
dist = json.load(distribution_f)
device_id = dist["deviceId"]
except json.decoder.JSONDecodeError:
raise RuntimeError("Json decode error when load distribution file")
raise RuntimeError("Json decode error when loading distribution file.")
except Exception:
raise RuntimeError("Distribution file failed to read")
raise RuntimeError("Failed to read distribution file.")
return TransferDataset(self, queue_name, device_id, device_type, send_epoch_end)
@ -1346,12 +1346,12 @@ class Dataset:
if isinstance(num_batch, int) and num_batch <= 0:
# throwing exception, disable all sync_wait in pipeline
self.disable_sync()
raise RuntimeError("Sync_update batch size can only be positive, got : {}".format(num_batch))
raise RuntimeError("Sync_update batch size can only be positive, got: {}.".format(num_batch))
notifiers_dict = self.get_sync_notifiers()
if condition_name not in notifiers_dict:
# throwing exception, disable all sync_wait in pipeline
self.disable_sync()
raise RuntimeError("Condition name not found")
raise RuntimeError("Condition name not found.")
if num_batch is not None:
num_batch *= self.get_batch_size()
notifiers_dict[condition_name](num_batch, data)
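sync_update pairs with sync_wait; a minimal sketch of the intended call pattern, assuming a NumpySlicesDataset source (the condition name "cb" is illustrative):

import numpy as np
import mindspore.dataset as ds

data = ds.NumpySlicesDataset(np.arange(10), column_names=["x"])
data = data.sync_wait(condition_name="cb", num_batch=1)
for _ in data.create_dict_iterator(num_epochs=1):
    # Releasing the lock every batch avoids the hang warned about below.
    data.sync_update(condition_name="cb")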
@ -1429,7 +1429,7 @@ class SourceDataset(Dataset):
unmatched_patterns.append(pattern)
if unmatched_patterns:
raise ValueError("The following patterns did not match any files: ", unmatched_patterns)
raise ValueError("The following patterns did not match any files: {}.".format(unmatched_patterns))
if file_list: # not empty
return file_list
@ -1873,7 +1873,7 @@ class BlockReleasePair:
timeout=get_callback_timeout())
# time_out will be False if time out occurs
if not not_time_out:
logger.warning("Timeout happened in sync_wait, disabling lock")
logger.warning("Timeout happened in sync_wait, disabling lock.")
self.disable = True
self.row_count += 1
return True
@ -1918,8 +1918,8 @@ class SyncWaitDataset(DatasetOp):
self._pair = BlockReleasePair(num_batch, callback)
if self._condition_name in self.children[0].get_sync_notifiers():
raise RuntimeError("Condition name is already in use")
logger.warning("Please remember to add dataset.sync_update(condition=%s), otherwise will result in hanging",
raise RuntimeError("Condition name is already in use.")
logger.warning("Please remember to add dataset.sync_update(condition=%s), otherwise hanging will result.",
condition_name)
def get_sync_notifiers(self):
@ -1981,7 +1981,7 @@ class ShuffleDataset(DatasetOp):
input_dataset.parent.append(self)
self._input_indexs = input_dataset.input_indexs
if self.is_sync():
raise RuntimeError("No shuffle after sync operators")
raise RuntimeError("No shuffle after sync operators.")
def get_args(self):
args = super().get_args()
@ -2045,7 +2045,7 @@ class _PythonCallable:
_set_iterator_cleanup()
self.pool.close()
self.pool.join()
raise Exception("Multiprocess MapOp worker receives KeyboardInterrupt")
raise Exception("Multiprocess MapOp worker receives KeyboardInterrupt.")
return (None,)
# Invoke original Python callable in master process in case the pool is gone.
return self.py_callable(*args)
@ -2128,7 +2128,8 @@ class MapDataset(DatasetOp):
if self.input_columns and self.output_columns \
and len(self.input_columns) != len(self.output_columns) \
and self.column_order is None:
raise ValueError("When (len(input_columns) != len(output_columns)), column_order must be specified.")
raise ValueError("When length of input_columns and output_columns are not equal,"
" column_order must be specified.")
input_dataset.parent.append(self)
self._input_indexs = input_dataset.input_indexs
@ -2405,7 +2406,7 @@ class ZipDataset(DatasetOp):
super().__init__()
for dataset in datasets:
if not isinstance(dataset, Dataset):
raise TypeError("The parameter %s of zip has type error!" % (dataset))
raise TypeError("Invalid dataset, expected Dataset object, but got %s!" % type(dataset))
self.datasets = datasets
for data in datasets:
self.children.append(data)
@ -2457,7 +2458,7 @@ class ConcatDataset(DatasetOp):
super().__init__()
for dataset in datasets:
if not isinstance(dataset, Dataset):
raise TypeError("The parameter %s of concat has type error!" % (dataset))
raise TypeError("Invalid dataset, expected Dataset object, but got %s!" % type(dataset))
self.datasets = datasets
self._sampler = None
for data in datasets:
@ -2468,8 +2469,8 @@ class ConcatDataset(DatasetOp):
child_index = 0
for item in self.children_sizes_:
if item == 0:
raise ValueError("There is no samples in the %dth dataset. Please make sure there are "
"valid samples in the dataset" % child_index)
raise ValueError("There are no samples in the dataset number %d. Please make sure there are "
"valid samples in the dataset." % child_index)
child_index += 1
# _children_flag_and_nums: A list of pair<int, int>. The first element of pair is flag that characterizes
@ -2524,7 +2525,7 @@ class ConcatDataset(DatasetOp):
raise TypeError("The parameter %s of concat must be DistributedSampler!" % (sampler))
if sampler.is_shuffled():
raise ValueError("The parameter shuffle of DistributedSampler must to be False!")
raise ValueError("The parameter shuffle of DistributedSampler must be False!")
if sampler.num_shards <= 0:
raise ValueError("The parameter num_shards of DistributedSampler must be positive int!")
@ -2672,10 +2673,10 @@ class TransferDataset(DatasetOp):
raise RuntimeError("TransferDataset is not iterable.")
def output_shapes(self):
raise RuntimeError("TransferDataset does not support output_shapes.")
raise RuntimeError("TransferDataset does not support obtaining output_shapes.")
def output_types(self):
raise RuntimeError("TransferDataset does not support output_types.")
raise RuntimeError("TransferDataset does not support obtaining output_types.")
def send(self, num_epochs=-1):
# need to keep iterator alive so the executionTree is not destroyed
@ -2757,7 +2758,7 @@ def _select_sampler(num_samples, input_sampler, shuffle, num_shards, shard_id, n
(any(arg is not None for arg in [num_shards, shard_id, shuffle, num_samples]))):
raise ValueError(
'Conflicting arguments during sampler assignments. num_samples: {}, num_shards: {},'
' shard_id: {}, shuffle: {})'.format(num_samples, num_shards, shard_id, shuffle))
' shard_id: {}, shuffle: {}.'.format(num_samples, num_shards, shard_id, shuffle))
return input_sampler
if shuffle is None:
if num_shards is not None:
@ -3360,13 +3361,13 @@ class SamplerFn:
try:
result = self.workers[i % self.num_worker].get()
except queue.Empty:
raise Exception("Generator worker process timeout")
raise Exception("Generator worker process timeout.")
except KeyboardInterrupt:
self.eof.set()
for w in self.workers:
w.terminate()
w.join()
raise Exception("Generator worker receives KeyboardInterrupt")
raise Exception("Generator worker receives KeyboardInterrupt.")
if idx_cursor < len(indices):
idx_cursor = _fill_worker_indices(self.workers, indices, idx_cursor)
yield tuple([np.array(x, copy=False) for x in result])
@ -3384,7 +3385,7 @@ def _generator_worker_loop(dataset, idx_queue, result_queue, eof):
try:
idx = idx_queue.get(timeout=1)
except KeyboardInterrupt:
raise Exception("Generator worker receives KeyboardInterrupt")
raise Exception("Generator worker receives KeyboardInterrupt.")
except queue.Empty:
if eof.is_set():
return
@ -3404,7 +3405,7 @@ def _generator_worker_loop(dataset, idx_queue, result_queue, eof):
try:
result_queue.put(result, timeout=5)
except KeyboardInterrupt:
raise Exception("Generator worker receives KeyboardInterrupt")
raise Exception("Generator worker receives KeyboardInterrupt.")
except queue.Full:
if eof.is_set():
return
@ -3416,7 +3417,7 @@ def _generator_worker_loop(dataset, idx_queue, result_queue, eof):
class _GeneratorWorkerMt(threading.Thread):
"""
Worker process for multithread Generator.
Worker process for multi-thread Generator.
"""
def __init__(self, dataset, eof):
@ -3472,10 +3473,10 @@ class _GeneratorWorkerMp(multiprocessing.Process):
def queue_empty(self):
if not self.idx_queue.empty():
logger.error("idx_queue is not empty")
logger.error("idx_queue is not empty.")
return False
if not self.res_queue.empty():
logger.error("res_queue is not empty")
logger.error("res_queue is not empty.")
return False
return True
@ -3773,7 +3774,8 @@ class TFRecordDataset(SourceDataset):
self.num_samples = schema_obj.num_rows
if not isinstance(shuffle, (bool, Shuffle)):
raise TypeError("shuffle must be of type boolean or enum 'Shuffle'.")
raise TypeError("shuffle must be of type boolean or enum 'Shuffle', with values like"
" 'Shuffle.GLOBAL' or 'Shuffle.FILES'.")
if not isinstance(shuffle, Shuffle):
if shuffle:
self.shuffle_level = Shuffle.GLOBAL
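The expanded message enumerates the accepted values; a hedged sketch (the file path is illustrative):

import mindspore.dataset as ds
from mindspore.dataset import Shuffle

# Either a bool or a Shuffle enum member passes the tightened check.
data = ds.TFRecordDataset("/path/to/train.tfrecord", shuffle=True)
data = ds.TFRecordDataset("/path/to/train.tfrecord", shuffle=Shuffle.FILES)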
@ -4535,11 +4537,11 @@ class Schema:
try:
name = column.pop("name")
except KeyError:
raise RuntimeError("Column's name is missing")
raise RuntimeError("Column's name is missing.")
try:
de_type = column.pop("type")
except KeyError:
raise RuntimeError("Column' type is missing")
raise RuntimeError("Column's type is missing.")
shape = column.pop("shape", None)
column.pop("t_impl", None)
column.pop("rank", None)
@ -4552,7 +4554,7 @@ class Schema:
try:
de_type = value.pop("type")
except KeyError:
raise RuntimeError("Column' type is missing")
raise RuntimeError("Column's type is missing.")
shape = value.pop("shape", None)
value.pop("t_impl", None)
value.pop("rank", None)
@ -4584,13 +4586,13 @@ class Schema:
elif k == "columns":
self.parse_columns(v)
else:
raise RuntimeError("Unknown field %s" % k)
raise RuntimeError("Unknown field %s." % k)
if self.columns is None:
raise RuntimeError("Columns are missing.")
if self.num_rows is not None:
if not isinstance(self.num_rows, int) or self.num_rows <= 0:
raise ValueError("numRows must be greater than 0")
raise ValueError("numRows must be greater than 0.")
def __str__(self):
return self.to_json()
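The missing-key errors refer to the per-column "name" and "type" fields a schema must carry; a minimal sketch of building one programmatically (column names and types are illustrative):

import mindspore.dataset as ds

schema = ds.Schema()
schema.add_column(name="image", de_type="uint8", shape=[-1])
schema.add_column(name="label", de_type="int32")
# A JSON schema file whose column omits "name" or "type" now raises
# RuntimeError: Column's name is missing. / Column's type is missing.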
@ -5130,7 +5132,7 @@ class CelebADataset(MappableDataset):
if int(split_line[1]) == usage_type:
partition_num += 1
except FileNotFoundError:
raise RuntimeError("Partition file can not be found")
raise RuntimeError("Partition file can not be found.")
if partition_num < num_rows:
num_rows = partition_num
@ -5340,7 +5342,8 @@ class CLUEDataset(SourceDataset):
self.cols_to_keyword = self.task_dict[task][usage]
if not isinstance(shuffle, (bool, Shuffle)):
raise TypeError("shuffle must be of type boolean or enum 'Shuffle'.")
raise TypeError("shuffle must be of type boolean or enum 'Shuffle', with values like"
" 'Shuffle.GLOBAL' or 'Shuffle.FILES'.")
if not isinstance(shuffle, Shuffle):
if shuffle:
self.shuffle_level = Shuffle.GLOBAL
@ -5455,7 +5458,8 @@ class CSVDataset(SourceDataset):
self.num_samples = num_samples
if not isinstance(shuffle, (bool, Shuffle)):
raise TypeError("shuffle must be of type boolean or enum 'Shuffle'.")
raise TypeError("shuffle must be of type boolean or enum 'Shuffle', with values like"
" 'Shuffle.GLOBAL' or 'Shuffle.FILES'.")
if not isinstance(shuffle, Shuffle):
if shuffle:
self.shuffle_level = Shuffle.GLOBAL
@ -5563,7 +5567,8 @@ class TextFileDataset(SourceDataset):
self.num_samples = num_samples
if not isinstance(shuffle, (bool, Shuffle)):
raise TypeError("shuffle must be of type boolean or enum 'Shuffle'.")
raise TypeError("shuffle must be of type boolean or enum 'Shuffle', with values like"
" 'Shuffle.GLOBAL' or 'Shuffle.FILES'.")
if not isinstance(shuffle, Shuffle):
if shuffle:
self.shuffle_level = Shuffle.GLOBAL


@ -89,7 +89,7 @@ class GraphData:
while self._graph_data.is_stoped() is not True:
time.sleep(1)
except KeyboardInterrupt:
raise Exception("Graph data server receives KeyboardInterrupt")
raise Exception("Graph data server receives KeyboardInterrupt.")
@check_gnn_get_all_nodes
def get_all_nodes(self, node_type):
@ -112,7 +112,7 @@ class GraphData:
TypeError: If `node_type` is not integer.
"""
if self._working_mode == 'server':
raise Exception("This method is not supported when working mode is server")
raise Exception("This method is not supported when working mode is server.")
return self._graph_data.get_all_nodes(node_type).as_array()
@check_gnn_get_all_edges
@ -136,7 +136,7 @@ class GraphData:
TypeError: If `edge_type` is not integer.
"""
if self._working_mode == 'server':
raise Exception("This method is not supported when working mode is server")
raise Exception("This method is not supported when working mode is server.")
return self._graph_data.get_all_edges(edge_type).as_array()
@check_gnn_get_nodes_from_edges
@ -154,7 +154,7 @@ class GraphData:
TypeError: If `edge_list` is not list or ndarray.
"""
if self._working_mode == 'server':
raise Exception("This method is not supported when working mode is server")
raise Exception("This method is not supported when working mode is server.")
return self._graph_data.get_nodes_from_edges(edge_list).as_array()
@check_gnn_get_all_neighbors
@ -181,7 +181,7 @@ class GraphData:
TypeError: If `neighbor_type` is not integer.
"""
if self._working_mode == 'server':
raise Exception("This method is not supported when working mode is server")
raise Exception("This method is not supported when working mode is server.")
return self._graph_data.get_all_neighbors(node_list, neighbor_type).as_array()
@check_gnn_get_sampled_neighbors
@ -216,7 +216,7 @@ class GraphData:
TypeError: If `neighbor_types` is not list or ndarray.
"""
if self._working_mode == 'server':
raise Exception("This method is not supported when working mode is server")
raise Exception("This method is not supported when working mode is server.")
return self._graph_data.get_sampled_neighbors(
node_list, neighbor_nums, neighbor_types).as_array()
@ -246,7 +246,7 @@ class GraphData:
TypeError: If `neg_neighbor_type` is not integer.
"""
if self._working_mode == 'server':
raise Exception("This method is not supported when working mode is server")
raise Exception("This method is not supported when working mode is server.")
return self._graph_data.get_neg_sampled_neighbors(
node_list, neg_neighbor_num, neg_neighbor_type).as_array()
@ -274,7 +274,7 @@ class GraphData:
TypeError: If `feature_types` is not list or ndarray.
"""
if self._working_mode == 'server':
raise Exception("This method is not supported when working mode is server")
raise Exception("This method is not supported when working mode is server.")
if isinstance(node_list, list):
node_list = np.array(node_list, dtype=np.int32)
return [
@ -306,7 +306,7 @@ class GraphData:
TypeError: If `feature_types` is not list or ndarray.
"""
if self._working_mode == 'server':
raise Exception("This method is not supported when working mode is server")
raise Exception("This method is not supported when working mode is server.")
if isinstance(edge_list, list):
edge_list = np.array(edge_list, dtype=np.int32)
return [
@ -324,7 +324,7 @@ class GraphData:
node_feature_type and edge_feature_type.
"""
if self._working_mode == 'server':
raise Exception("This method is not supported when working mode is server")
raise Exception("This method is not supported when working mode is server.")
return self._graph_data.graph_info()
@check_gnn_random_walk
@ -360,6 +360,6 @@ class GraphData:
TypeError: If `meta_path` is not list or ndarray.
"""
if self._working_mode == 'server':
raise Exception("This method is not supported when working mode is server")
raise Exception("This method is not supported when working mode is server.")
return self._graph_data.random_walk(target_nodes, meta_path, step_home_param, step_away_param,
default_node).as_array()
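All of these guards fire only in 'server' mode; a hedged sketch of querying in the default 'local' mode (the file path and node type are illustrative):

import mindspore.dataset as ds

graph = ds.GraphData("/path/to/graph.mindrecord", working_mode="local")
nodes = graph.get_all_nodes(node_type=1)
# With working_mode="server", the same call raises:
# Exception: This method is not supported when working mode is server.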


@ -29,20 +29,25 @@ from . import datasets as de
_ITERATOR_CLEANUP = False
def _set_iterator_cleanup():
global _ITERATOR_CLEANUP
_ITERATOR_CLEANUP = True
def _unset_iterator_cleanup():
global _ITERATOR_CLEANUP
_ITERATOR_CLEANUP = False
def check_iterator_cleanup():
global _ITERATOR_CLEANUP
return _ITERATOR_CLEANUP
ITERATORS_LIST = list()
def _cleanup():
"""Release all the Iterator."""
_set_iterator_cleanup()
@ -51,6 +56,7 @@ def _cleanup():
if itr is not None:
itr.release()
def alter_tree(node):
"""Traversing the Python dataset tree/graph to perform some alteration to some specific nodes."""
if not node.children:
@ -73,6 +79,7 @@ def _alter_node(node):
node.iterator_bootstrap()
return node
class Iterator:
"""
General Iterator over a dataset.
@ -93,7 +100,7 @@ class Iterator:
# The dataset passed into the iterator is not the root of the tree.
# Trim the tree by saving the parent subtree into self.parent_subtree and
# restore it after launching our c++ pipeline.
# restore it after launching our C++ pipeline.
if self.dataset.parent:
logger.info("The dataset passed in is not the root of the pipeline. Ignoring parent subtree.")
self.parent_subtree = self.dataset.parent
@ -101,7 +108,7 @@ class Iterator:
self.dataset = alter_tree(self.dataset)
if not self.__is_tree():
raise ValueError("The data pipeline is not a tree (i.e., one node has 2 consumers)")
raise ValueError("The data pipeline is not a tree (i.e., one node has 2 consumers).")
self.depipeline = DEPipeline()
# for manifest temporary use
@ -116,7 +123,7 @@ class Iterator:
"""
Manually terminate Python iterator instead of relying on out of scope destruction.
"""
logger.info("terminating Python iterator. This will also terminate c++ pipeline.")
logger.info("Terminating Python iterator. This will also terminate C++ pipeline.")
if hasattr(self, 'depipeline') and self.depipeline:
del self.depipeline
@ -205,7 +212,7 @@ class Iterator:
elif isinstance(dataset, de.CSVDataset):
op_type = OpName.CSV
else:
raise ValueError("Unsupported DatasetOp")
raise ValueError("Unsupported DatasetOp.")
return op_type
@ -256,9 +263,9 @@ class Iterator:
def __next__(self):
if not self.depipeline:
logger.warning("Iterator does not have a running c++ pipeline." +
"It can be because Iterator stop() had been called, or c++ pipeline crashed silently.")
raise RuntimeError("Iterator does not have a running c++ pipeline.")
logger.warning("Iterator does not have a running C++ pipeline. " +
"It might be because Iterator stop() had been called, or C++ pipeline crashed silently.")
raise RuntimeError("Iterator does not have a running C++ pipeline.")
data = self.get_next()
if not data:
@ -298,6 +305,7 @@ class Iterator:
def __deepcopy__(self, memo):
return self
class SaveOp(Iterator):
"""
The derived class of Iterator with dict type.
@ -375,7 +383,7 @@ class TupleIterator(Iterator):
return [Tensor(t.as_array()) for t in self.depipeline.GetNextAsList()]
class DummyIterator():
class DummyIterator:
"""
A DummyIterator only works when env MS_ROLE="MS_PSERVER" or MS_ROLE="MS_SCHED"
"""


@ -24,6 +24,7 @@ import numpy as np
import mindspore._c_dataengine as cde
import mindspore.dataset as ds
class Sampler:
"""
Base class for user defined sampler.
@ -245,22 +246,22 @@ class DistributedSampler(BuiltinSampler):
def __init__(self, num_shards, shard_id, shuffle=True, num_samples=None, offset=-1):
if num_shards <= 0:
raise ValueError("num_shards should be a positive integer value, but got num_shards={}".format(num_shards))
raise ValueError("num_shards should be a positive integer value, but got num_shards: {}.".format(num_shards))
if shard_id < 0 or shard_id >= num_shards:
raise ValueError("shard_id is invalid, shard_id={}".format(shard_id))
raise ValueError("shard_id should be in range [0, {}), but got shard_id: {}.".format(num_shards, shard_id))
if not isinstance(shuffle, bool):
raise ValueError("shuffle should be a boolean value, but got shuffle={}".format(shuffle))
raise ValueError("shuffle should be a boolean value, but got shuffle: {}.".format(shuffle))
if num_samples is not None:
if num_samples <= 0:
raise ValueError("num_samples should be a positive integer "
"value, but got num_samples={}".format(num_samples))
"value, but got num_samples: {}.".format(num_samples))
if offset > num_shards:
raise ValueError("offset should be no more than num_shards={}, "
"but got offset={}".format(num_shards, offset))
raise ValueError("offset should be no more than num_shards: {}, "
"but got offset: {}.".format(num_shards, offset))
self.num_shards = num_shards
self.shard_id = shard_id
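A hedged sketch of arguments that satisfy the tightened checks (all values are illustrative):

import mindspore.dataset as ds

# num_shards must be positive, shard_id must fall in [0, num_shards), and
# offset may not exceed num_shards.
sampler = ds.DistributedSampler(num_shards=4, shard_id=0, shuffle=False, num_samples=100)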
@ -332,18 +333,18 @@ class PKSampler(BuiltinSampler):
def __init__(self, num_val, num_class=None, shuffle=False, class_column='label', num_samples=None):
if num_val <= 0:
raise ValueError("num_val should be a positive integer value, but got num_val={}".format(num_val))
raise ValueError("num_val should be a positive integer value, but got num_val: {}.".format(num_val))
if num_class is not None:
raise NotImplementedError("Not support specify num_class")
raise NotImplementedError("Not supported to specify num_class for PKSampler.")
if not isinstance(shuffle, bool):
raise ValueError("shuffle should be a boolean value, but got shuffle={}".format(shuffle))
raise ValueError("shuffle should be a boolean value, but got shuffle: {}.".format(shuffle))
if num_samples is not None:
if num_samples <= 0:
raise ValueError("num_samples should be a positive integer "
"value, but got num_samples={}".format(num_samples))
"value, but got num_samples: {}.".format(num_samples))
self.num_val = num_val
self.shuffle = shuffle
@ -372,7 +373,7 @@ class PKSampler(BuiltinSampler):
def create_for_minddataset(self):
if not self.class_column or not isinstance(self.class_column, str):
raise ValueError("class_column should be a not empty string value, \
but got class_column={}".format(class_column))
but got class_column: {}.".format(self.class_column))
num_samples = self.num_samples if self.num_samples is not None else 0
c_sampler = cde.MindrecordPkSampler(self.num_val, self.class_column, self.shuffle, num_samples)
c_child_sampler = self.create_child_for_minddataset()
@ -404,12 +405,12 @@ class RandomSampler(BuiltinSampler):
def __init__(self, replacement=False, num_samples=None):
if not isinstance(replacement, bool):
raise ValueError("replacement should be a boolean value, but got replacement={}".format(replacement))
raise ValueError("replacement should be a boolean value, but got replacement: {}.".format(replacement))
if num_samples is not None:
if num_samples <= 0:
raise ValueError("num_samples should be a positive integer "
"value, but got num_samples={}".format(num_samples))
"value, but got num_samples: {}.".format(num_samples))
self.deterministic = False
self.replacement = replacement
@ -462,12 +463,12 @@ class SequentialSampler(BuiltinSampler):
if num_samples is not None:
if num_samples <= 0:
raise ValueError("num_samples should be a positive integer "
"value, but got num_samples={}".format(num_samples))
"value, but got num_samples: {}.".format(num_samples))
if start_index is not None:
if start_index < 0:
raise ValueError("start_index should be a positive integer "
"value or 0, but got start_index={}".format(start_index))
"value or 0, but got start_index: {}.".format(start_index))
self.start_index = start_index
super().__init__(num_samples)
@ -517,7 +518,7 @@ class SubsetRandomSampler(BuiltinSampler):
>>> indices = [0, 1, 2, 3, 7, 88, 119]
>>>
>>> # creates a SubsetRandomSampler, will sample from the provided indices
>>> sampler = ds.SubsetRandomSampler()
>>> sampler = ds.SubsetRandomSampler(indices)
>>> data = ds.ImageFolderDataset(dataset_dir, num_parallel_workers=8, sampler=sampler)
"""
@ -525,7 +526,7 @@ class SubsetRandomSampler(BuiltinSampler):
if num_samples is not None:
if num_samples <= 0:
raise ValueError("num_samples should be a positive integer "
"value, but got num_samples={}".format(num_samples))
"value, but got num_samples: {}.".format(num_samples))
if not isinstance(indices, list):
indices = [indices]
@ -595,24 +596,24 @@ class WeightedRandomSampler(BuiltinSampler):
for ind, w in enumerate(weights):
if not isinstance(w, numbers.Number):
raise TypeError("type of weights element should be number, "
"but got w[{}]={}, type={}".format(ind, w, type(w)))
"but got w[{}]: {}, type: {}.".format(ind, w, type(w)))
if weights == []:
raise ValueError("weights size should not be 0")
if list(filter(lambda x: x < 0, weights)) != []:
raise ValueError("weights should not contain negative numbers")
raise ValueError("weights should not contain negative numbers.")
if list(filter(lambda x: x == 0, weights)) == weights:
raise ValueError("elements of weights should not be all zero")
raise ValueError("elements of weights should not be all zeros.")
if num_samples is not None:
if num_samples <= 0:
raise ValueError("num_samples should be a positive integer "
"value, but got num_samples={}".format(num_samples))
"value, but got num_samples: {}.".format(num_samples))
if not isinstance(replacement, bool):
raise ValueError("replacement should be a boolean value, but got replacement={}".format(replacement))
raise ValueError("replacement should be a boolean value, but got replacement: {}.".format(replacement))
self.weights = weights
self.replacement = replacement


@ -348,15 +348,15 @@ def create_node(node):
elif dataset_op == 'CacheDataset':
# Member function cache() is not defined in class Dataset yet.
raise RuntimeError(dataset_op + " is not yet supported")
raise RuntimeError(dataset_op + " is not yet supported.")
elif dataset_op == 'FilterDataset':
# Member function filter() is not defined in class Dataset yet.
raise RuntimeError(dataset_op + " is not yet supported")
raise RuntimeError(dataset_op + " is not yet supported.")
elif dataset_op == 'TakeDataset':
# Member function take() is not defined in class Dataset yet.
raise RuntimeError(dataset_op + " is not yet supported")
raise RuntimeError(dataset_op + " is not yet supported.")
elif dataset_op == 'ZipDataset':
# Create ZipDataset instance, giving dummy input dataset that will be overrided in the caller.
@ -376,7 +376,7 @@ def create_node(node):
pyobj = de.Dataset().to_device()
else:
raise RuntimeError(dataset_op + " is not yet supported by ds.engine.deserialize()")
raise RuntimeError(dataset_op + " is not yet supported by ds.engine.deserialize().")
return pyobj
@ -401,7 +401,7 @@ def construct_sampler(in_sampler):
elif sampler_name == 'WeightedRandomSampler':
sampler = sampler_class(in_sampler['weights'], in_sampler['num_samples'], in_sampler.get('replacement'))
else:
raise ValueError("Sampler type is unknown: " + sampler_name)
raise ValueError("Sampler type is unknown: {}.".format(sampler_name))
return sampler
@ -461,7 +461,7 @@ def construct_tensor_ops(operations):
result.append(op_class())
elif op_name == 'CHW2HWC':
raise ValueError("Tensor op is not supported: " + op_name)
raise ValueError("Tensor op is not supported: {}.".format(op_name))
elif op_name == 'OneHot':
result.append(op_class(op['num_classes']))
@ -474,6 +474,6 @@ def construct_tensor_ops(operations):
result.append(op_class(op['padding'], op['fill_value'], Border(op['padding_mode'])))
else:
raise ValueError("Tensor op name is unknown: " + op_name)
raise ValueError("Tensor op name is unknown: {}.".format(op_name))
return result


@ -134,7 +134,7 @@ def check_tfrecorddataset(method):
dataset_files = param_dict.get('dataset_files')
if not isinstance(dataset_files, (str, list)):
raise TypeError("dataset_files should be of type str or a list of strings.")
raise TypeError("dataset_files should be type str or a list of strings.")
validate_dataset_param_value(nreq_param_int, param_dict, int)
validate_dataset_param_value(nreq_param_list, param_dict, list)
@ -173,11 +173,11 @@ def check_vocdataset(method):
if task == "Segmentation":
imagesets_file = os.path.join(dataset_dir, "ImageSets", "Segmentation", usage + ".txt")
if param_dict.get('class_indexing') is not None:
raise ValueError("class_indexing is invalid in Segmentation task")
raise ValueError("class_indexing is not supported in Segmentation task.")
elif task == "Detection":
imagesets_file = os.path.join(dataset_dir, "ImageSets", "Main", usage + ".txt")
else:
raise ValueError("Invalid task : " + task)
raise ValueError("Invalid task: " + task + ".")
check_file(imagesets_file)
@ -214,7 +214,7 @@ def check_cocodataset(method):
type_check(task, (str,), "task")
if task not in {'Detection', 'Stuff', 'Panoptic', 'Keypoint'}:
raise ValueError("Invalid task type")
raise ValueError("Invalid task type: " + task + ".")
validate_dataset_param_value(nreq_param_int, param_dict, int)
@ -222,7 +222,7 @@ def check_cocodataset(method):
sampler = param_dict.get('sampler')
if sampler is not None and isinstance(sampler, samplers.PKSampler):
raise ValueError("CocoDataset doesn't support PKSampler")
raise ValueError("CocoDataset doesn't support PKSampler.")
check_sampler_shuffle_shard_options(param_dict)
cache = param_dict.get('cache')
@ -256,13 +256,13 @@ def check_celebadataset(method):
usage = param_dict.get('usage')
if usage is not None and usage not in ('all', 'train', 'valid', 'test'):
raise ValueError("usage should be one of 'all', 'train', 'valid' or 'test'.")
raise ValueError("usage should be 'all', 'train', 'valid' or 'test'.")
check_sampler_shuffle_shard_options(param_dict)
sampler = param_dict.get('sampler')
if sampler is not None and isinstance(sampler, samplers.PKSampler):
raise ValueError("CelebADataset does not support PKSampler.")
raise ValueError("CelebADataset doesn't support PKSampler.")
cache = param_dict.get('cache')
check_cache_option(cache)
@ -350,14 +350,14 @@ def check_generatordataset(method):
try:
iter(source)
except TypeError:
raise TypeError("source should be callable, iterable or random accessible")
raise TypeError("source should be callable, iterable or random accessible.")
column_names = param_dict.get('column_names')
if column_names is not None:
check_columns(column_names, "column_names")
schema = param_dict.get('schema')
if column_names is None and schema is None:
raise ValueError("Neither columns_names not schema are provided.")
raise ValueError("Neither column_names nor schema are provided.")
if schema is not None:
if not isinstance(schema, datasets.Schema) and not isinstance(schema, str):
@ -375,7 +375,7 @@ def check_generatordataset(method):
shard_id = param_dict.get("shard_id")
if (num_shards is None) != (shard_id is None):
# These two parameters appear together.
raise ValueError("num_shards and shard_id need to be passed in together")
raise ValueError("num_shards and shard_id need to be passed in together.")
if num_shards is not None:
check_pos_int32(num_shards, "num_shards")
if shard_id >= num_shards:
@ -384,19 +384,19 @@ def check_generatordataset(method):
sampler = param_dict.get("sampler")
if sampler is not None:
if isinstance(sampler, samplers.PKSampler):
raise ValueError("PKSampler is not supported by GeneratorDataset")
raise ValueError("GeneratorDataset doesn't support PKSampler.")
if not isinstance(sampler, (samplers.SequentialSampler, samplers.DistributedSampler,
samplers.RandomSampler, samplers.SubsetRandomSampler,
samplers.WeightedRandomSampler, samplers.Sampler)):
try:
iter(sampler)
except TypeError:
raise TypeError("sampler should be either iterable or from mindspore.dataset.samplers")
raise TypeError("sampler should be either iterable or from mindspore.dataset.samplers.")
if sampler is not None and not hasattr(source, "__getitem__"):
raise ValueError("sampler is not supported if source does not have attribute '__getitem__'")
raise ValueError("sampler is not supported if source does not have attribute '__getitem__'.")
if num_shards is not None and not hasattr(source, "__getitem__"):
raise ValueError("num_shards is not supported if source does not have attribute '__getitem__'")
raise ValueError("num_shards is not supported if source does not have attribute '__getitem__'.")
return method(self, *args, **kwargs)
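The sampler and num_shards checks hinge on the source supporting random access; a minimal sketch of a conforming source (class and column names are illustrative):

import numpy as np
import mindspore.dataset as ds

class RandomAccessSource:
    """Defines __getitem__/__len__, so samplers and num_shards are permitted."""
    def __init__(self):
        self.data = np.arange(10, dtype=np.int32)
    def __getitem__(self, index):
        return (np.array(self.data[index]),)
    def __len__(self):
        return len(self.data)

data = ds.GeneratorDataset(RandomAccessSource(), column_names=["x"],
                           sampler=ds.SequentialSampler())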
@ -433,7 +433,7 @@ def check_pad_info(key, val):
type_check(key, (str,), "key in pad_info")
if val is not None:
assert len(val) == 2, "value of pad_info should be a tuple of size 2"
assert len(val) == 2, "value of pad_info should be a tuple of size 2."
type_check(val, (tuple,), "value in pad_info")
if val[0] is not None:
@ -521,14 +521,14 @@ def check_batch(method):
if callable(batch_size):
sig = ins.signature(batch_size)
if len(sig.parameters) != 1:
raise ValueError("batch_size callable should take one parameter (BatchInfo).")
raise ValueError("callable batch_size should take one parameter (BatchInfo).")
if num_parallel_workers is not None:
check_num_parallel_workers(num_parallel_workers)
type_check(drop_remainder, (bool,), "drop_remainder")
if (pad_info is not None) and (per_batch_map is not None):
raise ValueError("pad_info and per_batch_map can't both be set")
raise ValueError("pad_info and per_batch_map can't both be set.")
if pad_info is not None:
type_check(param_dict["pad_info"], (dict,), "pad_info")
@ -542,7 +542,7 @@ def check_batch(method):
if input_columns is not None:
check_columns(input_columns, "input_columns")
if len(input_columns) != (len(ins.signature(per_batch_map).parameters) - 1):
raise ValueError("the signature of per_batch_map should match with input columns")
raise ValueError("The signature of per_batch_map should match with input columns.")
if output_columns is not None:
check_columns(output_columns, "output_columns")
@ -816,13 +816,13 @@ def check_add_column(method):
type_check(name, (str,), "name")
if not name:
raise TypeError("Expected non-empty string.")
raise TypeError("Expected non-empty string for column name.")
if de_type is not None:
if not isinstance(de_type, typing.Type) and not check_valid_detype(de_type):
raise TypeError("Unknown column type.")
raise TypeError("Unknown column type: {}.".format(de_type))
else:
raise TypeError("Expected non-empty string.")
raise TypeError("Expected non-empty string for de_type.")
if shape is not None:
type_check(shape, (list,), "shape")
@ -848,12 +848,12 @@ def check_cluedataset(method):
# check task
task_param = param_dict.get('task')
if task_param not in ['AFQMC', 'TNEWS', 'IFLYTEK', 'CMNLI', 'WSC', 'CSL']:
raise ValueError("task should be AFQMC, TNEWS, IFLYTEK, CMNLI, WSC or CSL")
raise ValueError("task should be 'AFQMC', 'TNEWS', 'IFLYTEK', 'CMNLI', 'WSC' or 'CSL'.")
# check usage
usage_param = param_dict.get('usage')
if usage_param not in ['train', 'test', 'eval']:
raise ValueError("usage should be train, test or eval")
raise ValueError("usage should be 'train', 'test' or 'eval'.")
validate_dataset_param_value(nreq_param_int, param_dict, int)
check_sampler_shuffle_shard_options(param_dict)
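A hedged sketch of arguments that pass the quoted-literal checks (the file path is illustrative):

import mindspore.dataset as ds

# task must be 'AFQMC', 'TNEWS', 'IFLYTEK', 'CMNLI', 'WSC' or 'CSL';
# usage must be 'train', 'test' or 'eval'.
data = ds.CLUEDataset("/path/to/afqmc/train.json", task="AFQMC", usage="train")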
@ -883,7 +883,7 @@ def check_csvdataset(method):
field_delim = param_dict.get('field_delim')
type_check(field_delim, (str,), 'field delim')
if field_delim in ['"', '\r', '\n'] or len(field_delim) > 1:
raise ValueError("field_delim is not legal.")
raise ValueError("field_delim is invalid.")
# check column_defaults
column_defaults = param_dict.get('column_defaults')
@ -892,7 +892,7 @@ def check_csvdataset(method):
raise TypeError("column_defaults should be type of list.")
for item in column_defaults:
if not isinstance(item, (str, int, float)):
raise TypeError("column type is not legal.")
raise TypeError("column type in column_defaults is invalid.")
# check column_names: must be list of string.
column_names = param_dict.get("column_names")
@ -997,7 +997,7 @@ def check_gnn_graphdata(method):
raise ValueError("The hostname is illegal")
type_check(working_mode, (str,), "working_mode")
if working_mode not in {'local', 'client', 'server'}:
raise ValueError("Invalid working mode, please enter 'local', 'client' or 'server'")
raise ValueError("Invalid working mode, please enter 'local', 'client' or 'server'.")
type_check(port, (int,), "port")
check_value(port, (1024, 65535), "port")
type_check(num_client, (int,), "num_client")
@ -1073,17 +1073,17 @@ def check_gnn_get_sampled_neighbors(method):
check_gnn_list_or_ndarray(neighbor_nums, 'neighbor_nums')
if not neighbor_nums or len(neighbor_nums) > 6:
raise ValueError("Wrong number of input members for {0}, should be between 1 and 6, got {1}".format(
raise ValueError("Wrong number of input members for {0}, should be between 1 and 6, got {1}.".format(
'neighbor_nums', len(neighbor_nums)))
check_gnn_list_or_ndarray(neighbor_types, 'neighbor_types')
if not neighbor_types or len(neighbor_types) > 6:
raise ValueError("Wrong number of input members for {0}, should be between 1 and 6, got {1}".format(
raise ValueError("Wrong number of input members for {0}, should be between 1 and 6, got {1}.".format(
'neighbor_types', len(neighbor_types)))
if len(neighbor_nums) != len(neighbor_types):
raise ValueError(
"The number of members of neighbor_nums and neighbor_types is inconsistent")
"The number of members of neighbor_nums and neighbor_types is inconsistent.")
return method(self, *args, **kwargs)
@ -1139,17 +1139,17 @@ def check_aligned_list(param, param_name, member_type):
check_aligned_list(member, param_name, member_type)
if member_have_list not in (None, True):
raise TypeError("The type of each member of the parameter {0} is inconsistent".format(
raise TypeError("The type of each member of the parameter {0} is inconsistent.".format(
param_name))
if list_len is not None and len(member) != list_len:
raise TypeError("The size of each member of parameter {0} is inconsistent".format(
raise TypeError("The size of each member of parameter {0} is inconsistent.".format(
param_name))
member_have_list = True
list_len = len(member)
else:
type_check(member, (member_type,), param_name)
if member_have_list not in (None, False):
raise TypeError("The type of each member of the parameter {0} is inconsistent".format(
raise TypeError("The type of each member of the parameter {0} is inconsistent.".format(
param_name))
member_have_list = False
@ -1248,7 +1248,7 @@ def check_paddeddataset(method):
padded_samples = param_dict.get("padded_samples")
if not padded_samples:
raise ValueError("Argument padded_samples cannot be empty")
raise ValueError("padded_samples cannot be empty.")
type_check(padded_samples, (list,), "padded_samples")
type_check(padded_samples[0], (dict,), "padded_element")
return method(self, *args, **kwargs)
@ -1261,6 +1261,6 @@ def check_cache_option(cache):
if cache is not None:
if os.getenv('MS_ENABLE_CACHE') != 'TRUE':
# temporary disable cache feature in the current release
raise ValueError("Caching is disabled in the current release")
raise ValueError("Caching is disabled in the current release.")
from . import cache_client
type_check(cache, (cache_client.DatasetCache,), "cache")


@ -257,7 +257,7 @@ class JiebaTokenizer(cde.JiebaTokenizerOp):
for k, v in user_dict.items():
self.add_word(k, v)
else:
raise ValueError("the type of user_dict must str or dict")
raise TypeError("The type of user_dict must be str or dict.")
def __add_dict_py_file(self, file_path):
"""Add user defined word by file"""
@ -273,7 +273,7 @@ class JiebaTokenizer(cde.JiebaTokenizerOp):
"""parser user defined word by file"""
if not os.path.exists(file_path):
raise ValueError(
"user dict file {} is not exist".format(file_path))
"user dict file {} does not exist.".format(file_path))
real_file_path = os.path.realpath(file_path)
file_dict = open(real_file_path)
data_re = re.compile('^(.+?)( [0-9]+)?$', re.U)
@ -285,7 +285,7 @@ class JiebaTokenizer(cde.JiebaTokenizerOp):
words = data_re.match(data).groups()
if len(words) != 2:
raise ValueError(
"user dict file {} format error".format(real_file_path))
"user dict file {} format error.".format(real_file_path))
words_list.append(words)
file_dict.close()
return words_list
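The corrected TypeError above documents the two accepted forms of user_dict; a hedged sketch (the model-file paths are illustrative):

import mindspore.dataset.text as text

# user_dict may be a path to a dict file or a dict of {word: frequency}.
tokenizer = text.JiebaTokenizer("/path/to/hmm_model.utf8", "/path/to/jieba.dict.utf8")
tokenizer.add_dict({"深度学习": 10})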
@ -295,14 +295,14 @@ class JiebaTokenizer(cde.JiebaTokenizerOp):
try:
data = data.decode('utf-8')
except UnicodeDecodeError:
raise ValueError("user dict file must utf8")
raise ValueError("user dict file must be utf8 format.")
return data.lstrip('\ufeff')
def __check_path__(self, model_path):
"""check model path"""
if not os.path.exists(model_path):
raise ValueError(
" jieba mode file {} is not exist".format(model_path))
"jieba model file {} does not exist.".format(model_path))
class UnicodeCharTokenizer(cde.UnicodeCharTokenizerOp):
@ -528,7 +528,7 @@ if platform.system().lower() != 'windows':
def __init__(self, normalize_form=NormalizeForm.NFKC):
if not isinstance(normalize_form, NormalizeForm):
raise TypeError("Wrong input type for normalization_form, should be NormalizeForm.")
raise TypeError("Wrong input type for normalization_form, should be enum of 'NormalizeForm'.")
self.normalize_form = DE_C_INTER_NORMALIZE_FORM[normalize_form]
super().__init__(self.normalize_form)
@ -650,7 +650,7 @@ if platform.system().lower() != 'windows':
def __init__(self, lower_case=False, keep_whitespace=False, normalization_form=NormalizeForm.NONE,
preserve_unused_token=True, with_offsets=False):
if not isinstance(normalization_form, NormalizeForm):
raise TypeError("Wrong input type for normalization_form, should be NormalizeForm.")
raise TypeError("Wrong input type for normalization_form, should be enum of 'NormalizeForm'.")
self.lower_case = lower_case
self.keep_whitespace = keep_whitespace
@ -710,7 +710,7 @@ if platform.system().lower() != 'windows':
lower_case=False, keep_whitespace=False, normalization_form=NormalizeForm.NONE,
preserve_unused_token=True, with_offsets=False):
if not isinstance(normalization_form, NormalizeForm):
raise TypeError("Wrong input type for normalization_form, should be NormalizeForm.")
raise TypeError("Wrong input type for normalization_form, should be enum of 'NormalizeForm'.")
self.vocab = vocab
self.suffix_indicator = suffix_indicator
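All three rewritten messages point at the NormalizeForm enum; a minimal sketch (NFKC is illustrative; these ops are available on non-Windows platforms only):

import mindspore.dataset.text as text
from mindspore.dataset.text import NormalizeForm

# normalization_form must be a NormalizeForm member, not a bare string.
op = text.NormalizeUTF8(normalize_form=NormalizeForm.NFKC)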


@ -417,7 +417,7 @@ def check_python_tokenizer(method):
[tokenizer], _ = parse_user_args(method, *args, **kwargs)
if not callable(tokenizer):
raise TypeError("tokenizer is not a callable Python function")
raise TypeError("tokenizer is not a callable Python function.")
return method(self, *args, **kwargs)
@ -437,8 +437,7 @@ def check_from_dataset_sentencepiece(method):
if vocab_size is not None:
check_uint32(vocab_size, "vocab_size")
else:
raise TypeError("vocab_size must be provided")
raise TypeError("vocab_size must be provided.")
if character_coverage is not None:
type_check(character_coverage, (float,), "character_coverage")


@ -49,7 +49,7 @@ def compose(transforms, *args):
if all_numpy(args):
return args
raise TypeError('args should be Numpy ndarray. Got {}. Append ToTensor() to transforms'.format(type(args)))
raise TypeError('args should be Numpy ndarray. Got {}. Append ToTensor() to transforms.'.format(type(args)))
raise TypeError('args should be Numpy ndarray. Got {}.'.format(type(args)))


@ -971,7 +971,7 @@ class Cutout:
np_img (numpy.ndarray), NumPy image array with square patches cut out.
"""
if not isinstance(np_img, np.ndarray):
raise TypeError('img should be NumPy array. Got {}'.format(type(np_img)))
raise TypeError("img should be NumPy array. Got {}.".format(type(np_img)))
_, image_h, image_w = np_img.shape
scale = (self.length * self.length) / (image_h * image_w)
bounded = False


@ -26,7 +26,7 @@ from PIL import Image, ImageOps, ImageEnhance, __version__
from .utils import Inter
from ..core.py_util_helpers import is_numpy
augment_error_message = 'img should be PIL image. Got {}. Use Decode() for encoded data or ToPIL() for decoded data.'
augment_error_message = "img should be PIL image. Got {}. Use Decode() for encoded data or ToPIL() for decoded data."
def is_pil(img):
@ -55,19 +55,19 @@ def normalize(img, mean, std):
img (numpy.ndarray), Normalized image.
"""
if not is_numpy(img):
raise TypeError('img should be NumPy image. Got {}'.format(type(img)))
raise TypeError("img should be NumPy image. Got {}.".format(type(img)))
num_channels = img.shape[0] # shape is (C, H, W)
if len(mean) != len(std):
raise ValueError("Length of mean and std must be equal")
raise ValueError("Length of mean and std must be equal.")
# if length equal to 1, adjust the mean and std arrays to have the correct
# number of channels (replicate the values)
if len(mean) == 1:
mean = [mean[0]] * num_channels
std = [std[0]] * num_channels
elif len(mean) != num_channels:
raise ValueError("Length of mean and std must both be 1 or equal to the number of channels({0})"
raise ValueError("Length of mean and std must both be 1 or equal to the number of channels({0})."
.format(num_channels))
mean = np.array(mean, dtype=img.dtype)
@ -108,7 +108,7 @@ def hwc_to_chw(img):
"""
if is_numpy(img):
return img.transpose(2, 0, 1).copy()
raise TypeError('img should be NumPy array. Got {}'.format(type(img)))
raise TypeError('img should be NumPy array. Got {}.'.format(type(img)))
def to_tensor(img, output_type):
@ -123,11 +123,11 @@ def to_tensor(img, output_type):
img (numpy.ndarray), Converted image.
"""
if not (is_pil(img) or is_numpy(img)):
raise TypeError('img should be PIL image or NumPy array. Got {}'.format(type(img)))
raise TypeError("img should be PIL image or NumPy array. Got {}.".format(type(img)))
img = np.asarray(img)
if img.ndim not in (2, 3):
raise ValueError('img dimension should be 2 or 3. Got {}'.format(img.ndim))
raise ValueError("img dimension should be 2 or 3. Got {}.".format(img.ndim))
if img.ndim == 2:
img = img[:, :, None]
@ -265,7 +265,7 @@ def resize(img, size, interpolation=Inter.BILINEAR):
raise TypeError(augment_error_message.format(type(img)))
if not (isinstance(size, int) or (isinstance(size, (list, tuple)) and len(size) == 2)):
raise TypeError('Size should be a single number or a list/tuple (h, w) of length 2. '
'Got {}'.format(size))
'Got {}.'.format(size))
if isinstance(size, int):
img_width, img_height = img.size
@ -424,7 +424,7 @@ def random_crop(img, size, padding, pad_if_needed, fill_value, padding_mode):
img_width, img_height = img.size
height, width = size
if height > img_height or width > img_width:
raise ValueError("Crop size {} is larger than input image size {}".format(size, (img_height, img_width)))
raise ValueError("Crop size {} is larger than input image size {}.".format(size, (img_height, img_width)))
if width == img_width and height == img_height:
return 0, 0, img_height, img_width
@ -558,7 +558,7 @@ def to_type(img, output_type):
img (numpy.ndarray), Converted image.
"""
if not is_numpy(img):
raise TypeError('img should be NumPy image. Got {}'.format(type(img)))
raise TypeError("img should be NumPy image. Got {}.".format(type(img)))
return img.astype(output_type)
@ -632,7 +632,7 @@ def random_color_adjust(img, brightness, contrast, saturation, hue):
elif isinstance(value, (list, tuple)) and len(value) == 2:
if not bound[0] <= value[0] <= value[1] <= bound[1]:
raise ValueError("Please check your value range of {} is valid and "
"within the bound {}".format(input_name, bound))
"within the bound {}.".format(input_name, bound))
else:
raise TypeError("Input of {} should be either a single value, or a list/tuple of "
"length 2.".format(input_name))
@ -695,7 +695,7 @@ def random_rotation(img, degrees, resample, expand, center, fill_value):
if len(degrees) != 2:
raise ValueError("If degrees is a sequence, the length must be 2.")
else:
raise TypeError("Degrees must be a single non-negative number or a sequence")
raise TypeError("Degrees must be a single non-negative number or a sequence.")
angle = random.uniform(degrees[0], degrees[1])
return rotate(img, angle, resample, expand, center, fill_value)
@ -729,7 +729,7 @@ def five_crop(img, size):
img_width, img_height = img.size
crop_height, crop_width = size
if crop_height > img_height or crop_width > img_width:
raise ValueError("Crop size {} is larger than input image size {}".format(size, (img_height, img_width)))
raise ValueError("Crop size {} is larger than input image size {}.".format(size, (img_height, img_width)))
center = center_crop(img, (crop_height, crop_width))
top_left = img.crop((0, 0, crop_width, crop_height))
top_right = img.crop((img_width - crop_width, 0, img_width, crop_height))
@ -802,7 +802,7 @@ def grayscale(img, num_output_channels):
np_img = np.dstack([np_gray, np_gray, np_gray])
img = Image.fromarray(np_img, 'RGB')
else:
raise ValueError('num_output_channels should be either 1 or 3. Got {}'.format(num_output_channels))
raise ValueError('num_output_channels should be either 1 or 3. Got {}.'.format(num_output_channels))
return img
@ -859,7 +859,7 @@ def pad(img, padding, fill_value, padding_mode):
raise TypeError("fill_value can be any of: an integer, a string or a tuple.")
if padding_mode not in ['constant', 'edge', 'reflect', 'symmetric']:
raise ValueError("Padding mode can be any of ['constant', 'edge', 'reflect', 'symmetric'].")
raise ValueError("Padding mode should be 'constant', 'edge', 'reflect', or 'symmetric'.")
if padding_mode == 'constant':
if img.mode == 'P':
@ -946,7 +946,7 @@ def get_erase_params(np_img, scale, ratio, value, bounded, max_attempts):
"""Helper function to get parameters for RandomErasing/ Cutout.
"""
if not is_numpy(np_img):
raise TypeError('img should be NumPy array. Got {}'.format(type(np_img)))
raise TypeError('img should be NumPy array. Got {}.'.format(type(np_img)))
image_c, image_h, image_w = np_img.shape
area = image_h * image_w
@ -1009,7 +1009,7 @@ def erase(np_img, i, j, height, width, erase_value, inplace=False):
np_img (numpy.ndarray), Erased NumPy image array.
"""
if not is_numpy(np_img):
raise TypeError('img should be NumPy array. Got {}'.format(type(np_img)))
raise TypeError('img should be NumPy array. Got {}.'.format(type(np_img)))
if not inplace:
np_img = np_img.copy()
@ -1111,7 +1111,7 @@ def random_affine(img, angle, translations, scale, shear, resample, fill_value=0
else:
raise ValueError(
"Shear should be a single value or a tuple/list containing " +
"two values. Got {}".format(shear))
"two values. Got {}.".format(shear))
scale = 1.0 / scale
@ -1239,13 +1239,13 @@ def rgb_to_hsvs(np_rgb_imgs, is_hwc):
np_hsv_imgs (numpy.ndarray), NumPy HSV images with same type of np_rgb_imgs.
"""
if not is_numpy(np_rgb_imgs):
raise TypeError('img should be NumPy image. Got {}'.format(type(np_rgb_imgs)))
raise TypeError("img should be NumPy image. Got {}.".format(type(np_rgb_imgs)))
shape_size = len(np_rgb_imgs.shape)
if not shape_size in (3, 4):
raise TypeError('img shape should be (H, W, C)/(N, H, W, C)/(C ,H, W)/(N, C, H, W). \
Got {}'.format(np_rgb_imgs.shape))
raise TypeError("img shape should be (H, W, C)/(N, H, W, C)/(C, H, W)/(N, C, H, W). \
Got {}.".format(np_rgb_imgs.shape))
if shape_size == 3:
batch_size = 0
@ -1261,7 +1261,7 @@ def rgb_to_hsvs(np_rgb_imgs, is_hwc):
num_channels = np_rgb_imgs.shape[1]
if num_channels != 3:
raise TypeError('img should be 3 channels RGB img. Got {} channels'.format(num_channels))
raise TypeError("img should be 3 channels RGB img. Got {} channels.".format(num_channels))
if batch_size == 0:
return rgb_to_hsv(np_rgb_imgs, is_hwc)
return np.array([rgb_to_hsv(img, is_hwc) for img in np_rgb_imgs])
@ -1307,13 +1307,13 @@ def hsv_to_rgbs(np_hsv_imgs, is_hwc):
np_rgb_imgs (numpy.ndarray), NumPy RGB images with same type of np_hsv_imgs.
"""
if not is_numpy(np_hsv_imgs):
raise TypeError('img should be NumPy image. Got {}'.format(type(np_hsv_imgs)))
raise TypeError("img should be NumPy image. Got {}.".format(type(np_hsv_imgs)))
shape_size = len(np_hsv_imgs.shape)
if not shape_size in (3, 4):
raise TypeError('img shape should be (H, W, C)/(N, H, W, C)/(C, H, W)/(N, C, H, W). \
Got {}'.format(np_hsv_imgs.shape))
raise TypeError("img shape should be (H, W, C)/(N, H, W, C)/(C, H, W)/(N, C, H, W). \
Got {}.".format(np_hsv_imgs.shape))
if shape_size == 3:
batch_size = 0
@ -1329,7 +1329,7 @@ def hsv_to_rgbs(np_hsv_imgs, is_hwc):
num_channels = np_hsv_imgs.shape[1]
if num_channels != 3:
raise TypeError('img should be 3 channels RGB img. Got {} channels'.format(num_channels))
raise TypeError("img should be 3 channels RGB img. Got {} channels.".format(num_channels))
if batch_size == 0:
return hsv_to_rgb(np_hsv_imgs, is_hwc)
return np.array([hsv_to_rgb(img, is_hwc) for img in np_hsv_imgs])
@ -1349,7 +1349,7 @@ def random_color(img, degrees):
"""
if not is_pil(img):
raise TypeError('img should be PIL image. Got {}'.format(type(img)))
raise TypeError("img should be PIL image. Got {}.".format(type(img)))
v = (degrees[1] - degrees[0]) * random.random() + degrees[0]
return ImageEnhance.Color(img).enhance(v)
@ -1369,7 +1369,7 @@ def random_sharpness(img, degrees):
"""
if not is_pil(img):
raise TypeError('img should be PIL image. Got {}'.format(type(img)))
raise TypeError("img should be PIL image. Got {}.".format(type(img)))
v = (degrees[1] - degrees[0]) * random.random() + degrees[0]
return ImageEnhance.Sharpness(img).enhance(v)
@ -1390,7 +1390,7 @@ def auto_contrast(img, cutoff, ignore):
"""
if not is_pil(img):
raise TypeError('img should be PIL image. Got {}'.format(type(img)))
raise TypeError("img should be PIL image. Got {}.".format(type(img)))
return ImageOps.autocontrast(img, cutoff, ignore)
@ -1408,7 +1408,7 @@ def invert_color(img):
"""
if not is_pil(img):
raise TypeError('img should be PIL image. Got {}'.format(type(img)))
raise TypeError("img should be PIL image. Got {}.".format(type(img)))
return ImageOps.invert(img)
@ -1426,7 +1426,7 @@ def equalize(img):
"""
if not is_pil(img):
raise TypeError('img should be PIL image. Got {}'.format(type(img)))
raise TypeError("img should be PIL image. Got {}.".format(type(img)))
return ImageOps.equalize(img)


@ -79,7 +79,7 @@ def check_mix_up_batch_c(method):
def check_normalize_c_param(mean, std):
if len(mean) != len(std):
raise ValueError("Length of mean and std must be equal")
raise ValueError("Length of mean and std must be equal.")
for mean_value in mean:
check_pos_float32(mean_value)
for std_value in std:
@ -88,7 +88,7 @@ def check_normalize_c_param(mean, std):
def check_normalize_py_param(mean, std):
if len(mean) != len(std):
raise ValueError("Length of mean and std must be equal")
raise ValueError("Length of mean and std must be equal.")
for mean_value in mean:
check_value(mean_value, [0., 1.], "mean_value")
for std_value in std:
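A hedged sketch of a Normalize call that satisfies the equal-length check (values are illustrative; the import path assumes the r1.0 module layout):

import mindspore.dataset.vision.c_transforms as c_vision

# mean and std must carry the same number of entries, one per channel.
normalize = c_vision.Normalize(mean=[121.0, 115.0, 100.0], std=[70.0, 68.0, 71.0])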
@ -372,7 +372,7 @@ def check_num_channels(method):
if num_output_channels is not None:
if num_output_channels not in (1, 3):
raise ValueError("Number of channels of the output grayscale image "
"should be either 1 or 3. Got {0}".format(num_output_channels))
"should be either 1 or 3. Got {0}.".format(num_output_channels))
return method(self, *args, **kwargs)
@ -471,7 +471,7 @@ def check_linear_transform(method):
if transformation_matrix.shape[0] != transformation_matrix.shape[1]:
raise ValueError("transformation_matrix should be a square matrix. "
"Got shape {} instead".format(transformation_matrix.shape))
"Got shape {} instead.".format(transformation_matrix.shape))
if mean_vector.shape[0] != transformation_matrix.shape[0]:
raise ValueError("mean_vector length {0} should match either one dimension of the square "
"transformation_matrix {1}.".format(mean_vector.shape[0], transformation_matrix.shape))
@ -556,7 +556,7 @@ def check_uniform_augment_cpp(method):
check_positive(num_ops, "num_ops")
if num_ops > len(transforms):
raise ValueError("num_ops is greater than transforms list size")
raise ValueError("num_ops is greater than transforms list size.")
type_check_list(transforms, (TensorOp,), "tensor_ops")
return method(self, *args, **kwargs)
@ -693,11 +693,11 @@ def check_random_solarize(method):
type_check(threshold, (tuple,), "threshold")
type_check_list(threshold, (int,), "threshold")
if len(threshold) != 2:
raise ValueError("threshold must be a sequence of two numbers")
raise ValueError("threshold must be a sequence of two numbers.")
for element in threshold:
check_value(element, (0, UINT8_MAX))
if threshold[1] < threshold[0]:
raise ValueError("threshold must be in min max format numbers")
raise ValueError("threshold must be in (min, max) format.")
return method(self, *args, **kwargs)


@ -41,7 +41,7 @@ def test_compose():
# test one python transform followed by a C transform. type after oneHot is float (mixed use-case)
assert test_config([1, 0], [py_ops.OneHotOp(2), ops.TypeCast(mstype.int32)]) == [[[0, 1]], [[1, 0]]]
# test exceptions. compose, randomApply randomChoice use the same validator
assert "op_list[0] is not a c_transform op" in test_config([1, 0], [1, ops.TypeCast(mstype.int32)])
assert "op_list[0] is neither a c_transform op" in test_config([1, 0], [1, ops.TypeCast(mstype.int32)])
# test empty op list
assert "op_list can not be empty." in test_config([1, 0], [])


@ -63,7 +63,7 @@ def test_compose():
# Test exceptions.
with pytest.raises(TypeError) as error_info:
c_transforms.Compose([1, c_transforms.TypeCast(mstype.int32)])
assert "op_list[0] is not a c_transform op (TensorOp) nor a callable pyfunc." in str(error_info.value)
assert "op_list[0] is neither a c_transform op (TensorOp) nor a callable pyfunc." in str(error_info.value)
# Test empty op list
with pytest.raises(ValueError) as error_info:


@ -510,7 +510,8 @@ def test_generator_error_3():
for _ in data1:
pass
assert "When (len(input_columns) != len(output_columns)), column_order must be specified." in str(info.value)
assert "When length of input_columns and output_columns are not equal, column_order must be specified." in \
str(info.value)
def test_generator_error_4():


@ -279,7 +279,7 @@ def test_cv_minddataset_partition_num_samples_equals_0():
with pytest.raises(Exception) as error_info:
partitions(5)
try:
assert 'num_samples should be a positive integer value, but got num_samples=0' in str(error_info.value)
assert 'num_samples should be a positive integer value, but got num_samples: 0.' in str(error_info.value)
except Exception as error:
os.remove(CV_FILE_NAME)
os.remove("{}.db".format(CV_FILE_NAME))

View File

@ -242,7 +242,7 @@ def test_normalize_exception_unequal_size_c():
_ = c_vision.Normalize([100, 250, 125], [50, 50, 75, 75])
except ValueError as e:
logger.info("Got an exception in DE: {}".format(str(e)))
assert str(e) == "Length of mean and std must be equal"
assert str(e) == "Length of mean and std must be equal."
def test_normalize_exception_unequal_size_py():
@ -255,7 +255,7 @@ def test_normalize_exception_unequal_size_py():
_ = py_vision.Normalize([0.50, 0.30, 0.75], [0.18, 0.32, 0.71, 0.72])
except ValueError as e:
logger.info("Got an exception in DE: {}".format(str(e)))
assert str(e) == "Length of mean and std must be equal"
assert str(e) == "Length of mean and std must be equal."
def test_normalize_exception_invalid_size_py():


@ -483,7 +483,7 @@ def test_clue_padded_and_skip_with_0_samples():
count += 1
assert count == 0
with pytest.raises(ValueError, match="There is no samples in the "):
with pytest.raises(ValueError, match="There are no samples in the "):
dataset = dataset.concat(data_copy1)
count = 0
for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True):


@ -41,8 +41,8 @@ def test_random_select_subpolicy():
# test exceptions
assert "policy can not be empty." in test_config([[1, 2, 3]], [])
assert "policy[0] can not be empty." in test_config([[1, 2, 3]], [[]])
assert "op of (op, prob) in policy[1][0] is not a c_transform op (TensorOp) nor a callable pyfunc" in test_config(
[[1, 2, 3]], [[(ops.PadEnd([4], 0), 0.5)], [(1, 0.4)]])
assert "op of (op, prob) in policy[1][0] is neither a c_transform op (TensorOp) nor a callable pyfunc" \
in test_config([[1, 2, 3]], [[(ops.PadEnd([4], 0), 0.5)], [(1, 0.4)]])
assert "prob of (op, prob) policy[1][0] is not within the required interval of (0 to 1)" in test_config([[1]], [
[(ops.Duplicate(), 0)], [(ops.Duplicate(), -0.1)]])