From 9b3c33e157ffdb487930235560ae86a7e795a7c3 Mon Sep 17 00:00:00 2001 From: xiefangqi Date: Sat, 5 Sep 2020 10:56:38 +0800 Subject: [PATCH] minddata iterator output ms_tensor --- mindspore/dataset/engine/datasets.py | 31 ++--- mindspore/dataset/engine/iterators.py | 19 ++-- mindspore/train/dataset_helper.py | 5 +- model_zoo/official/cv/faster_rcnn/eval.py | 4 +- model_zoo/official/cv/maskrcnn/eval.py | 2 +- .../official/cv/mobilenetv2/src/dataset.py | 2 +- model_zoo/official/cv/resnext50/eval.py | 2 +- model_zoo/official/cv/ssd/eval.py | 2 +- model_zoo/official/cv/vgg16/eval.py | 2 +- .../official/cv/yolov3_darknet53/eval.py | 6 +- .../official/cv/yolov3_darknet53/train.py | 2 +- .../cv/yolov3_darknet53_quant/eval.py | 6 +- .../cv/yolov3_darknet53_quant/train.py | 2 +- model_zoo/official/cv/yolov3_resnet18/eval.py | 2 +- model_zoo/official/gnn/bgcf/train.py | 2 +- model_zoo/official/nlp/bert/run_classifier.py | 3 +- model_zoo/official/nlp/bert/run_ner.py | 3 +- model_zoo/official/nlp/bert/run_squad.py | 2 +- .../nlp/mass/src/transformer/infer_mass.py | 4 +- .../official/nlp/tinybert/run_task_distill.py | 3 +- model_zoo/official/nlp/tinybert/src/utils.py | 2 +- model_zoo/official/nlp/transformer/eval.py | 2 +- .../create_dataset.py | 2 +- model_zoo/utils/graph_to_mindrecord/reader.py | 2 +- .../aclImdb/create_dataset.py | 2 +- .../aclImdb_preprocess/create_dataset.py | 2 +- tests/st/ops/ascend/test_tdt_data_ms.py | 2 +- tests/st/probability/test_bnn_layer.py | 4 +- tests/st/probability/test_gpu_svi_cvae.py | 2 +- tests/st/probability/test_gpu_svi_vae.py | 2 +- .../probability/test_transform_bnn_layer.py | 4 +- .../probability/test_transform_bnn_model.py | 4 +- tests/st/probability/test_uncertainty.py | 2 +- tests/st/pynative/test_pynative_resnet50.py | 4 +- tests/ut/python/dataset/test_HWC2CHW.py | 6 +- tests/ut/python/dataset/test_apply.py | 6 +- tests/ut/python/dataset/test_autocontrast.py | 30 ++--- .../dataset/test_bounding_box_augment.py | 15 ++- .../dataset/test_bucket_batch_by_length.py | 20 ++-- tests/ut/python/dataset/test_c_compose.py | 2 +- .../ut/python/dataset/test_c_random_apply.py | 2 +- .../ut/python/dataset/test_c_random_choice.py | 2 +- tests/ut/python/dataset/test_center_crop.py | 8 +- tests/ut/python/dataset/test_concat.py | 76 +++++++------ .../ut/python/dataset/test_concatenate_op.py | 12 +- tests/ut/python/dataset/test_config.py | 8 +- tests/ut/python/dataset/test_cut_out.py | 9 +- .../ut/python/dataset/test_cutmix_batch_op.py | 48 ++++---- .../dataset/test_dataset_numpy_slices.py | 34 +++--- .../ut/python/dataset/test_datasets_celeba.py | 4 +- .../python/dataset/test_datasets_cifarop.py | 15 +-- tests/ut/python/dataset/test_datasets_clue.py | 42 +++---- tests/ut/python/dataset/test_datasets_coco.py | 8 +- tests/ut/python/dataset/test_datasets_csv.py | 26 ++--- .../python/dataset/test_datasets_generator.py | 54 ++++----- .../dataset/test_datasets_imagefolder.py | 6 +- .../dataset/test_datasets_manifestop.py | 10 +- .../ut/python/dataset/test_datasets_mnist.py | 10 +- .../python/dataset/test_datasets_sharding.py | 16 +-- .../dataset/test_datasets_textfileop.py | 24 ++-- .../python/dataset/test_datasets_tfrecord.py | 16 +-- tests/ut/python/dataset/test_datasets_voc.py | 8 +- tests/ut/python/dataset/test_decode.py | 6 +- tests/ut/python/dataset/test_duplicate_op.py | 2 +- tests/ut/python/dataset/test_epoch_ctrl.py | 54 ++++----- tests/ut/python/dataset/test_equalize.py | 30 ++--- tests/ut/python/dataset/test_fill_op.py | 8 +- tests/ut/python/dataset/test_filterop.py | 36 +++--- tests/ut/python/dataset/test_five_crop.py | 3 +- tests/ut/python/dataset/test_flat_map.py | 4 +- tests/ut/python/dataset/test_from_dataset.py | 8 +- tests/ut/python/dataset/test_graphdata.py | 2 +- .../dataset/test_graphdata_distributed.py | 2 +- tests/ut/python/dataset/test_invert.py | 24 ++-- tests/ut/python/dataset/test_iterator.py | 52 ++++++++- .../dataset/test_linear_transformation.py | 3 +- tests/ut/python/dataset/test_mask_op.py | 2 +- tests/ut/python/dataset/test_minddataset.py | 95 ++++++++-------- .../dataset/test_minddataset_exception.py | 8 +- .../dataset/test_minddataset_multi_images.py | 4 +- ...st_minddataset_multi_images_and_ndarray.py | 2 +- .../python/dataset/test_minddataset_padded.py | 22 ++-- .../dataset/test_minddataset_sampler.py | 56 ++++----- .../dataset/test_mixup_label_smoothing.py | 8 +- tests/ut/python/dataset/test_mixup_op.py | 64 +++++------ tests/ut/python/dataset/test_ngram_op.py | 6 +- tests/ut/python/dataset/test_nlp.py | 4 +- tests/ut/python/dataset/test_normalizeOp.py | 6 +- tests/ut/python/dataset/test_opt.py | 2 +- tests/ut/python/dataset/test_opt_pass.py | 10 +- tests/ut/python/dataset/test_pad.py | 7 +- tests/ut/python/dataset/test_pad_batch.py | 14 +-- tests/ut/python/dataset/test_pad_end_op.py | 2 +- tests/ut/python/dataset/test_paddeddataset.py | 30 ++--- tests/ut/python/dataset/test_pair_truncate.py | 3 +- tests/ut/python/dataset/test_pyfunc.py | 20 ++-- .../python/dataset/test_python_tokenizer.py | 2 +- tests/ut/python/dataset/test_random_affine.py | 6 +- tests/ut/python/dataset/test_random_apply.py | 3 +- tests/ut/python/dataset/test_random_choice.py | 6 +- tests/ut/python/dataset/test_random_color.py | 14 ++- .../dataset/test_random_color_adjust.py | 3 +- tests/ut/python/dataset/test_random_crop.py | 9 +- .../dataset/test_random_crop_and_resize.py | 9 +- .../test_random_crop_and_resize_with_bbox.py | 9 +- .../dataset/test_random_crop_decode_resize.py | 3 +- .../dataset/test_random_crop_with_bbox.py | 15 ++- .../ut/python/dataset/test_random_erasing.py | 3 +- .../python/dataset/test_random_grayscale.py | 6 +- .../dataset/test_random_horizontal_flip.py | 6 +- .../test_random_horizontal_flip_with_bbox.py | 12 +- tests/ut/python/dataset/test_random_order.py | 3 +- .../python/dataset/test_random_perspective.py | 3 +- .../python/dataset/test_random_posterize.py | 9 +- tests/ut/python/dataset/test_random_resize.py | 3 +- .../dataset/test_random_resize_with_bbox.py | 9 +- .../ut/python/dataset/test_random_rotation.py | 9 +- .../dataset/test_random_select_subpolicy.py | 2 +- .../python/dataset/test_random_sharpness.py | 18 +-- .../python/dataset/test_random_solarize_op.py | 9 +- .../dataset/test_random_vertical_flip.py | 6 +- .../test_random_vertical_flip_with_bbox.py | 12 +- tests/ut/python/dataset/test_rename.py | 2 +- tests/ut/python/dataset/test_repeat.py | 24 ++-- tests/ut/python/dataset/test_rescale_op.py | 5 +- tests/ut/python/dataset/test_resize.py | 6 +- .../python/dataset/test_resize_with_bbox.py | 9 +- tests/ut/python/dataset/test_rgb_hsv.py | 4 +- tests/ut/python/dataset/test_sampler.py | 14 +-- tests/ut/python/dataset/test_save_op.py | 14 +-- .../dataset/test_sentencepiece_tokenizer.py | 20 ++-- .../ut/python/dataset/test_serdes_dataset.py | 24 ++-- tests/ut/python/dataset/test_shuffle.py | 3 +- tests/ut/python/dataset/test_skip.py | 22 ++-- tests/ut/python/dataset/test_slice_op.py | 4 +- .../ut/python/dataset/test_sliding_window.py | 10 +- tests/ut/python/dataset/test_soft_dvpp.py | 9 +- tests/ut/python/dataset/test_split.py | 106 +++++++++--------- tests/ut/python/dataset/test_sync_wait.py | 15 +-- tests/ut/python/dataset/test_take.py | 32 +++--- tests/ut/python/dataset/test_ten_crop.py | 3 +- tests/ut/python/dataset/test_tensor_empty.py | 6 +- tests/ut/python/dataset/test_tensor_string.py | 26 ++--- .../dataset/test_text_basic_tokenizer.py | 4 +- .../dataset/test_text_bert_tokenizer.py | 4 +- .../dataset/test_text_jieba_tokenizer.py | 42 +++---- .../ut/python/dataset/test_text_tokenizer.py | 26 ++--- .../dataset/test_text_wordpiece_tokenizer.py | 4 +- tests/ut/python/dataset/test_to_number_op.py | 26 ++--- tests/ut/python/dataset/test_to_type.py | 3 +- tests/ut/python/dataset/test_type_cast.py | 6 +- .../ut/python/dataset/test_uniform_augment.py | 18 +-- tests/ut/python/dataset/test_var_batch_map.py | 20 ++-- tests/ut/python/dataset/test_vocab.py | 12 +- tests/ut/python/dataset/test_zip.py | 12 +- tests/ut/python/dataset/util.py | 14 +-- 156 files changed, 1073 insertions(+), 921 deletions(-) diff --git a/mindspore/dataset/engine/datasets.py b/mindspore/dataset/engine/datasets.py index 84afa96f81f..00b68878c18 100644 --- a/mindspore/dataset/engine/datasets.py +++ b/mindspore/dataset/engine/datasets.py @@ -394,7 +394,7 @@ class Dataset: logger.error("func must be a function.") raise TypeError("func must be a function.") - for row_data in self: + for row_data in self.create_tuple_iterator(output_numpy=True): if dataset is None: dataset = func(row_data) else: @@ -1133,7 +1133,7 @@ class Dataset: return SaveOp(self).save(file_names, file_type) - def create_tuple_iterator(self, columns=None, num_epochs=-1): + def create_tuple_iterator(self, columns=None, num_epochs=-1, output_numpy=False): """ Create an Iterator over the dataset. The data retrieved will be a list of ndarray of data. @@ -1143,8 +1143,11 @@ class Dataset: Args: columns (list[str], optional): List of columns to be used to specify the order of columns (default=None, means all columns). - num_epochs (int, optional): max epochs that iterator can be iteratered, - if num_epochs = -1, iterator can be iteratered infinit epochs (default=-1) + num_epochs (int, optional): maximum epochs that iterator can be iteratered, + if num_epochs = -1, iterator can be iteratered infinite epochs (default=-1) + output_numpy (bool, optional): Whether or not to output NumPy datatype, + if output_numpy=False, iterator will output MSTensor (default=False). + Returns: Iterator, list of ndarray. @@ -1161,9 +1164,9 @@ class Dataset: """ if self._noop_mode(): return DummyIterator(self, 'tuple') - return TupleIterator(self, columns, num_epochs) + return TupleIterator(self, columns, num_epochs, output_numpy) - def create_dict_iterator(self, num_epochs=-1): + def create_dict_iterator(self, num_epochs=-1, output_numpy=False): """ Create an Iterator over the dataset. @@ -1171,8 +1174,10 @@ class Dataset: of the columns in the dictionary may not be the same as the original order. Args: - num_epochs (int, optional): max epochs that iterator can be iteratered, - if num_epochs = -1, iterator can be iteratered infinit epochs (default=-1) + num_epochs (int, optional): maximum epochs that iterator can be iteratered, + if num_epochs = -1, iterator can be iteratered infinite epochs (default=-1) + output_numpy (bool, optional): Whether or not to output NumPy datatype, + if output_numpy=False, iterator will output MSTensor (default=False). Returns: Iterator, dictionary of column_name-ndarray pair. @@ -1190,7 +1195,7 @@ class Dataset: """ if self._noop_mode(): return DummyIterator(self, 'dict') - return DictIterator(self, num_epochs) + return DictIterator(self, num_epochs, output_numpy) def __iter__(self): """Create an Iterator over the dataset.""" @@ -1617,7 +1622,7 @@ class BucketBatchByLengthDataset(DatasetOp): """ if self.dataset_size is None: num_rows = 0 - for _ in self.create_dict_iterator(num_epochs=1): + for _ in self.create_dict_iterator(num_epochs=1, output_numpy=True): num_rows += 1 self.dataset_size = num_rows return self.dataset_size @@ -2163,7 +2168,7 @@ class FilterDataset(DatasetOp): """ if self.dataset_size is None: num_rows = 0 - for _ in self.create_dict_iterator(num_epochs=1): + for _ in self.create_dict_iterator(num_epochs=1, output_numpy=True): num_rows += 1 self.dataset_size = num_rows return self.dataset_size @@ -2400,7 +2405,7 @@ class ConcatDataset(DatasetOp): """ if self.dataset_size is None: num_rows = 0 - for _ in self.create_dict_iterator(num_epochs=1): + for _ in self.create_dict_iterator(num_epochs=1, output_numpy=True): num_rows += 1 self.dataset_size = num_rows return self.dataset_size @@ -3495,7 +3500,7 @@ class GeneratorDataset(MappableDataset): self.dataset_size = rows_from_sampler else: num_rows = 0 - for _ in self.create_dict_iterator(num_epochs=1): + for _ in self.create_dict_iterator(num_epochs=1, output_numpy=True): num_rows += 1 self.dataset_size = num_rows return self.dataset_size diff --git a/mindspore/dataset/engine/iterators.py b/mindspore/dataset/engine/iterators.py index d10cf80999f..6642a0396bd 100644 --- a/mindspore/dataset/engine/iterators.py +++ b/mindspore/dataset/engine/iterators.py @@ -67,8 +67,9 @@ class Iterator: dataset: Dataset to be iterated over """ - def __init__(self, dataset, num_epochs=-1): + def __init__(self, dataset, num_epochs=-1, output_numpy=False): self.num_epochs = num_epochs + self.output_numpy = output_numpy ITERATORS_LIST.append(weakref.ref(self)) # create a copy of tree and work on it. self.dataset = copy.deepcopy(dataset) @@ -305,8 +306,8 @@ class DictIterator(Iterator): """ The derived class of Iterator with dict type. """ - def __init__(self, dataset, num_epochs=-1): - super().__init__(dataset, num_epochs) + def __init__(self, dataset, num_epochs=-1, output_numpy=False): + super().__init__(dataset, num_epochs, output_numpy) self.depipeline.LaunchTreeExec() def check_node_type(self, node): @@ -323,7 +324,9 @@ class DictIterator(Iterator): Dict, the next record in the dataset. """ - return {k: v.as_array() for k, v in self.depipeline.GetNextAsMap().items()} + if self.output_numpy: + return {k: v.as_array() for k, v in self.depipeline.GetNextAsMap().items()} + return {k: Tensor(v.as_array()) for k, v in self.depipeline.GetNextAsMap().items()} class TupleIterator(Iterator): @@ -333,12 +336,12 @@ class TupleIterator(Iterator): def check_node_type(self, node): pass - def __init__(self, dataset, columns=None, num_epochs=-1): + def __init__(self, dataset, columns=None, num_epochs=-1, output_numpy=False): if columns is not None: if not isinstance(columns, list): columns = [columns] dataset = dataset.project(columns) - super().__init__(dataset, num_epochs) + super().__init__(dataset, num_epochs, output_numpy) self.depipeline.LaunchTreeExec() def __iter__(self): @@ -352,7 +355,9 @@ class TupleIterator(Iterator): List, the next record in the dataset. """ - return [t.as_array() for t in self.depipeline.GetNextAsList()] + if self.output_numpy: + return [t.as_array() for t in self.depipeline.GetNextAsList()] + return [Tensor(t.as_array()) for t in self.depipeline.GetNextAsList()] class DummyIterator(): diff --git a/mindspore/train/dataset_helper.py b/mindspore/train/dataset_helper.py index bed44552fc4..85f58590e5c 100644 --- a/mindspore/train/dataset_helper.py +++ b/mindspore/train/dataset_helper.py @@ -18,8 +18,7 @@ import os from mindspore._checkparam import check_bool, check_int from .. import context, nn -from ._utils import _exec_datagraph, _get_types_and_shapes, _to_tensor, \ - _construct_tensor_list +from ._utils import _exec_datagraph, _get_types_and_shapes, _construct_tensor_list from ..nn.wrap import GetNextSingleOp from ..parallel._utils import _get_device_num, _get_global_rank, _need_to_full, _to_full_shapes from ..ops import operations as P @@ -297,4 +296,4 @@ class _DatasetIterNormal: def __next__(self): data = self.iter.__next__() - return _to_tensor(data) + return data diff --git a/model_zoo/official/cv/faster_rcnn/eval.py b/model_zoo/official/cv/faster_rcnn/eval.py index d8d960e0c6e..5f227c1bcda 100644 --- a/model_zoo/official/cv/faster_rcnn/eval.py +++ b/model_zoo/official/cv/faster_rcnn/eval.py @@ -19,7 +19,7 @@ import argparse import time import numpy as np from pycocotools.coco import COCO -from mindspore import context, Tensor +from mindspore import context from mindspore.train.serialization import load_checkpoint, load_param_into_net from mindspore.common import set_seed @@ -68,7 +68,7 @@ def FasterRcnn_eval(dataset_path, ckpt_path, ann_file): start = time.time() # run net - output = net(Tensor(img_data), Tensor(img_metas), Tensor(gt_bboxes), Tensor(gt_labels), Tensor(gt_num)) + output = net(img_data, img_metas, gt_bboxes, gt_labels, gt_num) end = time.time() print("Iter {} cost time {}".format(eval_iter, end - start)) diff --git a/model_zoo/official/cv/maskrcnn/eval.py b/model_zoo/official/cv/maskrcnn/eval.py index c2d00cf2b48..6197f8e81b2 100644 --- a/model_zoo/official/cv/maskrcnn/eval.py +++ b/model_zoo/official/cv/maskrcnn/eval.py @@ -57,7 +57,7 @@ def MaskRcnn_eval(dataset_path, ckpt_path, ann_file): print("total images num: ", total) print("Processing, please wait a moment.") max_num = 128 - for data in ds.create_dict_iterator(): + for data in ds.create_dict_iterator(output_numpy=True): eval_iter = eval_iter + 1 img_data = data['image'] diff --git a/model_zoo/official/cv/mobilenetv2/src/dataset.py b/model_zoo/official/cv/mobilenetv2/src/dataset.py index dab64fd33df..db0f8c65bac 100644 --- a/model_zoo/official/cv/mobilenetv2/src/dataset.py +++ b/model_zoo/official/cv/mobilenetv2/src/dataset.py @@ -109,7 +109,7 @@ def extract_features(net, dataset_path, config): config=config, repeat_num=1) step_size = dataset.get_dataset_size() - pbar = tqdm(list(dataset.create_dict_iterator())) + pbar = tqdm(list(dataset.create_dict_iterator(output_numpy=True))) model = Model(net) i = 0 for data in pbar: diff --git a/model_zoo/official/cv/resnext50/eval.py b/model_zoo/official/cv/resnext50/eval.py index 88e7ce8e4bc..eab37f18812 100644 --- a/model_zoo/official/cv/resnext50/eval.py +++ b/model_zoo/official/cv/resnext50/eval.py @@ -146,7 +146,7 @@ def test(cloud_args=None): per_batch_size=args.per_batch_size, max_epoch=1, rank=args.rank, group_size=args.group_size, mode='eval') - eval_dataloader = de_dataset.create_tuple_iterator() + eval_dataloader = de_dataset.create_tuple_iterator(output_numpy=True) network = get_network(args.backbone, args.num_classes, platform=args.platform) if network is None: raise NotImplementedError('not implement {}'.format(args.backbone)) diff --git a/model_zoo/official/cv/ssd/eval.py b/model_zoo/official/cv/ssd/eval.py index 37b5092206c..87827cbd7b7 100644 --- a/model_zoo/official/cv/ssd/eval.py +++ b/model_zoo/official/cv/ssd/eval.py @@ -44,7 +44,7 @@ def ssd_eval(dataset_path, ckpt_path): print("\n========================================\n") print("total images num: ", total) print("Processing, please wait a moment.") - for data in ds.create_dict_iterator(): + for data in ds.create_dict_iterator(output_numpy=True): img_id = data['img_id'] img_np = data['image'] image_shape = data['image_shape'] diff --git a/model_zoo/official/cv/vgg16/eval.py b/model_zoo/official/cv/vgg16/eval.py index be9e6cbe123..1fef490b76f 100644 --- a/model_zoo/official/cv/vgg16/eval.py +++ b/model_zoo/official/cv/vgg16/eval.py @@ -159,7 +159,7 @@ def test(cloud_args=None): for model in args.models: dataset = classification_dataset(args.data_path, args.image_size, args.per_batch_size, mode='eval') - eval_dataloader = dataset.create_tuple_iterator() + eval_dataloader = dataset.create_tuple_iterator(output_numpy=True) network = vgg16(args.num_classes, args, phase="test") # pre_trained diff --git a/model_zoo/official/cv/yolov3_darknet53/eval.py b/model_zoo/official/cv/yolov3_darknet53/eval.py index f9d74ba1c1d..9ab935f9dc3 100644 --- a/model_zoo/official/cv/yolov3_darknet53/eval.py +++ b/model_zoo/official/cv/yolov3_darknet53/eval.py @@ -300,10 +300,10 @@ def test(): input_shape = Tensor(tuple(config.test_img_shape), ms.float32) args.logger.info('Start inference....') for i, data in enumerate(ds.create_dict_iterator()): - image = Tensor(data["image"]) + image = data["image"] - image_shape = Tensor(data["image_shape"]) - image_id = Tensor(data["img_id"]) + image_shape = data["image_shape"] + image_id = data["img_id"] prediction = network(image, input_shape) output_big, output_me, output_small = prediction diff --git a/model_zoo/official/cv/yolov3_darknet53/train.py b/model_zoo/official/cv/yolov3_darknet53/train.py index f3449908ea0..ccc53514e90 100644 --- a/model_zoo/official/cv/yolov3_darknet53/train.py +++ b/model_zoo/official/cv/yolov3_darknet53/train.py @@ -299,7 +299,7 @@ def train(): old_progress = -1 t_end = time.time() - data_loader = ds.create_dict_iterator() + data_loader = ds.create_dict_iterator(output_numpy=True) for i, data in enumerate(data_loader): images = data["image"] diff --git a/model_zoo/official/cv/yolov3_darknet53_quant/eval.py b/model_zoo/official/cv/yolov3_darknet53_quant/eval.py index a7a4297dbc2..0b652a39b60 100644 --- a/model_zoo/official/cv/yolov3_darknet53_quant/eval.py +++ b/model_zoo/official/cv/yolov3_darknet53_quant/eval.py @@ -306,10 +306,10 @@ def test(): input_shape = Tensor(tuple(config.test_img_shape), ms.float32) args.logger.info('Start inference....') for i, data in enumerate(ds.create_dict_iterator()): - image = Tensor(data["image"]) + image = data["image"] - image_shape = Tensor(data["image_shape"]) - image_id = Tensor(data["img_id"]) + image_shape = data["image_shape"] + image_id = data["img_id"] prediction = network(image, input_shape) output_big, output_me, output_small = prediction diff --git a/model_zoo/official/cv/yolov3_darknet53_quant/train.py b/model_zoo/official/cv/yolov3_darknet53_quant/train.py index c0a0ba5b305..63cd804e060 100644 --- a/model_zoo/official/cv/yolov3_darknet53_quant/train.py +++ b/model_zoo/official/cv/yolov3_darknet53_quant/train.py @@ -303,7 +303,7 @@ def train(): old_progress = -1 t_end = time.time() - data_loader = ds.create_dict_iterator() + data_loader = ds.create_dict_iterator(output_numpy=True) shape_record = ShapeRecord() for i, data in enumerate(data_loader): diff --git a/model_zoo/official/cv/yolov3_resnet18/eval.py b/model_zoo/official/cv/yolov3_resnet18/eval.py index 17823af0c6c..18ea4fbac14 100644 --- a/model_zoo/official/cv/yolov3_resnet18/eval.py +++ b/model_zoo/official/cv/yolov3_resnet18/eval.py @@ -44,7 +44,7 @@ def yolo_eval(dataset_path, ckpt_path): print("\n========================================\n") print("total images num: ", total) print("Processing, please wait a moment.") - for data in ds.create_dict_iterator(): + for data in ds.create_dict_iterator(output_numpy=True): img_np = data['image'] image_shape = data['image_shape'] annotation = data['annotation'] diff --git a/model_zoo/official/gnn/bgcf/train.py b/model_zoo/official/gnn/bgcf/train.py index 055aebb9684..dc34a12a2bc 100644 --- a/model_zoo/official/gnn/bgcf/train.py +++ b/model_zoo/official/gnn/bgcf/train.py @@ -52,7 +52,7 @@ def train_and_eval(): eval_class = BGCFEvaluate(parser, train_graph, test_graph, parser.Ks) - itr = train_ds.create_dict_iterator(parser.num_epoch) + itr = train_ds.create_dict_iterator(parser.num_epoch, output_numpy=True) num_iter = int(num_pairs / parser.batch_pairs) for _epoch in range(1, parser.num_epoch + 1): diff --git a/model_zoo/official/nlp/bert/run_classifier.py b/model_zoo/official/nlp/bert/run_classifier.py index dbc461773a1..bb2cc5ec152 100644 --- a/model_zoo/official/nlp/bert/run_classifier.py +++ b/model_zoo/official/nlp/bert/run_classifier.py @@ -29,7 +29,6 @@ from mindspore import context from mindspore import log as logger from mindspore.nn.wrap.loss_scale import DynamicLossScaleUpdateCell from mindspore.nn.optim import AdamWeightDecay, Lamb, Momentum -from mindspore.common.tensor import Tensor from mindspore.train.model import Model from mindspore.train.callback import CheckpointConfig, ModelCheckpoint, TimeMonitor from mindspore.train.serialization import load_checkpoint, load_param_into_net @@ -123,7 +122,7 @@ def do_eval(dataset=None, network=None, num_class=2, assessment_method="accuracy for data in dataset.create_dict_iterator(): input_data = [] for i in columns_list: - input_data.append(Tensor(data[i])) + input_data.append(data[i]) input_ids, input_mask, token_type_id, label_ids = input_data logits = model.predict(input_ids, input_mask, token_type_id, label_ids) callback.update(logits, label_ids) diff --git a/model_zoo/official/nlp/bert/run_ner.py b/model_zoo/official/nlp/bert/run_ner.py index 069201dfa3d..704be721ead 100644 --- a/model_zoo/official/nlp/bert/run_ner.py +++ b/model_zoo/official/nlp/bert/run_ner.py @@ -30,7 +30,6 @@ from mindspore import context from mindspore import log as logger from mindspore.nn.wrap.loss_scale import DynamicLossScaleUpdateCell from mindspore.nn.optim import AdamWeightDecay, Lamb, Momentum -from mindspore.common.tensor import Tensor from mindspore.train.model import Model from mindspore.train.callback import CheckpointConfig, ModelCheckpoint, TimeMonitor from mindspore.train.serialization import load_checkpoint, load_param_into_net @@ -132,7 +131,7 @@ def do_eval(dataset=None, network=None, use_crf="", num_class=2, assessment_meth for data in dataset.create_dict_iterator(): input_data = [] for i in columns_list: - input_data.append(Tensor(data[i])) + input_data.append(data[i]) input_ids, input_mask, token_type_id, label_ids = input_data logits = model.predict(input_ids, input_mask, token_type_id, label_ids) callback.update(logits, label_ids) diff --git a/model_zoo/official/nlp/bert/run_squad.py b/model_zoo/official/nlp/bert/run_squad.py index 839412448ce..82859229d34 100644 --- a/model_zoo/official/nlp/bert/run_squad.py +++ b/model_zoo/official/nlp/bert/run_squad.py @@ -112,7 +112,7 @@ def do_eval(dataset=None, vocab_file="", eval_json="", load_checkpoint_path="", for data in dataset.create_dict_iterator(): input_data = [] for i in columns_list: - input_data.append(Tensor(data[i])) + input_data.append(data[i]) input_ids, input_mask, segment_ids, unique_ids = input_data start_positions = Tensor([1], mstype.float32) end_positions = Tensor([1], mstype.float32) diff --git a/model_zoo/official/nlp/mass/src/transformer/infer_mass.py b/model_zoo/official/nlp/mass/src/transformer/infer_mass.py index 6fe829c4e96..c71e7f5a364 100644 --- a/model_zoo/official/nlp/mass/src/transformer/infer_mass.py +++ b/model_zoo/official/nlp/mass/src/transformer/infer_mass.py @@ -107,7 +107,7 @@ def transformer_infer(config, dataset): probs = [] source_sentences = [] target_sentences = [] - for batch in dataset.create_dict_iterator(): + for batch in dataset.create_dict_iterator(output_numpy=True): source_sentences.append(batch["source_eos_ids"]) target_sentences.append(batch["target_eos_ids"]) @@ -232,7 +232,7 @@ def transformer_infer_ppl(config, dataset): lengths = [] source_sentences = [] target_sentences = [] - for batch in dataset.create_dict_iterator(): + for batch in dataset.create_dict_iterator(output_numpy=True): source_sentences.append(batch["source_eos_ids"]) target_sentences.append(batch["target_eos_ids"]) diff --git a/model_zoo/official/nlp/tinybert/run_task_distill.py b/model_zoo/official/nlp/tinybert/run_task_distill.py index 275207ca9e0..5ed4730d6ff 100644 --- a/model_zoo/official/nlp/tinybert/run_task_distill.py +++ b/model_zoo/official/nlp/tinybert/run_task_distill.py @@ -19,7 +19,6 @@ import os import re import argparse import mindspore.common.dtype as mstype -from mindspore import Tensor from mindspore import context from mindspore.train.model import Model from mindspore.train.callback import TimeMonitor @@ -282,7 +281,7 @@ def do_eval_standalone(): for data in eval_dataset.create_dict_iterator(): input_data = [] for i in columns_list: - input_data.append(Tensor(data[i])) + input_data.append(data[i]) input_ids, input_mask, token_type_id, label_ids = input_data logits = eval_model(input_ids, token_type_id, input_mask) callback.update(logits[3], label_ids) diff --git a/model_zoo/official/nlp/tinybert/src/utils.py b/model_zoo/official/nlp/tinybert/src/utils.py index 84746ae5132..eaa80aaf127 100644 --- a/model_zoo/official/nlp/tinybert/src/utils.py +++ b/model_zoo/official/nlp/tinybert/src/utils.py @@ -96,7 +96,7 @@ class EvalCallBack(Callback): for data in self.dataset.create_dict_iterator(): input_data = [] for i in columns_list: - input_data.append(Tensor(data[i])) + input_data.append(data[i]) input_ids, input_mask, token_type_id, label_ids = input_data self.network.set_train(False) logits = self.network(input_ids, token_type_id, input_mask) diff --git a/model_zoo/official/nlp/transformer/eval.py b/model_zoo/official/nlp/transformer/eval.py index 865c1307f94..9a65e3ebc57 100644 --- a/model_zoo/official/nlp/transformer/eval.py +++ b/model_zoo/official/nlp/transformer/eval.py @@ -113,7 +113,7 @@ def run_transformer_eval(): predictions = [] source_sents = [] target_sents = [] - for batch in dataset.create_dict_iterator(): + for batch in dataset.create_dict_iterator(output_numpy=True): source_sents.append(batch["source_eos_ids"]) target_sents.append(batch["target_eos_ids"]) source_ids = Tensor(batch["source_eos_ids"], mstype.int32) diff --git a/model_zoo/utils/cv_to_mindrecord/Caltech-UCSD-Birds-200-2011/create_dataset.py b/model_zoo/utils/cv_to_mindrecord/Caltech-UCSD-Birds-200-2011/create_dataset.py index e3fde9ffef2..49f1fca10b7 100644 --- a/model_zoo/utils/cv_to_mindrecord/Caltech-UCSD-Birds-200-2011/create_dataset.py +++ b/model_zoo/utils/cv_to_mindrecord/Caltech-UCSD-Birds-200-2011/create_dataset.py @@ -22,7 +22,7 @@ def create_dataset(data_file): num_parallel_workers=num_readers, shuffle=True) index = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(output_numpy=True): print("example {}: {}".format(index, item)) index += 1 if index % 1000 == 0: diff --git a/model_zoo/utils/graph_to_mindrecord/reader.py b/model_zoo/utils/graph_to_mindrecord/reader.py index 637ce41a171..d911ea904a4 100644 --- a/model_zoo/utils/graph_to_mindrecord/reader.py +++ b/model_zoo/utils/graph_to_mindrecord/reader.py @@ -28,7 +28,7 @@ args = parser.parse_args() data_set = ds.MindDataset(args.path) num_iter = 0 -for item in data_set.create_dict_iterator(): +for item in data_set.create_dict_iterator(output_numpy=True): print(item) num_iter += 1 print("Total items # is {}".format(num_iter)) diff --git a/model_zoo/utils/nlp_to_mindrecord/aclImdb/create_dataset.py b/model_zoo/utils/nlp_to_mindrecord/aclImdb/create_dataset.py index 3dba6c79657..8196fd58ac1 100644 --- a/model_zoo/utils/nlp_to_mindrecord/aclImdb/create_dataset.py +++ b/model_zoo/utils/nlp_to_mindrecord/aclImdb/create_dataset.py @@ -22,7 +22,7 @@ def create_dataset(data_file): num_parallel_workers=num_readers, shuffle=True) index = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(output_numpy=True): print("example {}: {}".format(index, item)) index += 1 if index % 1000 == 0: diff --git a/model_zoo/utils/nlp_to_mindrecord/aclImdb_preprocess/create_dataset.py b/model_zoo/utils/nlp_to_mindrecord/aclImdb_preprocess/create_dataset.py index 3dba6c79657..8196fd58ac1 100644 --- a/model_zoo/utils/nlp_to_mindrecord/aclImdb_preprocess/create_dataset.py +++ b/model_zoo/utils/nlp_to_mindrecord/aclImdb_preprocess/create_dataset.py @@ -22,7 +22,7 @@ def create_dataset(data_file): num_parallel_workers=num_readers, shuffle=True) index = 0 - for item in data_set.create_dict_iterator(): + for item in data_set.create_dict_iterator(output_numpy=True): print("example {}: {}".format(index, item)) index += 1 if index % 1000 == 0: diff --git a/tests/st/ops/ascend/test_tdt_data_ms.py b/tests/st/ops/ascend/test_tdt_data_ms.py index 8dd3d4e6918..9073c6a2afc 100644 --- a/tests/st/ops/ascend/test_tdt_data_ms.py +++ b/tests/st/ops/ascend/test_tdt_data_ms.py @@ -96,7 +96,7 @@ if __name__ == '__main__': dataset_types, dataset_shapes, (), 'dataset') ds1.send() - for data in data_set.create_tuple_iterator(): + for data in data_set.create_tuple_iterator(output_numpy=True): output = net() print(data[0].any()) print( diff --git a/tests/st/probability/test_bnn_layer.py b/tests/st/probability/test_bnn_layer.py index cdc16908c30..45f43607f24 100644 --- a/tests/st/probability/test_bnn_layer.py +++ b/tests/st/probability/test_bnn_layer.py @@ -92,7 +92,7 @@ class BNNLeNet5(nn.Cell): def train_model(train_net, net, dataset): accs = [] loss_sum = 0 - for _, data in enumerate(dataset.create_dict_iterator()): + for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True)): train_x = Tensor(data['image'].astype(np.float32)) label = Tensor(data['label'].astype(np.int32)) loss = train_net(train_x, label) @@ -109,7 +109,7 @@ def train_model(train_net, net, dataset): def validate_model(net, dataset): accs = [] - for _, data in enumerate(dataset.create_dict_iterator()): + for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True)): train_x = Tensor(data['image'].astype(np.float32)) label = Tensor(data['label'].astype(np.int32)) output = net(train_x) diff --git a/tests/st/probability/test_gpu_svi_cvae.py b/tests/st/probability/test_gpu_svi_cvae.py index 8dde2b35421..e050858f1a6 100644 --- a/tests/st/probability/test_gpu_svi_cvae.py +++ b/tests/st/probability/test_gpu_svi_cvae.py @@ -122,7 +122,7 @@ def test_svi_cvae(): sample_label = Tensor([i for i in range(0, 8)] * 8, dtype=mstype.int32) generated_sample = cvae.generate_sample(sample_label, 64, IMAGE_SHAPE) # test function: reconstruct_sample - for sample in ds_train.create_dict_iterator(): + for sample in ds_train.create_dict_iterator(output_numpy=True): sample_x = Tensor(sample['image'], dtype=mstype.float32) sample_y = Tensor(sample['label'], dtype=mstype.int32) reconstructed_sample = cvae.reconstruct_sample(sample_x, sample_y) diff --git a/tests/st/probability/test_gpu_svi_vae.py b/tests/st/probability/test_gpu_svi_vae.py index dd338cbc018..9faf9483ee6 100644 --- a/tests/st/probability/test_gpu_svi_vae.py +++ b/tests/st/probability/test_gpu_svi_vae.py @@ -110,7 +110,7 @@ def test_svi_vae(): # test function: generate_sample generated_sample = vae.generate_sample(64, IMAGE_SHAPE) # test function: reconstruct_sample - for sample in ds_train.create_dict_iterator(): + for sample in ds_train.create_dict_iterator(output_numpy=True): sample_x = Tensor(sample['image'], dtype=mstype.float32) reconstructed_sample = vae.reconstruct_sample(sample_x) print('The loss of the trained network is ', trained_loss) diff --git a/tests/st/probability/test_transform_bnn_layer.py b/tests/st/probability/test_transform_bnn_layer.py index 52f0edffa78..86f26330d5b 100644 --- a/tests/st/probability/test_transform_bnn_layer.py +++ b/tests/st/probability/test_transform_bnn_layer.py @@ -93,7 +93,7 @@ class LeNet5(nn.Cell): def train_model(train_net, net, dataset): accs = [] loss_sum = 0 - for _, data in enumerate(dataset.create_dict_iterator()): + for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True)): train_x = Tensor(data['image'].astype(np.float32)) label = Tensor(data['label'].astype(np.int32)) loss = train_net(train_x, label) @@ -110,7 +110,7 @@ def train_model(train_net, net, dataset): def validate_model(net, dataset): accs = [] - for _, data in enumerate(dataset.create_dict_iterator()): + for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True)): train_x = Tensor(data['image'].astype(np.float32)) label = Tensor(data['label'].astype(np.int32)) output = net(train_x) diff --git a/tests/st/probability/test_transform_bnn_model.py b/tests/st/probability/test_transform_bnn_model.py index 008802b3d5e..43693355ca8 100644 --- a/tests/st/probability/test_transform_bnn_model.py +++ b/tests/st/probability/test_transform_bnn_model.py @@ -92,7 +92,7 @@ class LeNet5(nn.Cell): def train_model(train_net, net, dataset): accs = [] loss_sum = 0 - for _, data in enumerate(dataset.create_dict_iterator()): + for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True)): train_x = Tensor(data['image'].astype(np.float32)) label = Tensor(data['label'].astype(np.int32)) loss = train_net(train_x, label) @@ -109,7 +109,7 @@ def train_model(train_net, net, dataset): def validate_model(net, dataset): accs = [] - for _, data in enumerate(dataset.create_dict_iterator()): + for _, data in enumerate(dataset.create_dict_iterator(output_numpy=True)): train_x = Tensor(data['image'].astype(np.float32)) label = Tensor(data['label'].astype(np.int32)) output = net(train_x) diff --git a/tests/st/probability/test_uncertainty.py b/tests/st/probability/test_uncertainty.py index 2131c53a9b0..72c9b06b6e9 100644 --- a/tests/st/probability/test_uncertainty.py +++ b/tests/st/probability/test_uncertainty.py @@ -129,7 +129,7 @@ if __name__ == '__main__': epi_uncer_model_path=None, ale_uncer_model_path=None, save_model=False) - for eval_data in ds_eval.create_dict_iterator(): + for eval_data in ds_eval.create_dict_iterator(output_numpy=True): eval_data = Tensor(eval_data['image'], mstype.float32) epistemic_uncertainty = evaluation.eval_epistemic_uncertainty(eval_data) aleatoric_uncertainty = evaluation.eval_aleatoric_uncertainty(eval_data) diff --git a/tests/st/pynative/test_pynative_resnet50.py b/tests/st/pynative/test_pynative_resnet50.py index ace4676dad5..84c2d146ab3 100644 --- a/tests/st/pynative/test_pynative_resnet50.py +++ b/tests/st/pynative/test_pynative_resnet50.py @@ -423,8 +423,8 @@ def test_pynative_resnet50(): if step > max_step: break start_time = time.time() - input_data = Tensor(element["image"]) - input_label = Tensor(element["label"]) + input_data = element["image"] + input_label = element["label"] loss_output = net_with_criterion(input_data, input_label) grads = train_network(input_data, input_label) optimizer(grads) diff --git a/tests/ut/python/dataset/test_HWC2CHW.py b/tests/ut/python/dataset/test_HWC2CHW.py index 0c576ee1125..ac5936ad0ea 100644 --- a/tests/ut/python/dataset/test_HWC2CHW.py +++ b/tests/ut/python/dataset/test_HWC2CHW.py @@ -48,7 +48,8 @@ def test_HWC2CHW(plot=False): image_transposed = [] image = [] - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): transposed_item = item1["image"].copy() original_item = item2["image"].copy() image_transposed.append(transposed_item.transpose(1, 2, 0)) @@ -105,7 +106,8 @@ def test_HWC2CHW_comp(plot=False): image_c_transposed = [] image_py_transposed = [] - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): c_image = item1["image"] py_image = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) diff --git a/tests/ut/python/dataset/test_apply.py b/tests/ut/python/dataset/test_apply.py index 38c61db5ae7..9c08376e84c 100644 --- a/tests/ut/python/dataset/test_apply.py +++ b/tests/ut/python/dataset/test_apply.py @@ -40,7 +40,8 @@ def test_apply_generator_case(): data2 = data2.repeat(2) data2 = data2.batch(4) - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): np.testing.assert_array_equal(item1["data"], item2["data"]) @@ -63,7 +64,8 @@ def test_apply_imagefolder_case(): data2 = data2.map(operations=normalize_op) data2 = data2.repeat(2) - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): np.testing.assert_array_equal(item1["image"], item2["image"]) diff --git a/tests/ut/python/dataset/test_autocontrast.py b/tests/ut/python/dataset/test_autocontrast.py index 6055ce90ea9..fd8a7d45c3f 100644 --- a/tests/ut/python/dataset/test_autocontrast.py +++ b/tests/ut/python/dataset/test_autocontrast.py @@ -48,10 +48,10 @@ def test_auto_contrast_py(plot=False): for idx, (image, _) in enumerate(ds_original): if idx == 0: - images_original = np.transpose(image, (0, 2, 3, 1)) + images_original = np.transpose(image.asnumpy(), (0, 2, 3, 1)) else: images_original = np.append(images_original, - np.transpose(image, (0, 2, 3, 1)), + np.transpose(image.asnumpy(), (0, 2, 3, 1)), axis=0) # AutoContrast Images @@ -69,10 +69,10 @@ def test_auto_contrast_py(plot=False): for idx, (image, _) in enumerate(ds_auto_contrast): if idx == 0: - images_auto_contrast = np.transpose(image, (0, 2, 3, 1)) + images_auto_contrast = np.transpose(image.asnumpy(), (0, 2, 3, 1)) else: images_auto_contrast = np.append(images_auto_contrast, - np.transpose(image, (0, 2, 3, 1)), + np.transpose(image.asnumpy(), (0, 2, 3, 1)), axis=0) num_samples = images_original.shape[0] @@ -110,10 +110,10 @@ def test_auto_contrast_c(plot=False): for idx, (image, _) in enumerate(ds_auto_contrast_py): if idx == 0: - images_auto_contrast_py = image + images_auto_contrast_py = image.asnumpy() else: images_auto_contrast_py = np.append(images_auto_contrast_py, - image, + image.asnumpy(), axis=0) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) @@ -125,10 +125,10 @@ def test_auto_contrast_c(plot=False): for idx, (image, _) in enumerate(ds_auto_contrast_c): if idx == 0: - images_auto_contrast_c = image + images_auto_contrast_c = image.asnumpy() else: images_auto_contrast_c = np.append(images_auto_contrast_c, - image, + image.asnumpy(), axis=0) num_samples = images_auto_contrast_c.shape[0] @@ -170,10 +170,10 @@ def test_auto_contrast_one_channel_c(plot=False): for idx, (image, _) in enumerate(ds_auto_contrast_py): if idx == 0: - images_auto_contrast_py = image + images_auto_contrast_py = image.asnumpy() else: images_auto_contrast_py = np.append(images_auto_contrast_py, - image, + image.asnumpy(), axis=0) ds = de.ImageFolderDataset(dataset_dir=DATA_DIR, shuffle=False) @@ -186,10 +186,10 @@ def test_auto_contrast_one_channel_c(plot=False): for idx, (image, _) in enumerate(ds_auto_contrast_c): if idx == 0: - images_auto_contrast_c = image + images_auto_contrast_c = image.asnumpy() else: images_auto_contrast_c = np.append(images_auto_contrast_c, - image, + image.asnumpy(), axis=0) num_samples = images_auto_contrast_c.shape[0] @@ -218,9 +218,9 @@ def test_auto_contrast_mnist_c(plot=False): for _, (data_orig, data_trans) in enumerate(zip(ds_orig, ds_auto_contrast_c)): image_orig, label_orig = data_orig image_trans, _ = data_trans - images.append(image_orig) - labels.append(label_orig) - images_trans.append(image_trans) + images.append(image_orig.asnumpy()) + labels.append(label_orig.asnumpy()) + images_trans.append(image_trans.asnumpy()) # Compare with expected md5 from images filename = "autocontrast_mnist_result_c.npz" diff --git a/tests/ut/python/dataset/test_bounding_box_augment.py b/tests/ut/python/dataset/test_bounding_box_augment.py index a93311dd167..39846344918 100644 --- a/tests/ut/python/dataset/test_bounding_box_augment.py +++ b/tests/ut/python/dataset/test_bounding_box_augment.py @@ -58,7 +58,8 @@ def test_bounding_box_augment_with_rotation_op(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1, output_numpy=True), + dataVoc2.create_dict_iterator(num_epochs=1, output_numpy=True)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -96,7 +97,8 @@ def test_bounding_box_augment_with_crop_op(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1, output_numpy=True), + dataVoc2.create_dict_iterator(num_epochs=1, output_numpy=True)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -133,7 +135,8 @@ def test_bounding_box_augment_valid_ratio_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1, output_numpy=True), + dataVoc2.create_dict_iterator(num_epochs=1, output_numpy=True)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -166,7 +169,8 @@ def test_bounding_box_augment_op_coco_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataCoco1.create_dict_iterator(num_epochs=1), dataCoco2.create_dict_iterator(num_epochs=1)): + for unAug, Aug in zip(dataCoco1.create_dict_iterator(num_epochs=1, output_numpy=True), + dataCoco2.create_dict_iterator(num_epochs=1, output_numpy=True)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -209,7 +213,8 @@ def test_bounding_box_augment_valid_edge_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1, output_numpy=True), + dataVoc2.create_dict_iterator(num_epochs=1, output_numpy=True)): unaugSamp.append(unAug) augSamp.append(Aug) diff --git a/tests/ut/python/dataset/test_bucket_batch_by_length.py b/tests/ut/python/dataset/test_bucket_batch_by_length.py index bc5993fd212..eb6aca92bf2 100644 --- a/tests/ut/python/dataset/test_bucket_batch_by_length.py +++ b/tests/ut/python/dataset/test_bucket_batch_by_length.py @@ -135,7 +135,7 @@ def test_bucket_batch_multi_bucket_no_padding(): [[1], [5], [9]]] output = [] - for data in dataset.create_dict_iterator(num_epochs=1): + for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): output.append(data["col1"].tolist()) assert output == expected_output @@ -166,7 +166,7 @@ def test_bucket_batch_multi_bucket_with_padding(): [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]]] output = [] - for data in dataset.create_dict_iterator(num_epochs=1): + for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): output.append(data["col1"].tolist()) assert output == expected_output @@ -187,7 +187,7 @@ def test_bucket_batch_single_bucket_no_padding(): [[5], [6], [7], [8], [9]]] output = [] - for data in dataset.create_dict_iterator(num_epochs=1): + for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): output.append(data["col1"].tolist()) assert output == expected_output @@ -217,7 +217,7 @@ def test_bucket_batch_single_bucket_with_padding(): [0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0]]] output = [] - for data in dataset.create_dict_iterator(num_epochs=1): + for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): output.append(data["col1"].tolist()) assert output == expected_output @@ -248,7 +248,7 @@ def test_bucket_batch_pad_to_bucket_boundary(): [0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0]]] output = [] - for data in dataset.create_dict_iterator(num_epochs=1): + for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): output.append(data["col1"].tolist()) assert output == expected_output @@ -284,7 +284,7 @@ def test_bucket_batch_default_pad(): [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]]] output = [] - for data in dataset.create_dict_iterator(num_epochs=1): + for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): output.append(data["col1"].tolist()) assert output == expected_output @@ -315,7 +315,7 @@ def test_bucket_batch_drop_remainder(): [[19], [22], [25]]] output = [] - for data in dataset.create_dict_iterator(num_epochs=1): + for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): output.append(data["col1"].tolist()) assert output == expected_output @@ -345,7 +345,7 @@ def test_bucket_batch_default_length_function(): [0, 1, 2, 3, 4, 5, 6, 7, 8]]] output = [] - for data in dataset.create_dict_iterator(num_epochs=1): + for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): output.append(data["col1"].tolist()) assert output == expected_output @@ -380,7 +380,7 @@ def test_bucket_batch_multi_column(): same_shape_output = [] variable_shape_output = [] - for data in dataset.create_dict_iterator(num_epochs=1): + for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): same_shape_output.append(data["same_shape"].tolist()) variable_shape_output.append(data["variable_shape"].tolist()) @@ -419,7 +419,7 @@ def test_bucket_batch_three_columns(): same_shape_output = [] same_shape2_output = [] variable_shape_output = [] - for data in dataset.create_dict_iterator(num_epochs=1): + for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): same_shape_output.append(data["same_shape"].tolist()) same_shape2_output.append(data["same_shape2"].tolist()) variable_shape_output.append(data["variable_shape"].tolist()) diff --git a/tests/ut/python/dataset/test_c_compose.py b/tests/ut/python/dataset/test_c_compose.py index 4eb851daa43..20a45c6e9b4 100644 --- a/tests/ut/python/dataset/test_c_compose.py +++ b/tests/ut/python/dataset/test_c_compose.py @@ -27,7 +27,7 @@ def test_compose(): data = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False) data = data.map(operations=ops.Compose(op_list), input_columns=["col"]) res = [] - for i in data.create_dict_iterator(num_epochs=1): + for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): res.append(i["col"].tolist()) return res except (TypeError, ValueError) as e: diff --git a/tests/ut/python/dataset/test_c_random_apply.py b/tests/ut/python/dataset/test_c_random_apply.py index 29c64d55af6..97df753ba3d 100644 --- a/tests/ut/python/dataset/test_c_random_apply.py +++ b/tests/ut/python/dataset/test_c_random_apply.py @@ -26,7 +26,7 @@ def test_random_apply(): data = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False) data = data.map(operations=ops.RandomApply(op_list, prob), input_columns=["col"]) res = [] - for i in data.create_dict_iterator(num_epochs=1): + for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): res.append(i["col"].tolist()) return res except (TypeError, ValueError) as e: diff --git a/tests/ut/python/dataset/test_c_random_choice.py b/tests/ut/python/dataset/test_c_random_choice.py index c7db883b86c..335274a1f63 100644 --- a/tests/ut/python/dataset/test_c_random_choice.py +++ b/tests/ut/python/dataset/test_c_random_choice.py @@ -29,7 +29,7 @@ def test_random_choice(): data = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False) data = data.map(operations=ops.RandomChoice(op_list), input_columns=["col"]) res = [] - for i in data.create_dict_iterator(num_epochs=1): + for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): res.append(i["col"].tolist()) return res except (TypeError, ValueError) as e: diff --git a/tests/ut/python/dataset/test_center_crop.py b/tests/ut/python/dataset/test_center_crop.py index fd07934fc93..bd5c83cd374 100644 --- a/tests/ut/python/dataset/test_center_crop.py +++ b/tests/ut/python/dataset/test_center_crop.py @@ -49,7 +49,8 @@ def test_center_crop_op(height=375, width=375, plot=False): image_cropped = [] image = [] - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): image_cropped.append(item1["image"].copy()) image.append(item2["image"].copy()) if plot: @@ -99,7 +100,8 @@ def test_center_crop_comp(height=375, width=375, plot=False): image_c_cropped = [] image_py_cropped = [] - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): c_image = item1["image"] py_image = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) # Note: The images aren't exactly the same due to rounding error @@ -132,7 +134,7 @@ def test_crop_grayscale(height=375, width=375): crop_gray = vision.CenterCrop([height, width]) data1 = data1.map(operations=crop_gray, input_columns=["image"]) - for item1 in data1.create_dict_iterator(num_epochs=1): + for item1 in data1.create_dict_iterator(num_epochs=1, output_numpy=True): c_image = item1["image"] # Check that the image is grayscale diff --git a/tests/ut/python/dataset/test_concat.py b/tests/ut/python/dataset/test_concat.py index 1feb04f125b..eff8c7905e3 100644 --- a/tests/ut/python/dataset/test_concat.py +++ b/tests/ut/python/dataset/test_concat.py @@ -50,9 +50,10 @@ def test_concat_01(): data3 = data1 + data2 # Here i refers to index, d refers to data element - for i, d in enumerate(data3): - logger.info("data: %i", d[0][0]) - assert i == d[0][0] + for i, d in enumerate(data3.create_tuple_iterator(output_numpy=True)): + t = d + logger.info("data: %i", t[0][0]) + assert i == t[0][0] assert sum([1 for _ in data3]) == 10 @@ -68,9 +69,10 @@ def test_concat_02(): data3 = data1.concat(data2) # Here i refers to index, d refers to data element - for i, d in enumerate(data3): - logger.info("data: %i", d[0][0]) - assert i == d[0][0] + for i, d in enumerate(data3.create_tuple_iterator(output_numpy=True)): + t = d + logger.info("data: %i", t[0][0]) + assert i == t[0][0] assert sum([1 for _ in data3]) == 10 @@ -145,9 +147,10 @@ def test_concat_06(): dataset = data1 + data2 + data3 # Here i refers to index, d refers to data element - for i, d in enumerate(dataset): - logger.info("data: %i", d[0][0]) - assert i == d[0][0] + for i, d in enumerate(dataset.create_tuple_iterator(output_numpy=True)): + t = d + logger.info("data: %i", t[0][0]) + assert i == t[0][0] assert sum([1 for _ in dataset]) == 20 @@ -165,9 +168,10 @@ def test_concat_07(): data4 = data1 + dataset # Here i refers to index, d refers to data element - for i, d in enumerate(data4): - logger.info("data: %i", d[0][0]) - assert i == d[0][0] + for i, d in enumerate(data4.create_tuple_iterator(output_numpy=True)): + t = d + logger.info("data: %i", t[0][0]) + assert i == t[0][0] assert sum([1 for _ in data4]) == 20 @@ -184,9 +188,10 @@ def test_concat_08(): data3 = data3.repeat(2) # Here i refers to index, d refers to data element - for i, d in enumerate(data3): - logger.info("data: %i", d[0][0]) - assert i % 10 == d[0][0] + for i, d in enumerate(data3.create_tuple_iterator(output_numpy=True)): + t = d + logger.info("data: %i", t[0][0]) + assert i % 10 == t[0][0] assert sum([1 for _ in data3]) == 20 @@ -205,9 +210,10 @@ def test_concat_09(): res = [0, 1, 2, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 3, 4, 5, 6, 7, 8, 9] # Here i refers to index, d refers to data element - for i, d in enumerate(data3): - logger.info("data: %i", d[0][0]) - assert res[i] == d[0][0] + for i, d in enumerate(data3.create_tuple_iterator(output_numpy=True)): + t = d + logger.info("data: %i", t[0][0]) + assert res[i] == t[0][0] assert sum([1 for _ in data3]) == 20 @@ -225,9 +231,10 @@ def test_concat_10(): res = [0, 1, 2, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9] # Here i refers to index, d refers to data element - for i, d in enumerate(data3): - logger.info("data: %i", d[0][0]) - assert res[i] == d[0][0] + for i, d in enumerate(data3.create_tuple_iterator(output_numpy=True)): + t = d + logger.info("data: %i", t[0][0]) + assert res[i] == t[0][0] assert sum([1 for _ in data3]) == 13 @@ -247,9 +254,10 @@ def test_concat_11(): res = [0, 10, 15, 20] # Here i refers to index, d refers to data element - for i, d in enumerate(data3): - logger.info("data: %i", d[0][0]) - assert res[i] == d[0][0] + for i, d in enumerate(data3.create_tuple_iterator(output_numpy=True)): + t = d + logger.info("data: %i", t[0][0]) + assert res[i] == t[0][0] assert sum([1 for _ in data3]) == 3 @@ -270,9 +278,10 @@ def test_concat_12(): data3 = data3.shuffle(buffer_size=10) # Here i refers to index, d refers to data element - for i, d in enumerate(data3): - logger.info("data: %i", d[0][0]) - assert res[i] == d[0][0] + for i, d in enumerate(data3.create_tuple_iterator(output_numpy=True)): + t = d + logger.info("data: %i", t[0][0]) + assert res[i] == t[0][0] assert sum([1 for _ in data3]) == 10 @@ -297,9 +306,10 @@ def test_concat_13(): data3 = data3.shuffle(buffer_size=int(data3.get_dataset_size())) # Here i refers to index, d refers to data element - for i, d in enumerate(data3): - logger.info("data: %i", d[0][0]) - assert res[i] == d[0][0] + for i, d in enumerate(data3.create_tuple_iterator(output_numpy=True)): + t = d + logger.info("data: %i", t[0][0]) + assert res[i] == t[0][0] assert sum([1 for _ in data3]) == 3 @@ -324,11 +334,11 @@ def test_concat_14(): data3 = data1 + data2 expected, output = [], [] - for d in data1: + for d in data1.create_tuple_iterator(output_numpy=True): expected.append(d[0]) - for d in data2: + for d in data2.create_tuple_iterator(output_numpy=True): expected.append(d[0]) - for d in data3: + for d in data3.create_tuple_iterator(output_numpy=True): output.append(d[0]) assert len(expected) == len(output) diff --git a/tests/ut/python/dataset/test_concatenate_op.py b/tests/ut/python/dataset/test_concatenate_op.py index 242147ed33a..a99d90df6f5 100644 --- a/tests/ut/python/dataset/test_concatenate_op.py +++ b/tests/ut/python/dataset/test_concatenate_op.py @@ -34,7 +34,7 @@ def test_concatenate_op_all(): data = data.map(operations=concatenate_op, input_columns=["col"]) expected = np.array([1.4, 2., 3., 4., 4.5, 5., 6., 7., 8., 9., 10.3, 11., 12.]) - for data_row in data: + for data_row in data.create_tuple_iterator(output_numpy=True): np.testing.assert_array_equal(data_row[0], expected) @@ -46,7 +46,7 @@ def test_concatenate_op_none(): concatenate_op = data_trans.Concatenate() data = data.map(operations=concatenate_op, input_columns=["col"]) - for data_row in data: + for data_row in data.create_tuple_iterator(output_numpy=True): np.testing.assert_array_equal(data_row[0], np.array([5., 6., 7., 8.], dtype=np.float)) @@ -61,7 +61,7 @@ def test_concatenate_op_string(): data = data.map(operations=concatenate_op, input_columns=["col"]) expected = np.array(["dw", "df", "ss", "ad", "dwsdf", "df"], dtype='S') - for data_row in data: + for data_row in data.create_tuple_iterator(output_numpy=True): np.testing.assert_array_equal(data_row[0], expected) @@ -77,7 +77,7 @@ def test_concatenate_op_multi_input_string(): data = data.map(operations=concatenate_op, input_columns=["col1", "col2"], column_order=["out1"], output_columns=["out1"]) expected = np.array(["dw", "df", "1", "2", "d", "3", "4", "e", "dwsdf", "df"], dtype='S') - for data_row in data: + for data_row in data.create_tuple_iterator(output_numpy=True): np.testing.assert_array_equal(data_row[0], expected) @@ -92,7 +92,7 @@ def test_concatenate_op_multi_input_numeric(): data = data.map(operations=concatenate_op, input_columns=["col1", "col2"], column_order=["out1"], output_columns=["out1"]) expected = np.array([3, 5, 1, 2, 3, 4]) - for data_row in data: + for data_row in data.create_tuple_iterator(output_numpy=True): np.testing.assert_array_equal(data_row[0], expected) @@ -158,7 +158,7 @@ def test_concatenate_op_negative_axis(): data = data.map(operations=concatenate_op, input_columns=["col"]) expected = np.array([1.4, 2., 3., 4., 4.5, 5., 6., 7., 8., 9., 10.3, 11., 12.]) - for data_row in data: + for data_row in data.create_tuple_iterator(output_numpy=True): np.testing.assert_array_equal(data_row[0], expected) diff --git a/tests/ut/python/dataset/test_config.py b/tests/ut/python/dataset/test_config.py index efec569f669..4f3c087372b 100644 --- a/tests/ut/python/dataset/test_config.py +++ b/tests/ut/python/dataset/test_config.py @@ -288,7 +288,7 @@ def test_deterministic_python_seed(): data1 = data1.map(operations=transform, input_columns=["image"]) data1_output = [] # config.set_seed() calls random.seed() - for data_one in data1.create_dict_iterator(num_epochs=1): + for data_one in data1.create_dict_iterator(num_epochs=1, output_numpy=True): data1_output.append(data_one["image"]) # Second dataset @@ -298,7 +298,7 @@ def test_deterministic_python_seed(): ds.config.set_seed(0) data2_output = [] - for data_two in data2.create_dict_iterator(num_epochs=1): + for data_two in data2.create_dict_iterator(num_epochs=1, output_numpy=True): data2_output.append(data_two["image"]) np.testing.assert_equal(data1_output, data2_output) @@ -331,7 +331,7 @@ def test_deterministic_python_seed_multi_thread(): data1 = data1.map(operations=transform, input_columns=["image"], python_multiprocessing=True) data1_output = [] # config.set_seed() calls random.seed() - for data_one in data1.create_dict_iterator(num_epochs=1): + for data_one in data1.create_dict_iterator(num_epochs=1, output_numpy=True): data1_output.append(data_one["image"]) # Second dataset @@ -342,7 +342,7 @@ def test_deterministic_python_seed_multi_thread(): ds.config.set_seed(0) data2_output = [] - for data_two in data2.create_dict_iterator(num_epochs=1): + for data_two in data2.create_dict_iterator(num_epochs=1, output_numpy=True): data2_output.append(data_two["image"]) try: diff --git a/tests/ut/python/dataset/test_cut_out.py b/tests/ut/python/dataset/test_cut_out.py index c56c5c0f5ac..2c27ebc0e81 100644 --- a/tests/ut/python/dataset/test_cut_out.py +++ b/tests/ut/python/dataset/test_cut_out.py @@ -61,7 +61,8 @@ def test_cut_out_op(plot=False): data2 = data2.map(operations=transforms_2, input_columns=["image"]) num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): num_iter += 1 image_1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) # C image doesn't require transpose @@ -108,7 +109,8 @@ def test_cut_out_op_multicut(plot=False): num_iter = 0 image_list_1, image_list_2 = [], [] - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): num_iter += 1 image_1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) # C image doesn't require transpose @@ -189,7 +191,8 @@ def test_cut_out_comp(plot=False): num_iter = 0 image_list_1, image_list_2 = [], [] - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): num_iter += 1 image_1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) # C image doesn't require transpose diff --git a/tests/ut/python/dataset/test_cutmix_batch_op.py b/tests/ut/python/dataset/test_cutmix_batch_op.py index 8fc6a78f527..02a2174d487 100644 --- a/tests/ut/python/dataset/test_cutmix_batch_op.py +++ b/tests/ut/python/dataset/test_cutmix_batch_op.py @@ -44,9 +44,9 @@ def test_cutmix_batch_success1(plot=False): images_original = None for idx, (image, _) in enumerate(ds_original): if idx == 0: - images_original = image + images_original = image.asnumpy() else: - images_original = np.append(images_original, image, axis=0) + images_original = np.append(images_original, image.asnumpy(), axis=0) # CutMix Images data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) @@ -61,9 +61,9 @@ def test_cutmix_batch_success1(plot=False): images_cutmix = None for idx, (image, _) in enumerate(data1): if idx == 0: - images_cutmix = image.transpose(0, 2, 3, 1) + images_cutmix = image.asnumpy().transpose(0, 2, 3, 1) else: - images_cutmix = np.append(images_cutmix, image.transpose(0, 2, 3, 1), axis=0) + images_cutmix = np.append(images_cutmix, image.asnumpy().transpose(0, 2, 3, 1), axis=0) if plot: visualize_list(images_original, images_cutmix) @@ -87,9 +87,9 @@ def test_cutmix_batch_success2(plot=False): images_original = None for idx, (image, _) in enumerate(ds_original): if idx == 0: - images_original = image + images_original = image.asnumpy() else: - images_original = np.append(images_original, image, axis=0) + images_original = np.append(images_original, image.asnumpy(), axis=0) # CutMix Images data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) @@ -104,9 +104,9 @@ def test_cutmix_batch_success2(plot=False): images_cutmix = None for idx, (image, _) in enumerate(data1): if idx == 0: - images_cutmix = image + images_cutmix = image.asnumpy() else: - images_cutmix = np.append(images_cutmix, image, axis=0) + images_cutmix = np.append(images_cutmix, image.asnumpy(), axis=0) if plot: visualize_list(images_original, images_cutmix) @@ -131,9 +131,9 @@ def test_cutmix_batch_success3(plot=False): images_original = None for idx, (image, _) in enumerate(ds_original): if idx == 0: - images_original = image + images_original = image.asnumpy() else: - images_original = np.append(images_original, image, axis=0) + images_original = np.append(images_original, image.asnumpy(), axis=0) # CutMix Images data1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False) @@ -151,9 +151,9 @@ def test_cutmix_batch_success3(plot=False): images_cutmix = None for idx, (image, _) in enumerate(data1): if idx == 0: - images_cutmix = image + images_cutmix = image.asnumpy() else: - images_cutmix = np.append(images_cutmix, image, axis=0) + images_cutmix = np.append(images_cutmix, image.asnumpy(), axis=0) if plot: visualize_list(images_original, images_cutmix) @@ -178,9 +178,9 @@ def test_cutmix_batch_success4(plot=False): images_original = None for idx, (image, _) in enumerate(ds_original): if idx == 0: - images_original = image + images_original = image.asnumpy() else: - images_original = np.append(images_original, image, axis=0) + images_original = np.append(images_original, image.asnumpy(), axis=0) # CutMix Images data1 = ds.CelebADataset(dataset_dir=DATA_DIR3, shuffle=False) @@ -198,9 +198,9 @@ def test_cutmix_batch_success4(plot=False): images_cutmix = None for idx, (image, _) in enumerate(data1): if idx == 0: - images_cutmix = image + images_cutmix = image.asnumpy() else: - images_cutmix = np.append(images_cutmix, image, axis=0) + images_cutmix = np.append(images_cutmix, image.asnumpy(), axis=0) if plot: visualize_list(images_original, images_cutmix) @@ -279,9 +279,9 @@ def test_cutmix_batch_fail1(): data1 = data1.map(operations=cutmix_batch_op, input_columns=["image", "label"]) for idx, (image, _) in enumerate(data1): if idx == 0: - images_cutmix = image + images_cutmix = image.asnumpy() else: - images_cutmix = np.append(images_cutmix, image, axis=0) + images_cutmix = np.append(images_cutmix, image.asnumpy(), axis=0) error_message = "You must make sure images are HWC or CHW and batch " assert error_message in str(error.value) @@ -360,9 +360,9 @@ def test_cutmix_batch_fail5(): images_cutmix = np.array([]) for idx, (image, _) in enumerate(data1): if idx == 0: - images_cutmix = image + images_cutmix = image.asnumpy() else: - images_cutmix = np.append(images_cutmix, image, axis=0) + images_cutmix = np.append(images_cutmix, image.asnumpy(), axis=0) error_message = "Both images and labels columns are required" assert error_message in str(error.value) @@ -387,9 +387,9 @@ def test_cutmix_batch_fail6(): images_cutmix = np.array([]) for idx, (image, _) in enumerate(data1): if idx == 0: - images_cutmix = image + images_cutmix = image.asnumpy() else: - images_cutmix = np.append(images_cutmix, image, axis=0) + images_cutmix = np.append(images_cutmix, image.asnumpy(), axis=0) error_message = "CutMixBatch: Image doesn't match the given image format." assert error_message in str(error.value) @@ -412,9 +412,9 @@ def test_cutmix_batch_fail7(): images_cutmix = np.array([]) for idx, (image, _) in enumerate(data1): if idx == 0: - images_cutmix = image + images_cutmix = image.asnumpy() else: - images_cutmix = np.append(images_cutmix, image, axis=0) + images_cutmix = np.append(images_cutmix, image.asnumpy(), axis=0) error_message = "CutMixBatch: Wrong labels shape. The second column (labels) must have a shape of NC or NLC" assert error_message in str(error.value) diff --git a/tests/ut/python/dataset/test_dataset_numpy_slices.py b/tests/ut/python/dataset/test_dataset_numpy_slices.py index 861778c6547..d3762d6703e 100644 --- a/tests/ut/python/dataset/test_dataset_numpy_slices.py +++ b/tests/ut/python/dataset/test_dataset_numpy_slices.py @@ -28,7 +28,7 @@ def test_numpy_slices_list_1(): ds = de.NumpySlicesDataset(np_data, shuffle=False) for i, data in enumerate(ds): - assert data[0] == np_data[i] + assert data[0].asnumpy() == np_data[i] def test_numpy_slices_list_2(): @@ -38,7 +38,7 @@ def test_numpy_slices_list_2(): ds = de.NumpySlicesDataset(np_data, column_names=["col1"], shuffle=False) for i, data in enumerate(ds): - assert np.equal(data[0], np_data[i]).all() + assert np.equal(data[0].asnumpy(), np_data[i]).all() def test_numpy_slices_list_3(): @@ -48,7 +48,7 @@ def test_numpy_slices_list_3(): ds = de.NumpySlicesDataset(np_data, column_names=["col1"], shuffle=False) for i, data in enumerate(ds): - assert np.equal(data[0], np_data[i]).all() + assert np.equal(data[0].asnumpy(), np_data[i]).all() def test_numpy_slices_list_append(): @@ -62,12 +62,12 @@ def test_numpy_slices_list_append(): data1 = data1.map(operations=[vision.Decode(True), resize_op], input_columns=["image"]) res = [] - for data in data1.create_dict_iterator(num_epochs=1): + for data in data1.create_dict_iterator(num_epochs=1, output_numpy=True): res.append(data["image"]) ds = de.NumpySlicesDataset(res, column_names=["col1"], shuffle=False) - for i, data in enumerate(ds): + for i, data in enumerate(ds.create_tuple_iterator(output_numpy=True)): assert np.equal(data, res[i]).all() @@ -79,8 +79,8 @@ def test_numpy_slices_dict_1(): res = [[1, 3], [2, 4]] for i, data in enumerate(ds): - assert data[0] == res[i][0] - assert data[1] == res[i][1] + assert data[0].asnumpy() == res[i][0] + assert data[1].asnumpy() == res[i][1] def test_numpy_slices_tuple_1(): @@ -89,7 +89,7 @@ def test_numpy_slices_tuple_1(): np_data = [([1, 2], [3, 4]), ([11, 12], [13, 14]), ([21, 22], [23, 24])] ds = de.NumpySlicesDataset(np_data, shuffle=False) - for i, data in enumerate(ds): + for i, data in enumerate(ds.create_tuple_iterator(output_numpy=True)): assert np.equal(data, np_data[i]).all() assert sum([1 for _ in ds]) == 3 @@ -102,7 +102,7 @@ def test_numpy_slices_tuple_2(): expected = [[1, 3, 5], [2, 4, 6]] ds = de.NumpySlicesDataset(np_data, shuffle=False) - for i, data in enumerate(ds): + for i, data in enumerate(ds.create_tuple_iterator(output_numpy=True)): assert np.equal(data, expected[i]).all() assert sum([1 for _ in ds]) == 2 @@ -116,8 +116,8 @@ def test_numpy_slices_tuple_3(): ds = de.NumpySlicesDataset(data, column_names=["col1", "col2"], shuffle=False) for i, data in enumerate(ds): - assert np.equal(data[0], features[i]).all() - assert data[1] == labels[i] + assert np.equal(data[0].asnumpy(), features[i]).all() + assert data[1].asnumpy() == labels[i] def test_numpy_slices_csv_value(): @@ -132,8 +132,8 @@ def test_numpy_slices_csv_value(): ds = de.NumpySlicesDataset(np_data, column_names=["col1", "col2"], shuffle=False) for i, data in enumerate(ds): - assert np.equal(np_data[0][i], data[0]).all() - assert np.equal(np_data[1][i], data[1]).all() + assert np.equal(np_data[0][i], data[0].asnumpy()).all() + assert np.equal(np_data[1][i], data[1].asnumpy()).all() def test_numpy_slices_csv_dict(): @@ -146,7 +146,7 @@ def test_numpy_slices_csv_dict(): ds = de.NumpySlicesDataset(dict(df), shuffle=False) - for i, data in enumerate(ds): + for i, data in enumerate(ds.create_tuple_iterator(output_numpy=True)): assert np.equal(data, res[i]).all() @@ -157,7 +157,7 @@ def test_numpy_slices_num_samplers(): ds = de.NumpySlicesDataset(np_data, shuffle=False, num_samples=2) for i, data in enumerate(ds): - assert np.equal(data[0], np_data[i]).all() + assert np.equal(data[0].asnumpy(), np_data[i]).all() assert sum([1 for _ in ds]) == 2 @@ -169,7 +169,7 @@ def test_numpy_slices_distributed_sampler(): ds = de.NumpySlicesDataset(np_data, shuffle=False, shard_id=0, num_shards=4) for i, data in enumerate(ds): - assert np.equal(data[0], np_data[i * 4]).all() + assert np.equal(data[0].asnumpy(), np_data[i * 4]).all() assert sum([1 for _ in ds]) == 2 @@ -200,7 +200,7 @@ def test_numpy_slices_sequential_sampler(): ds = de.NumpySlicesDataset(np_data, sampler=de.SequentialSampler()).repeat(2) for i, data in enumerate(ds): - assert np.equal(data[0], np_data[i % 8]).all() + assert np.equal(data[0].asnumpy(), np_data[i % 8]).all() def test_numpy_slices_invalid_column_names_type(): diff --git a/tests/ut/python/dataset/test_datasets_celeba.py b/tests/ut/python/dataset/test_datasets_celeba.py index 889e18cec3b..329ff8888a9 100644 --- a/tests/ut/python/dataset/test_datasets_celeba.py +++ b/tests/ut/python/dataset/test_datasets_celeba.py @@ -27,7 +27,7 @@ def test_celeba_dataset_label(): [0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1]] count = 0 - for item in data.create_dict_iterator(num_epochs=1): + for item in data.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info("----------image--------") logger.info(item["image"]) logger.info("----------attr--------") @@ -63,7 +63,7 @@ def test_celeba_dataset_ext(): expect_labels = [0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1], count = 0 - for item in data.create_dict_iterator(num_epochs=1): + for item in data.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info("----------image--------") logger.info(item["image"]) logger.info("----------attr--------") diff --git a/tests/ut/python/dataset/test_datasets_cifarop.py b/tests/ut/python/dataset/test_datasets_cifarop.py index 258ba634947..24193199b2a 100644 --- a/tests/ut/python/dataset/test_datasets_cifarop.py +++ b/tests/ut/python/dataset/test_datasets_cifarop.py @@ -75,7 +75,7 @@ def test_cifar10_content_check(): images, labels = load_cifar(DATA_DIR_10) num_iter = 0 # in this example, each dictionary has keys "image" and "label" - for i, d in enumerate(data1.create_dict_iterator(num_epochs=1)): + for i, d in enumerate(data1.create_dict_iterator(num_epochs=1, output_numpy=True)): np.testing.assert_array_equal(d["image"], images[i]) np.testing.assert_array_equal(d["label"], labels[i]) num_iter += 1 @@ -153,7 +153,7 @@ def test_cifar10_pk_sampler(): data = ds.Cifar10Dataset(DATA_DIR_10, sampler=sampler) num_iter = 0 label_list = [] - for item in data.create_dict_iterator(num_epochs=1): + for item in data.create_dict_iterator(num_epochs=1, output_numpy=True): label_list.append(item["label"]) num_iter += 1 np.testing.assert_array_equal(golden, label_list) @@ -170,7 +170,8 @@ def test_cifar10_sequential_sampler(): data1 = ds.Cifar10Dataset(DATA_DIR_10, sampler=sampler) data2 = ds.Cifar10Dataset(DATA_DIR_10, shuffle=False, num_samples=num_samples) num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): np.testing.assert_equal(item1["label"], item2["label"]) num_iter += 1 assert num_iter == num_samples @@ -225,7 +226,7 @@ def test_cifar10_visualize(plot=False): data1 = ds.Cifar10Dataset(DATA_DIR_10, num_samples=10, shuffle=False) num_iter = 0 image_list, label_list = [], [] - for item in data1.create_dict_iterator(num_epochs=1): + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): image = item["image"] label = item["label"] image_list.append(image) @@ -251,7 +252,7 @@ def test_cifar100_content_check(): images, labels = load_cifar(DATA_DIR_100, kind="cifar100") num_iter = 0 # in this example, each dictionary has keys "image", "coarse_label" and "fine_image" - for i, d in enumerate(data1.create_dict_iterator(num_epochs=1)): + for i, d in enumerate(data1.create_dict_iterator(num_epochs=1, output_numpy=True)): np.testing.assert_array_equal(d["image"], images[i]) np.testing.assert_array_equal(d["coarse_label"], labels[i][0]) np.testing.assert_array_equal(d["fine_label"], labels[i][1]) @@ -319,7 +320,7 @@ def test_cifar100_pk_sampler(): data = ds.Cifar100Dataset(DATA_DIR_100, sampler=sampler) num_iter = 0 label_list = [] - for item in data.create_dict_iterator(num_epochs=1): + for item in data.create_dict_iterator(num_epochs=1, output_numpy=True): label_list.append(item["coarse_label"]) num_iter += 1 np.testing.assert_array_equal(golden, label_list) @@ -375,7 +376,7 @@ def test_cifar100_visualize(plot=False): data1 = ds.Cifar100Dataset(DATA_DIR_100, num_samples=10, shuffle=False) num_iter = 0 image_list, label_list = [], [] - for item in data1.create_dict_iterator(num_epochs=1): + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): image = item["image"] coarse_label = item["coarse_label"] fine_label = item["fine_label"] diff --git a/tests/ut/python/dataset/test_datasets_clue.py b/tests/ut/python/dataset/test_datasets_clue.py index d9f33041d49..faf92cbf965 100644 --- a/tests/ut/python/dataset/test_datasets_clue.py +++ b/tests/ut/python/dataset/test_datasets_clue.py @@ -26,7 +26,7 @@ def test_clue(): data = ds.CLUEDataset(TRAIN_FILE, task='AFQMC', usage='train', shuffle=False) data = data.repeat(2) data = data.skip(3) - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): buffer.append({ 'label': d['label'].item().decode("utf8"), 'sentence1': d['sentence1'].item().decode("utf8"), @@ -43,7 +43,7 @@ def test_clue_num_shards(): buffer = [] data = ds.CLUEDataset(TRAIN_FILE, task='AFQMC', usage='train', num_shards=3, shard_id=1) - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): buffer.append({ 'label': d['label'].item().decode("utf8"), 'sentence1': d['sentence1'].item().decode("utf8"), @@ -60,7 +60,7 @@ def test_clue_num_samples(): data = ds.CLUEDataset(TRAIN_FILE, task='AFQMC', usage='train', num_samples=2) count = 0 - for _ in data.create_dict_iterator(num_epochs=1): + for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True): count += 1 assert count == 2 @@ -87,7 +87,7 @@ def test_clue_afqmc(): # train buffer = [] data = ds.CLUEDataset(TRAIN_FILE, task='AFQMC', usage='train', shuffle=False) - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): buffer.append({ 'label': d['label'].item().decode("utf8"), 'sentence1': d['sentence1'].item().decode("utf8"), @@ -98,7 +98,7 @@ def test_clue_afqmc(): # test buffer = [] data = ds.CLUEDataset(TEST_FILE, task='AFQMC', usage='test', shuffle=False) - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): buffer.append({ 'id': d['id'], 'sentence1': d['sentence1'].item().decode("utf8"), @@ -109,7 +109,7 @@ def test_clue_afqmc(): # evaluation buffer = [] data = ds.CLUEDataset(EVAL_FILE, task='AFQMC', usage='eval', shuffle=False) - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): buffer.append({ 'label': d['label'].item().decode("utf8"), 'sentence1': d['sentence1'].item().decode("utf8"), @@ -129,7 +129,7 @@ def test_clue_cmnli(): # train buffer = [] data = ds.CLUEDataset(TRAIN_FILE, task='CMNLI', usage='train', shuffle=False) - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): buffer.append({ 'label': d['label'].item().decode("utf8"), 'sentence1': d['sentence1'].item().decode("utf8"), @@ -140,7 +140,7 @@ def test_clue_cmnli(): # test buffer = [] data = ds.CLUEDataset(TEST_FILE, task='CMNLI', usage='test', shuffle=False) - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): buffer.append({ 'id': d['id'], 'sentence1': d['sentence1'], @@ -151,7 +151,7 @@ def test_clue_cmnli(): # eval buffer = [] data = ds.CLUEDataset(EVAL_FILE, task='CMNLI', usage='eval', shuffle=False) - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): buffer.append({ 'label': d['label'], 'sentence1': d['sentence1'], @@ -171,7 +171,7 @@ def test_clue_csl(): # train buffer = [] data = ds.CLUEDataset(TRAIN_FILE, task='CSL', usage='train', shuffle=False) - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): buffer.append({ 'id': d['id'], 'abst': d['abst'].item().decode("utf8"), @@ -183,7 +183,7 @@ def test_clue_csl(): # test buffer = [] data = ds.CLUEDataset(TEST_FILE, task='CSL', usage='test', shuffle=False) - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): buffer.append({ 'id': d['id'], 'abst': d['abst'].item().decode("utf8"), @@ -194,7 +194,7 @@ def test_clue_csl(): # eval buffer = [] data = ds.CLUEDataset(EVAL_FILE, task='CSL', usage='eval', shuffle=False) - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): buffer.append({ 'id': d['id'], 'abst': d['abst'].item().decode("utf8"), @@ -215,7 +215,7 @@ def test_clue_iflytek(): # train buffer = [] data = ds.CLUEDataset(TRAIN_FILE, task='IFLYTEK', usage='train', shuffle=False) - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): buffer.append({ 'label': d['label'].item().decode("utf8"), 'label_des': d['label_des'].item().decode("utf8"), @@ -226,7 +226,7 @@ def test_clue_iflytek(): # test buffer = [] data = ds.CLUEDataset(TEST_FILE, task='IFLYTEK', usage='test', shuffle=False) - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): buffer.append({ 'id': d['id'], 'sentence': d['sentence'].item().decode("utf8") @@ -236,7 +236,7 @@ def test_clue_iflytek(): # eval buffer = [] data = ds.CLUEDataset(EVAL_FILE, task='IFLYTEK', usage='eval', shuffle=False) - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): buffer.append({ 'label': d['label'].item().decode("utf8"), 'label_des': d['label_des'].item().decode("utf8"), @@ -256,7 +256,7 @@ def test_clue_tnews(): # train buffer = [] data = ds.CLUEDataset(TRAIN_FILE, task='TNEWS', usage='train', shuffle=False) - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): buffer.append({ 'label': d['label'].item().decode("utf8"), 'label_desc': d['label_desc'].item().decode("utf8"), @@ -269,7 +269,7 @@ def test_clue_tnews(): # test buffer = [] data = ds.CLUEDataset(TEST_FILE, task='TNEWS', usage='test', shuffle=False) - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): buffer.append({ 'id': d['id'], 'sentence': d['sentence'].item().decode("utf8"), @@ -281,7 +281,7 @@ def test_clue_tnews(): # eval buffer = [] data = ds.CLUEDataset(EVAL_FILE, task='TNEWS', usage='eval', shuffle=False) - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): buffer.append({ 'label': d['label'].item().decode("utf8"), 'label_desc': d['label_desc'].item().decode("utf8"), @@ -303,7 +303,7 @@ def test_clue_wsc(): # train buffer = [] data = ds.CLUEDataset(TRAIN_FILE, task='WSC', usage='train') - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): buffer.append({ 'span1_index': d['span1_index'], 'span2_index': d['span2_index'], @@ -318,7 +318,7 @@ def test_clue_wsc(): # test buffer = [] data = ds.CLUEDataset(TEST_FILE, task='WSC', usage='test') - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): buffer.append({ 'span1_index': d['span1_index'], 'span2_index': d['span2_index'], @@ -332,7 +332,7 @@ def test_clue_wsc(): # eval buffer = [] data = ds.CLUEDataset(EVAL_FILE, task='WSC', usage='eval') - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): buffer.append({ 'span1_index': d['span1_index'], 'span2_index': d['span2_index'], diff --git a/tests/ut/python/dataset/test_datasets_coco.py b/tests/ut/python/dataset/test_datasets_coco.py index f91a1f23390..0f7dccfe301 100644 --- a/tests/ut/python/dataset/test_datasets_coco.py +++ b/tests/ut/python/dataset/test_datasets_coco.py @@ -33,7 +33,7 @@ def test_coco_detection(): image_shape = [] bbox = [] category_id = [] - for data in data1.create_dict_iterator(num_epochs=1): + for data in data1.create_dict_iterator(num_epochs=1, output_numpy=True): image_shape.append(data["image"].shape) bbox.append(data["bbox"]) category_id.append(data["category_id"]) @@ -66,7 +66,7 @@ def test_coco_stuff(): image_shape = [] segmentation = [] iscrowd = [] - for data in data1.create_dict_iterator(num_epochs=1): + for data in data1.create_dict_iterator(num_epochs=1, output_numpy=True): image_shape.append(data["image"].shape) segmentation.append(data["segmentation"]) iscrowd.append(data["iscrowd"]) @@ -107,7 +107,7 @@ def test_coco_keypoint(): image_shape = [] keypoints = [] num_keypoints = [] - for data in data1.create_dict_iterator(num_epochs=1): + for data in data1.create_dict_iterator(num_epochs=1, output_numpy=True): image_shape.append(data["image"].shape) keypoints.append(data["keypoints"]) num_keypoints.append(data["num_keypoints"]) @@ -136,7 +136,7 @@ def test_coco_panoptic(): category_id = [] iscrowd = [] area = [] - for data in data1.create_dict_iterator(num_epochs=1): + for data in data1.create_dict_iterator(num_epochs=1, output_numpy=True): image_shape.append(data["image"].shape) bbox.append(data["bbox"]) category_id.append(data["category_id"]) diff --git a/tests/ut/python/dataset/test_datasets_csv.py b/tests/ut/python/dataset/test_datasets_csv.py index ca2eb8d1338..600885c3ec0 100644 --- a/tests/ut/python/dataset/test_datasets_csv.py +++ b/tests/ut/python/dataset/test_datasets_csv.py @@ -33,7 +33,7 @@ def test_csv_dataset_basic(): shuffle=False) data = data.repeat(2) data = data.skip(2) - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): buffer.append(d) assert len(buffer) == 4 @@ -45,7 +45,7 @@ def test_csv_dataset_one_file(): column_names=['col1', 'col2', 'col3', 'col4'], shuffle=False) buffer = [] - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): buffer.append(d) assert len(buffer) == 3 @@ -58,7 +58,7 @@ def test_csv_dataset_all_file(): column_names=['col1', 'col2', 'col3', 'col4'], shuffle=False) buffer = [] - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): buffer.append(d) assert len(buffer) == 10 @@ -70,7 +70,7 @@ def test_csv_dataset_num_samples(): column_names=['col1', 'col2', 'col3', 'col4'], shuffle=False, num_samples=2) count = 0 - for _ in data.create_dict_iterator(num_epochs=1): + for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True): count += 1 assert count == 2 @@ -83,7 +83,7 @@ def test_csv_dataset_distribution(): column_names=['col1', 'col2', 'col3', 'col4'], shuffle=False, num_shards=2, shard_id=0) count = 0 - for _ in data.create_dict_iterator(num_epochs=1): + for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True): count += 1 assert count == 2 @@ -96,7 +96,7 @@ def test_csv_dataset_quoted(): column_names=['col1', 'col2', 'col3', 'col4'], shuffle=False) buffer = [] - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): buffer.extend([d['col1'].item().decode("utf8"), d['col2'].item().decode("utf8"), d['col3'].item().decode("utf8"), @@ -113,7 +113,7 @@ def test_csv_dataset_separated(): column_names=['col1', 'col2', 'col3', 'col4'], shuffle=False) buffer = [] - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): buffer.extend([d['col1'].item().decode("utf8"), d['col2'].item().decode("utf8"), d['col3'].item().decode("utf8"), @@ -129,7 +129,7 @@ def test_csv_dataset_embedded(): column_names=['col1', 'col2', 'col3', 'col4'], shuffle=False) buffer = [] - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): buffer.extend([d['col1'].item().decode("utf8"), d['col2'].item().decode("utf8"), d['col3'].item().decode("utf8"), @@ -145,7 +145,7 @@ def test_csv_dataset_chinese(): column_names=['col1', 'col2', 'col3', 'col4', 'col5'], shuffle=False) buffer = [] - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): buffer.extend([d['col1'].item().decode("utf8"), d['col2'].item().decode("utf8"), d['col3'].item().decode("utf8"), @@ -161,7 +161,7 @@ def test_csv_dataset_header(): column_defaults=["", "", "", ""], shuffle=False) buffer = [] - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): buffer.extend([d['col1'].item().decode("utf8"), d['col2'].item().decode("utf8"), d['col3'].item().decode("utf8"), @@ -177,7 +177,7 @@ def test_csv_dataset_number(): column_names=['col1', 'col2', 'col3', 'col4'], shuffle=False) buffer = [] - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): buffer.extend([d['col1'].item(), d['col2'].item(), d['col3'].item(), @@ -203,7 +203,7 @@ def test_csv_dataset_exception(): column_names=['col1', 'col2', 'col3', 'col4'], shuffle=False) with pytest.raises(Exception) as err: - for _ in data.create_dict_iterator(num_epochs=1): + for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True): pass assert "Failed to parse file" in str(err.value) @@ -216,7 +216,7 @@ def test_csv_dataset_type_error(): column_names=['col1', 'col2', 'col3', 'col4'], shuffle=False) with pytest.raises(Exception) as err: - for _ in data.create_dict_iterator(num_epochs=1): + for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True): pass assert "type does not match" in str(err.value) diff --git a/tests/ut/python/dataset/test_datasets_generator.py b/tests/ut/python/dataset/test_datasets_generator.py index ed2543094a6..2c4342beba9 100644 --- a/tests/ut/python/dataset/test_datasets_generator.py +++ b/tests/ut/python/dataset/test_datasets_generator.py @@ -47,7 +47,7 @@ def test_generator_0(): data1 = ds.GeneratorDataset(generator_1d, ["data"]) i = 0 - for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary golden = np.array([i]) np.testing.assert_array_equal(item["data"], golden) i = i + 1 @@ -69,7 +69,7 @@ def test_generator_1(): data1 = ds.GeneratorDataset(generator_md, ["data"]) i = 0 - for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary golden = np.array([[i, i + 1], [i + 2, i + 3]]) np.testing.assert_array_equal(item["data"], golden) i = i + 1 @@ -91,7 +91,7 @@ def test_generator_2(): data1 = ds.GeneratorDataset(generator_mc, ["col0", "col1"]) i = 0 - for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary golden = np.array([i]) np.testing.assert_array_equal(item["col0"], golden) golden = np.array([[i, i + 1], [i + 2, i + 3]]) @@ -111,7 +111,7 @@ def test_generator_3(): data1 = data1.repeat(4) i = 0 - for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary golden = np.array([i]) np.testing.assert_array_equal(item["data"], golden) i = i + 1 @@ -131,7 +131,7 @@ def test_generator_4(): data1 = data1.batch(4) i = 0 - for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary golden = np.array([[i], [i + 1], [i + 2], [i + 3]]) np.testing.assert_array_equal(item["data"], golden) i = i + 4 @@ -151,7 +151,7 @@ def type_tester(t): data1 = data1.batch(4) i = 0 - for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary golden = np.array([[i], [i + 1], [i + 2], [i + 3]], dtype=t) np.testing.assert_array_equal(item["data"], golden) i = i + 4 @@ -178,7 +178,7 @@ def type_tester_with_type_check(t, c): data1 = data1.batch(4) i = 0 - for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary golden = np.array([[i], [i + 1], [i + 2], [i + 3]], dtype=t) np.testing.assert_array_equal(item["data"], golden) i = i + 4 @@ -213,7 +213,7 @@ def type_tester_with_type_check_2c(t, c): data1 = data1.batch(4) i = 0 - for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary golden = np.array([[i], [i + 1], [i + 2], [i + 3]], dtype=t) np.testing.assert_array_equal(item["data0"], golden) i = i + 4 @@ -250,7 +250,7 @@ def test_generator_8(): num_parallel_workers=2) i = 0 - for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary golden = np.array([i * 3]) np.testing.assert_array_equal(item["out0"], golden) golden = np.array([[i * 7, (i + 1) * 7], [(i + 2) * 7, (i + 3) * 7]]) @@ -280,14 +280,14 @@ def test_generator_9(): i = 0 for data1, data2 in zip(data1, data2): # each data is a dictionary golden = np.array([i]) - np.testing.assert_array_equal(data1[0], golden) + np.testing.assert_array_equal(data1[0].asnumpy(), golden) golden = np.array([[i * 3, (i + 1) * 3], [(i + 2) * 3, (i + 3) * 3]]) - np.testing.assert_array_equal(data1[1], golden) + np.testing.assert_array_equal(data1[1].asnumpy(), golden) golden = np.array([i * 3]) - np.testing.assert_array_equal(data2[0], golden) + np.testing.assert_array_equal(data2[0].asnumpy(), golden) golden = np.array([[i, i + 1], [i + 2, i + 3]]) - np.testing.assert_array_equal(data2[1], golden) + np.testing.assert_array_equal(data2[1].asnumpy(), golden) i = i + 1 @@ -304,7 +304,7 @@ def test_generator_10(): # Expected column order is |col0|out1|out2| i = 0 - for item in data1.create_tuple_iterator(num_epochs=1): + for item in data1.create_tuple_iterator(num_epochs=1, output_numpy=True): golden = np.array([i]) np.testing.assert_array_equal(item[0], golden) golden = np.array([[i, i + 1], [i + 2, i + 3]]) @@ -328,7 +328,7 @@ def test_generator_11(): # Expected column order is |out1|out2| i = 0 - for item in data1.create_tuple_iterator(num_epochs=1): + for item in data1.create_tuple_iterator(num_epochs=1, output_numpy=True): # len should be 2 because col0 is dropped (not included in column_order) assert len(item) == 2 golden = np.array([[i, i + 1], [i + 2, i + 3]]) @@ -350,7 +350,7 @@ def test_generator_12(): # Expected column order is |col0|col1| i = 0 - for item in data1.create_tuple_iterator(num_epochs=1): + for item in data1.create_tuple_iterator(num_epochs=1, output_numpy=True): assert len(item) == 2 golden = np.array([i * 5]) np.testing.assert_array_equal(item[0], golden) @@ -363,7 +363,7 @@ def test_generator_12(): # Expected column order is |col0|col1| i = 0 - for item in data1.create_tuple_iterator(num_epochs=1): + for item in data1.create_tuple_iterator(num_epochs=1, output_numpy=True): assert len(item) == 2 golden = np.array([i * 5]) np.testing.assert_array_equal(item[1], golden) @@ -384,7 +384,7 @@ def test_generator_13(): # Expected column order is |out0|col1| i = 0 - for item in data1.create_tuple_iterator(num_epochs=1): + for item in data1.create_tuple_iterator(num_epochs=1, output_numpy=True): assert len(item) == 2 golden = np.array([i * 5]) np.testing.assert_array_equal(item[0], golden) @@ -392,7 +392,7 @@ def test_generator_13(): np.testing.assert_array_equal(item[1], golden) i = i + 1 - for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary # len should be 2 because col0 is dropped (not included in column_order) assert len(item) == 2 golden = np.array([i * 5]) @@ -411,7 +411,7 @@ def test_generator_14(): source = [(np.array([x]),) for x in range(256)] ds1 = ds.GeneratorDataset(source, ["data"], sampler=ds.SequentialSampler(), num_parallel_workers=4).repeat(2) i = 0 - for data in ds1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for data in ds1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary golden = np.array([i]) np.testing.assert_array_equal(data["data"], golden) i = i + 1 @@ -429,7 +429,7 @@ def test_generator_15(): source = [(np.array([x]),) for x in range(256)] ds1 = ds.GeneratorDataset(source, ["data"], sampler=sampler, num_parallel_workers=4).repeat(2) i = 0 - for data in ds1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for data in ds1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary golden = np.array([i]) np.testing.assert_array_equal(data["data"], golden) i = i + 1 @@ -448,7 +448,7 @@ def test_generator_16(): data1 = ds.GeneratorDataset(source, ["col0", "col1"], sampler=ds.SequentialSampler()) i = 0 - for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary golden = np.array([i]) np.testing.assert_array_equal(item["col0"], golden) golden = np.array([i + 1]) @@ -468,7 +468,7 @@ def test_generator_17(): data1 = ds.GeneratorDataset(source, ["col0", "col1"], sampler=sampler) i = 0 - for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary golden = np.array([i]) np.testing.assert_array_equal(item["col0"], golden) golden = np.array([i + 1]) @@ -528,7 +528,7 @@ def test_generator_sequential_sampler(): source = [(np.array([x]),) for x in range(64)] ds1 = ds.GeneratorDataset(source, ["data"], sampler=ds.SequentialSampler()) i = 0 - for data in ds1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for data in ds1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary golden = np.array([i]) np.testing.assert_array_equal(data["data"], golden) i = i + 1 @@ -546,7 +546,7 @@ def test_generator_distributed_sampler(): for sid in range(8): ds1 = ds.GeneratorDataset(source, ["data"], shuffle=False, num_shards=8, shard_id=sid) i = sid - for data in ds1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for data in ds1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary golden = np.array([i]) np.testing.assert_array_equal(data["data"], golden) i = i + 8 @@ -605,7 +605,7 @@ def type_tester_with_type_check_2c_schema(t, c): data1 = data1.batch(4) i = 0 - for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary golden = np.array([[i], [i + 1], [i + 2], [i + 3]], dtype=t) np.testing.assert_array_equal(item["data0"], golden) i = i + 4 @@ -636,7 +636,7 @@ def test_generator_dataset_size_0(): data_size = data1.get_dataset_size() num_rows = 0 - for _ in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary num_rows = num_rows + 1 assert data_size == num_rows diff --git a/tests/ut/python/dataset/test_datasets_imagefolder.py b/tests/ut/python/dataset/test_datasets_imagefolder.py index fefa0f16d80..3f0638c647b 100644 --- a/tests/ut/python/dataset/test_datasets_imagefolder.py +++ b/tests/ut/python/dataset/test_datasets_imagefolder.py @@ -171,7 +171,7 @@ def test_imagefolder_classindex(): 333, 333, 333, 333, 333, 333, 333, 333, 333, 333, 333] num_iter = 0 - for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary # in this example, each dictionary has keys "image" and "label" logger.info("image is {}".format(item["image"])) logger.info("label is {}".format(item["label"])) @@ -196,7 +196,7 @@ def test_imagefolder_negative_classindex(): -333, -333, -333, -333, -333, -333, -333, -333, -333, -333, -333] num_iter = 0 - for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary # in this example, each dictionary has keys "image" and "label" logger.info("image is {}".format(item["image"])) logger.info("label is {}".format(item["label"])) @@ -267,7 +267,7 @@ def test_sequential_sampler(): result = [] num_iter = 0 - for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary # in this example, each dictionary has keys "image" and "label" result.append(item["label"]) num_iter += 1 diff --git a/tests/ut/python/dataset/test_datasets_manifestop.py b/tests/ut/python/dataset/test_datasets_manifestop.py index d8abcf9c9a7..f056d0edd4a 100644 --- a/tests/ut/python/dataset/test_datasets_manifestop.py +++ b/tests/ut/python/dataset/test_datasets_manifestop.py @@ -26,7 +26,7 @@ def test_manifest_dataset_train(): count = 0 cat_count = 0 dog_count = 0 - for item in data.create_dict_iterator(num_epochs=1): + for item in data.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info("item[image] is {}".format(item["image"])) count = count + 1 if item["label"].size == 1 and item["label"] == 0: @@ -41,7 +41,7 @@ def test_manifest_dataset_train(): def test_manifest_dataset_eval(): data = ds.ManifestDataset(DATA_FILE, "eval", decode=True) count = 0 - for item in data.create_dict_iterator(num_epochs=1): + for item in data.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info("item[image] is {}".format(item["image"])) count = count + 1 if item["label"] != 0 and item["label"] != 1: @@ -55,7 +55,7 @@ def test_manifest_dataset_class_index(): out_class_indexing = data.get_class_indexing() assert out_class_indexing == {"dog": 11} count = 0 - for item in data.create_dict_iterator(num_epochs=1): + for item in data.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info("item[image] is {}".format(item["image"])) count = count + 1 if item["label"] != 11: @@ -81,7 +81,7 @@ def test_manifest_dataset_multi_label(): data = ds.ManifestDataset(DATA_FILE, decode=True, shuffle=False) count = 0 expect_label = [1, 0, 0, [0, 2]] - for item in data.create_dict_iterator(num_epochs=1): + for item in data.create_dict_iterator(num_epochs=1, output_numpy=True): assert item["label"].tolist() == expect_label[count] logger.info("item[image] is {}".format(item["image"])) count = count + 1 @@ -107,7 +107,7 @@ def test_manifest_dataset_multi_label_onehot(): data = data.map(operations=multi_label_hot, input_columns=["label"]) data = data.batch(2) count = 0 - for item in data.create_dict_iterator(num_epochs=1): + for item in data.create_dict_iterator(num_epochs=1, output_numpy=True): assert item["label"].tolist() == expect_label[count] logger.info("item[image] is {}".format(item["image"])) count = count + 1 diff --git a/tests/ut/python/dataset/test_datasets_mnist.py b/tests/ut/python/dataset/test_datasets_mnist.py index 2e227430a39..4e2c48a3449 100644 --- a/tests/ut/python/dataset/test_datasets_mnist.py +++ b/tests/ut/python/dataset/test_datasets_mnist.py @@ -64,7 +64,7 @@ def test_mnist_content_check(): num_iter = 0 # in this example, each dictionary has keys "image" and "label" image_list, label_list = [], [] - for i, data in enumerate(data1.create_dict_iterator(num_epochs=1)): + for i, data in enumerate(data1.create_dict_iterator(num_epochs=1, output_numpy=True)): image_list.append(data["image"]) label_list.append("label {}".format(data["label"])) np.testing.assert_array_equal(data["image"], images[i]) @@ -137,7 +137,7 @@ def test_mnist_pk_sampler(): data = ds.MnistDataset(DATA_DIR, sampler=sampler) num_iter = 0 label_list = [] - for item in data.create_dict_iterator(num_epochs=1): + for item in data.create_dict_iterator(num_epochs=1, output_numpy=True): label_list.append(item["label"]) num_iter += 1 np.testing.assert_array_equal(golden, label_list) @@ -156,8 +156,8 @@ def test_mnist_sequential_sampler(): label_list1, label_list2 = [], [] num_iter = 0 for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): - label_list1.append(item1["label"]) - label_list2.append(item2["label"]) + label_list1.append(item1["label"].asnumpy()) + label_list2.append(item2["label"].asnumpy()) num_iter += 1 np.testing.assert_array_equal(label_list1, label_list2) assert num_iter == num_samples @@ -214,7 +214,7 @@ def test_mnist_visualize(plot=False): data1 = ds.MnistDataset(DATA_DIR, num_samples=10, shuffle=False) num_iter = 0 image_list, label_list = [], [] - for item in data1.create_dict_iterator(num_epochs=1): + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): image = item["image"] label = item["label"] image_list.append(image) diff --git a/tests/ut/python/dataset/test_datasets_sharding.py b/tests/ut/python/dataset/test_datasets_sharding.py index 9730b91fb00..ea315e4cc35 100644 --- a/tests/ut/python/dataset/test_datasets_sharding.py +++ b/tests/ut/python/dataset/test_datasets_sharding.py @@ -25,7 +25,7 @@ def test_imagefolder_shardings(print_res=False): shuffle=shuffle, class_indexing=class_index, decode=True) data1 = data1.repeat(repeat_cnt) res = [] - for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary res.append(item["label"].item()) if print_res: logger.info("labels of dataset: {}".format(res)) @@ -59,7 +59,7 @@ def test_tfrecord_shardings1(print_res=False): shuffle=ds.Shuffle.FILES, num_parallel_workers=1) data1 = data1.repeat(repeat_cnt) res = [] - for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary res.append(item["scalars"][0]) if print_res: logger.info("scalars of dataset: {}".format(res)) @@ -97,7 +97,7 @@ def test_tfrecord_shardings4(print_res=False): shuffle=ds.Shuffle.FILES, num_parallel_workers=4) data1 = data1.repeat(repeat_cnt) res = [] - for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary res.append(item["scalars"][0]) if print_res: logger.info("scalars of dataset: {}".format(res)) @@ -141,7 +141,7 @@ def test_manifest_shardings(print_res=False): shuffle=shuffle, decode=True) data1 = data1.repeat(repeat_cnt) res = [] - for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary res.append(item["label"].item()) if print_res: logger.info("labels of dataset: {}".format(res)) @@ -166,7 +166,7 @@ def test_voc_shardings(print_res=False): data1 = ds.VOCDataset(voc_dir, decode=True, sampler=sampler) data1 = data1.repeat(repeat_cnt) res = [] - for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary res.append(item["image"].shape[0]) if print_res: logger.info("labels of dataset: {}".format(res)) @@ -194,7 +194,7 @@ def test_cifar10_shardings(print_res=False): shuffle=shuffle) data1 = data1.repeat(repeat_cnt) res = [] - for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary res.append(item["label"].item()) if print_res: logger.info("labels of dataset: {}".format(res)) @@ -214,7 +214,7 @@ def test_cifar100_shardings(print_res=False): shuffle=shuffle) data1 = data1.repeat(repeat_cnt) res = [] - for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary res.append(item["coarse_label"].item()) if print_res: logger.info("labels of dataset: {}".format(res)) @@ -233,7 +233,7 @@ def test_mnist_shardings(print_res=False): shuffle=shuffle) data1 = data1.repeat(repeat_cnt) res = [] - for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary res.append(item["label"].item()) if print_res: logger.info("labels of dataset: {}".format(res)) diff --git a/tests/ut/python/dataset/test_datasets_textfileop.py b/tests/ut/python/dataset/test_datasets_textfileop.py index 40f58a5cf5b..74de6363db4 100644 --- a/tests/ut/python/dataset/test_datasets_textfileop.py +++ b/tests/ut/python/dataset/test_datasets_textfileop.py @@ -25,7 +25,7 @@ DATA_ALL_FILE = "../data/dataset/testTextFileDataset/*" def test_textline_dataset_one_file(): data = ds.TextFileDataset(DATA_FILE) count = 0 - for i in data.create_dict_iterator(num_epochs=1): + for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info("{}".format(i["text"])) count += 1 assert count == 3 @@ -34,7 +34,7 @@ def test_textline_dataset_one_file(): def test_textline_dataset_all_file(): data = ds.TextFileDataset(DATA_ALL_FILE) count = 0 - for i in data.create_dict_iterator(num_epochs=1): + for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info("{}".format(i["text"])) count += 1 assert count == 5 @@ -43,7 +43,7 @@ def test_textline_dataset_all_file(): def test_textline_dataset_num_samples_zero(): data = ds.TextFileDataset(DATA_FILE, num_samples=0) count = 0 - for i in data.create_dict_iterator(num_epochs=1): + for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info("{}".format(i["text"])) count += 1 assert count == 3 @@ -56,7 +56,7 @@ def test_textline_dataset_shuffle_false4(): count = 0 line = ["This is a text file.", "Another file.", "Be happy every day.", "End of file.", "Good luck to everyone."] - for i in data.create_dict_iterator(num_epochs=1): + for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): strs = i["text"].item().decode("utf8") assert strs == line[count] count += 1 @@ -73,7 +73,7 @@ def test_textline_dataset_shuffle_false1(): count = 0 line = ["This is a text file.", "Be happy every day.", "Good luck to everyone.", "Another file.", "End of file."] - for i in data.create_dict_iterator(num_epochs=1): + for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): strs = i["text"].item().decode("utf8") assert strs == line[count] count += 1 @@ -90,7 +90,7 @@ def test_textline_dataset_shuffle_files4(): count = 0 line = ["This is a text file.", "Another file.", "Be happy every day.", "End of file.", "Good luck to everyone."] - for i in data.create_dict_iterator(num_epochs=1): + for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): strs = i["text"].item().decode("utf8") assert strs == line[count] count += 1 @@ -107,7 +107,7 @@ def test_textline_dataset_shuffle_files1(): count = 0 line = ["This is a text file.", "Be happy every day.", "Good luck to everyone.", "Another file.", "End of file."] - for i in data.create_dict_iterator(num_epochs=1): + for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): strs = i["text"].item().decode("utf8") assert strs == line[count] count += 1 @@ -124,7 +124,7 @@ def test_textline_dataset_shuffle_global4(): count = 0 line = ["Another file.", "Good luck to everyone.", "End of file.", "This is a text file.", "Be happy every day."] - for i in data.create_dict_iterator(num_epochs=1): + for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): strs = i["text"].item().decode("utf8") assert strs == line[count] count += 1 @@ -141,7 +141,7 @@ def test_textline_dataset_shuffle_global1(): count = 0 line = ["Another file.", "Good luck to everyone.", "This is a text file.", "End of file.", "Be happy every day."] - for i in data.create_dict_iterator(num_epochs=1): + for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): strs = i["text"].item().decode("utf8") assert strs == line[count] count += 1 @@ -154,7 +154,7 @@ def test_textline_dataset_shuffle_global1(): def test_textline_dataset_num_samples(): data = ds.TextFileDataset(DATA_FILE, num_samples=2) count = 0 - for _ in data.create_dict_iterator(num_epochs=1): + for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True): count += 1 assert count == 2 @@ -162,7 +162,7 @@ def test_textline_dataset_num_samples(): def test_textline_dataset_distribution(): data = ds.TextFileDataset(DATA_ALL_FILE, num_shards=2, shard_id=1) count = 0 - for _ in data.create_dict_iterator(num_epochs=1): + for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True): count += 1 assert count == 3 @@ -174,7 +174,7 @@ def test_textline_dataset_repeat(): line = ["This is a text file.", "Be happy every day.", "Good luck to everyone.", "This is a text file.", "Be happy every day.", "Good luck to everyone.", "This is a text file.", "Be happy every day.", "Good luck to everyone."] - for i in data.create_dict_iterator(num_epochs=1): + for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): strs = i["text"].item().decode("utf8") assert strs == line[count] count += 1 diff --git a/tests/ut/python/dataset/test_datasets_tfrecord.py b/tests/ut/python/dataset/test_datasets_tfrecord.py index d134c38f0d2..35e13a859fb 100644 --- a/tests/ut/python/dataset/test_datasets_tfrecord.py +++ b/tests/ut/python/dataset/test_datasets_tfrecord.py @@ -39,7 +39,7 @@ def test_tfrecord_shape(): schema_file = "../data/dataset/testTFTestAllTypes/datasetSchemaRank0.json" ds1 = ds.TFRecordDataset(FILES, schema_file) ds1 = ds1.batch(2) - for data in ds1.create_dict_iterator(num_epochs=1): + for data in ds1.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info(data) output_shape = ds1.output_shapes() assert len(output_shape[-1]) == 1 @@ -162,7 +162,7 @@ def test_tfrecord_schema(): for d1, d2 in zip(data1, data2): for t1, t2 in zip(d1, d2): - np.testing.assert_array_equal(t1, t2) + np.testing.assert_array_equal(t1.asnumpy(), t2.asnumpy()) def test_tfrecord_shuffle(): @@ -174,7 +174,7 @@ def test_tfrecord_shuffle(): for d1, d2 in zip(data1, data2): for t1, t2 in zip(d1, d2): - np.testing.assert_array_equal(t1, t2) + np.testing.assert_array_equal(t1.asnumpy(), t2.asnumpy()) def test_tfrecord_shard(): @@ -187,7 +187,7 @@ def test_tfrecord_shard(): shuffle=ds.Shuffle.FILES) data1 = data1.repeat(num_repeats) res = list() - for item in data1.create_dict_iterator(num_epochs=1): + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): res.append(item["scalars"][0]) return res @@ -215,7 +215,7 @@ def test_tfrecord_shard_equal_rows(): ds1 = ds.TFRecordDataset(tf_files, num_shards=num_shards, shard_id=shard_id, shard_equal_rows=True) ds1 = ds1.repeat(num_repeats) res = list() - for data in ds1.create_dict_iterator(num_epochs=1): + for data in ds1.create_dict_iterator(num_epochs=1, output_numpy=True): res.append(data["scalars"][0]) return res @@ -238,7 +238,7 @@ def test_tfrecord_shard_equal_rows(): def test_tfrecord_no_schema_columns_list(): logger.info("test_tfrecord_no_schema_columns_list") data = ds.TFRecordDataset(FILES, shuffle=False, columns_list=["col_sint16"]) - row = data.create_dict_iterator(num_epochs=1).__next__() + row = data.create_dict_iterator(num_epochs=1, output_numpy=True).__next__() assert row["col_sint16"] == [-32768] with pytest.raises(KeyError) as info: @@ -258,7 +258,7 @@ def test_tfrecord_schema_columns_list(): schema.add_column('col_sint32', de_type=mstype.int64, shape=[1]) schema.add_column('col_sint64', de_type=mstype.int64, shape=[1]) data = ds.TFRecordDataset(FILES, schema=schema, shuffle=False, columns_list=["col_sint16"]) - row = data.create_dict_iterator(num_epochs=1).__next__() + row = data.create_dict_iterator(num_epochs=1, output_numpy=True).__next__() assert row["col_sint16"] == [-32768] with pytest.raises(KeyError) as info: @@ -275,7 +275,7 @@ def test_tfrecord_invalid_files(): data = ds.TFRecordDataset(files, SCHEMA_FILE, shuffle=ds.Shuffle.FILES) with pytest.raises(RuntimeError) as info: - _ = data.create_dict_iterator(num_epochs=1).get_next() + _ = data.create_dict_iterator(num_epochs=1, output_numpy=True).get_next() assert "cannot be opened" in str(info.value) assert "not valid tfrecord files" in str(info.value) assert valid_file not in str(info.value) diff --git a/tests/ut/python/dataset/test_datasets_voc.py b/tests/ut/python/dataset/test_datasets_voc.py index 972a1ce001f..f2d0f1012e5 100644 --- a/tests/ut/python/dataset/test_datasets_voc.py +++ b/tests/ut/python/dataset/test_datasets_voc.py @@ -23,7 +23,7 @@ TARGET_SHAPE = [680, 680, 680, 680, 642, 607, 561, 596, 612, 680] def test_voc_segmentation(): data1 = ds.VOCDataset(DATA_DIR, task="Segmentation", usage="train", shuffle=False, decode=True) num = 0 - for item in data1.create_dict_iterator(num_epochs=1): + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): assert item["image"].shape[0] == IMAGE_SHAPE[num] assert item["target"].shape[0] == TARGET_SHAPE[num] num += 1 @@ -34,7 +34,7 @@ def test_voc_detection(): data1 = ds.VOCDataset(DATA_DIR, task="Detection", usage="train", shuffle=False, decode=True) num = 0 count = [0, 0, 0, 0, 0, 0] - for item in data1.create_dict_iterator(num_epochs=1): + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): assert item["image"].shape[0] == IMAGE_SHAPE[num] for label in item["label"]: count[label[0]] += 1 @@ -53,7 +53,7 @@ def test_voc_class_index(): assert (class_index2 == {'car': 0, 'cat': 1, 'train': 5}) num = 0 count = [0, 0, 0, 0, 0, 0] - for item in data1.create_dict_iterator(num_epochs=1): + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): for label in item["label"]: count[label[0]] += 1 assert label[0] in (0, 1, 5) @@ -71,7 +71,7 @@ def test_voc_get_class_indexing(): assert (class_index2 == {'car': 0, 'cat': 1, 'chair': 2, 'dog': 3, 'person': 4, 'train': 5}) num = 0 count = [0, 0, 0, 0, 0, 0] - for item in data1.create_dict_iterator(num_epochs=1): + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): for label in item["label"]: count[label[0]] += 1 assert label[0] in (0, 1, 2, 3, 4, 5) diff --git a/tests/ut/python/dataset/test_decode.py b/tests/ut/python/dataset/test_decode.py index 266681e2ca1..0eb9f73e2f7 100644 --- a/tests/ut/python/dataset/test_decode.py +++ b/tests/ut/python/dataset/test_decode.py @@ -40,7 +40,8 @@ def test_decode_op(): # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): actual = item1["image"] expected = cv2.imdecode(item2["image"], cv2.IMREAD_COLOR) expected = cv2.cvtColor(expected, cv2.COLOR_BGR2RGB) @@ -65,7 +66,8 @@ def test_decode_op_tf_file_dataset(): # Second dataset data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): actual = item1["image"] expected = cv2.imdecode(item2["image"], cv2.IMREAD_COLOR) expected = cv2.cvtColor(expected, cv2.COLOR_BGR2RGB) diff --git a/tests/ut/python/dataset/test_duplicate_op.py b/tests/ut/python/dataset/test_duplicate_op.py index c9ab3236914..98d9f29a4f8 100644 --- a/tests/ut/python/dataset/test_duplicate_op.py +++ b/tests/ut/python/dataset/test_duplicate_op.py @@ -26,7 +26,7 @@ def compare(array): array = np.array(array) data = data.map(operations=ops.Duplicate(), input_columns=["x"], output_columns=["x", "y"], column_order=["x", "y"]) - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): np.testing.assert_array_equal(array, d["x"]) np.testing.assert_array_equal(array, d["y"]) diff --git a/tests/ut/python/dataset/test_epoch_ctrl.py b/tests/ut/python/dataset/test_epoch_ctrl.py index ac655df9d2d..2b0a309b19c 100644 --- a/tests/ut/python/dataset/test_epoch_ctrl.py +++ b/tests/ut/python/dataset/test_epoch_ctrl.py @@ -86,9 +86,9 @@ def test_decode_op(): num_epoch = 5 # iter1 will always assume there is a next epoch and never shutdown. - iter1 = data1.create_dict_iterator() + iter1 = data1.create_dict_iterator(output_numpy=True) # iter 2 will stop and shutdown pipeline after num_epoch - iter2 = data2.create_dict_iterator(num_epoch) + iter2 = data2.create_dict_iterator(num_epoch, output_numpy=True) for _ in range(num_epoch): i = 0 for item1, item2 in itertools.zip_longest(iter1, iter2): @@ -135,7 +135,7 @@ def test_generator_dict_0(): i = 0 # create the iterator inside the loop declaration - for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary golden = np.array([i]) np.testing.assert_array_equal(item["data"], golden) i = i + 1 @@ -154,7 +154,7 @@ def test_generator_dict_1(): i = 0 # BAD. Do not create iterator every time inside. # Create iterator outside the epoch for loop. - for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary golden = np.array([i]) np.testing.assert_array_equal(item["data"], golden) i = i + 1 @@ -174,7 +174,7 @@ def test_generator_dict_2(): i = 0 for item in iter1: # each data is a dictionary golden = np.array([i]) - np.testing.assert_array_equal(item["data"], golden) + np.testing.assert_array_equal(item["data"].asnumpy(), golden) i = i + 1 assert i == 64 @@ -197,7 +197,7 @@ def test_generator_dict_3(): i = 0 for item in iter1: # each data is a dictionary golden = np.array([i]) - np.testing.assert_array_equal(item["data"], golden) + np.testing.assert_array_equal(item["data"].asnumpy(), golden) i = i + 1 assert i == 64 # optional @@ -221,7 +221,7 @@ def test_generator_dict_4(): i = 0 for item in iter1: # each data is a dictionary golden = np.array([i]) - np.testing.assert_array_equal(item["data"], golden) + np.testing.assert_array_equal(item["data"].asnumpy(), golden) i = i + 1 assert i == 64 @@ -240,7 +240,7 @@ def test_generator_dict_4_1(): # apply dataset operations data1 = ds.GeneratorDataset(generator_1d, ["data"]) # epoch ctrl op will not be injected if num_epochs is 1. - iter1 = data1.create_dict_iterator(num_epochs=1) + iter1 = data1.create_dict_iterator(num_epochs=1, output_numpy=True) for _ in range(1): i = 0 for item in iter1: # each data is a dictionary @@ -266,7 +266,7 @@ def test_generator_dict_4_2(): # repeat will not be injected when num repeat is 1. data1 = data1.repeat(1) # epoch ctrl op will not be injected if num_epochs is 1. - iter1 = data1.create_dict_iterator(num_epochs=1) + iter1 = data1.create_dict_iterator(num_epochs=1, output_numpy=True) for _ in range(1): i = 0 for item in iter1: # each data is a dictionary @@ -289,7 +289,7 @@ def test_generator_dict_5(): # apply dataset operations data1 = ds.GeneratorDataset(generator_1d, ["data"]) - iter1 = data1.create_dict_iterator(num_epochs=11) + iter1 = data1.create_dict_iterator(num_epochs=11, output_numpy=True) for _ in range(10): i = 0 for item in iter1: # each data is a dictionary @@ -326,7 +326,7 @@ def test_generator_tuple_0(): i = 0 # create the iterator inside the loop declaration - for item in data1.create_tuple_iterator(num_epochs=1): # each data is a dictionary + for item in data1.create_tuple_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary golden = np.array([i]) np.testing.assert_array_equal(item[0], golden) i = i + 1 @@ -345,7 +345,7 @@ def test_generator_tuple_1(): i = 0 # BAD. Do not create iterator every time inside. # Create iterator outside the epoch for loop. - for item in data1.create_tuple_iterator(num_epochs=1): # each data is a dictionary + for item in data1.create_tuple_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary golden = np.array([i]) np.testing.assert_array_equal(item[0], golden) i = i + 1 @@ -360,7 +360,7 @@ def test_generator_tuple_2(): # apply dataset operations data1 = ds.GeneratorDataset(generator_1d, ["data"]) - iter1 = data1.create_tuple_iterator() + iter1 = data1.create_tuple_iterator(output_numpy=True) for _ in range(10): i = 0 for item in iter1: # each data is a dictionary @@ -383,7 +383,7 @@ def test_generator_tuple_3(): # apply dataset operations data1 = ds.GeneratorDataset(generator_1d, ["data"]) - iter1 = data1.create_tuple_iterator() + iter1 = data1.create_tuple_iterator(output_numpy=True) for _ in range(10): i = 0 for item in iter1: # each data is a dictionary @@ -407,7 +407,7 @@ def test_generator_tuple_4(): # apply dataset operations data1 = ds.GeneratorDataset(generator_1d, ["data"]) - iter1 = data1.create_tuple_iterator(num_epochs=10) + iter1 = data1.create_tuple_iterator(num_epochs=10, output_numpy=True) for _ in range(10): i = 0 for item in iter1: # each data is a dictionary @@ -430,7 +430,7 @@ def test_generator_tuple_5(): # apply dataset operations data1 = ds.GeneratorDataset(generator_1d, ["data"]) - iter1 = data1.create_tuple_iterator(num_epochs=11) + iter1 = data1.create_tuple_iterator(num_epochs=11, output_numpy=True) for _ in range(10): i = 0 for item in iter1: # each data is a dictionary @@ -464,7 +464,7 @@ def test_generator_tuple_repeat_1(): # apply dataset operations data1 = ds.GeneratorDataset(generator_1d, ["data"]) data1 = data1.repeat(2) - iter1 = data1.create_tuple_iterator(num_epochs=11) + iter1 = data1.create_tuple_iterator(num_epochs=11, output_numpy=True) for _ in range(10): i = 0 for item in iter1: # each data is a dictionary @@ -499,7 +499,7 @@ def test_generator_tuple_repeat_repeat_1(): data1 = ds.GeneratorDataset(generator_1d, ["data"]) data1 = data1.repeat(2) data1 = data1.repeat(3) - iter1 = data1.create_tuple_iterator(num_epochs=11) + iter1 = data1.create_tuple_iterator(num_epochs=11, output_numpy=True) for _ in range(10): i = 0 for item in iter1: # each data is a dictionary @@ -533,7 +533,7 @@ def test_generator_tuple_repeat_repeat_2(): data1 = ds.GeneratorDataset(generator_1d, ["data"]) data1 = data1.repeat(2) data1 = data1.repeat(3) - iter1 = data1.create_tuple_iterator() + iter1 = data1.create_tuple_iterator(output_numpy=True) for _ in range(10): i = 0 for item in iter1: # each data is a dictionary @@ -559,7 +559,7 @@ def test_generator_tuple_repeat_repeat_3(): data1 = ds.GeneratorDataset(generator_1d, ["data"]) data1 = data1.repeat(2) data1 = data1.repeat(3) - iter1 = data1.create_tuple_iterator() + iter1 = data1.create_tuple_iterator(output_numpy=True) for _ in range(10): i = 0 for item in iter1: # each data is a dictionary @@ -589,7 +589,7 @@ def test_generator_tuple_infinite_repeat_repeat_1(): data1 = ds.GeneratorDataset(generator_1d, ["data"]) data1 = data1.repeat() data1 = data1.repeat(3) - iter1 = data1.create_tuple_iterator(num_epochs=11) + iter1 = data1.create_tuple_iterator(num_epochs=11, output_numpy=True) i = 0 for item in iter1: # each data is a dictionary @@ -612,7 +612,7 @@ def test_generator_tuple_infinite_repeat_repeat_2(): data1 = ds.GeneratorDataset(generator_1d, ["data"]) data1 = data1.repeat(3) data1 = data1.repeat() - iter1 = data1.create_tuple_iterator(num_epochs=11) + iter1 = data1.create_tuple_iterator(num_epochs=11, output_numpy=True) i = 0 for item in iter1: # each data is a dictionary @@ -635,7 +635,7 @@ def test_generator_tuple_infinite_repeat_repeat_3(): data1 = ds.GeneratorDataset(generator_1d, ["data"]) data1 = data1.repeat() data1 = data1.repeat() - iter1 = data1.create_tuple_iterator(num_epochs=11) + iter1 = data1.create_tuple_iterator(num_epochs=11, output_numpy=True) i = 0 for item in iter1: # each data is a dictionary @@ -658,7 +658,7 @@ def test_generator_tuple_infinite_repeat_repeat_4(): data1 = ds.GeneratorDataset(generator_1d, ["data"]) data1 = data1.repeat() data1 = data1.repeat() - iter1 = data1.create_tuple_iterator() + iter1 = data1.create_tuple_iterator(output_numpy=True) i = 0 for item in iter1: # each data is a dictionary @@ -680,7 +680,7 @@ def test_generator_reusedataset(): # apply dataset operations data1 = ds.GeneratorDataset(generator_1d, ["data"]) data1 = data1.repeat(2) - iter1 = data1.create_tuple_iterator() + iter1 = data1.create_tuple_iterator(output_numpy=True) for _ in range(10): i = 0 for item in iter1: # each data is a dictionary @@ -690,7 +690,7 @@ def test_generator_reusedataset(): assert i == 64 * 2 data1 = data1.repeat(3) - iter1 = data1.create_tuple_iterator() + iter1 = data1.create_tuple_iterator(output_numpy=True) for _ in range(5): i = 0 for item in iter1: # each data is a dictionary @@ -700,7 +700,7 @@ def test_generator_reusedataset(): assert i == 64 * 2 * 3 data1 = data1.batch(2) - iter1 = data1.create_dict_iterator() + iter1 = data1.create_dict_iterator(output_numpy=True) for _ in range(5): i = 0 sample = 0 diff --git a/tests/ut/python/dataset/test_equalize.py b/tests/ut/python/dataset/test_equalize.py index c67552a5410..4411942c6ea 100644 --- a/tests/ut/python/dataset/test_equalize.py +++ b/tests/ut/python/dataset/test_equalize.py @@ -49,10 +49,10 @@ def test_equalize_py(plot=False): for idx, (image, _) in enumerate(ds_original): if idx == 0: - images_original = np.transpose(image, (0, 2, 3, 1)) + images_original = np.transpose(image.asnumpy(), (0, 2, 3, 1)) else: images_original = np.append(images_original, - np.transpose(image, (0, 2, 3, 1)), + np.transpose(image.asnumpy(), (0, 2, 3, 1)), axis=0) # Color Equalized Images @@ -69,10 +69,10 @@ def test_equalize_py(plot=False): for idx, (image, _) in enumerate(ds_equalize): if idx == 0: - images_equalize = np.transpose(image, (0, 2, 3, 1)) + images_equalize = np.transpose(image.asnumpy(), (0, 2, 3, 1)) else: images_equalize = np.append(images_equalize, - np.transpose(image, (0, 2, 3, 1)), + np.transpose(image.asnumpy(), (0, 2, 3, 1)), axis=0) num_samples = images_original.shape[0] @@ -102,10 +102,10 @@ def test_equalize_c(plot=False): for idx, (image, _) in enumerate(ds_original): if idx == 0: - images_original = image + images_original = image.asnumpy() else: images_original = np.append(images_original, - image, + image.asnumpy(), axis=0) # Equalize Images @@ -120,10 +120,10 @@ def test_equalize_c(plot=False): for idx, (image, _) in enumerate(ds_equalize): if idx == 0: - images_equalize = image + images_equalize = image.asnumpy() else: images_equalize = np.append(images_equalize, - image, + image.asnumpy(), axis=0) if plot: visualize_list(images_original, images_equalize) @@ -151,10 +151,10 @@ def test_equalize_py_c(plot=False): for idx, (image, _) in enumerate(ds_c_equalize): if idx == 0: - images_c_equalize = image + images_c_equalize = image.asnumpy() else: images_c_equalize = np.append(images_c_equalize, - image, + image.asnumpy(), axis=0) # Equalize images in python @@ -172,10 +172,10 @@ def test_equalize_py_c(plot=False): for idx, (image, _) in enumerate(ds_p_equalize): if idx == 0: - images_p_equalize = image + images_p_equalize = image.asnumpy() else: images_p_equalize = np.append(images_p_equalize, - image, + image.asnumpy(), axis=0) num_samples = images_c_equalize.shape[0] @@ -223,9 +223,9 @@ def test_equalize_mnist_c(plot=False): for _, (data_orig, data_trans) in enumerate(zip(ds_orig, ds_equalize_c)): image_orig, label_orig = data_orig image_trans, _ = data_trans - images.append(image_orig) - labels.append(label_orig) - images_trans.append(image_trans) + images.append(image_orig.asnumpy()) + labels.append(label_orig.asnumpy()) + images_trans.append(image_trans.asnumpy()) # Compare with expected md5 from images filename = "equalize_mnist_result_c.npz" diff --git a/tests/ut/python/dataset/test_fill_op.py b/tests/ut/python/dataset/test_fill_op.py index 025b65840bc..eb94b2bca1f 100644 --- a/tests/ut/python/dataset/test_fill_op.py +++ b/tests/ut/python/dataset/test_fill_op.py @@ -31,7 +31,7 @@ def test_fillop_basic(): data = data.map(operations=fill_op, input_columns=["col"]) expected = np.array([3, 3, 3, 3], dtype=np.uint8) for data_row in data: - np.testing.assert_array_equal(data_row[0], expected) + np.testing.assert_array_equal(data_row[0].asnumpy(), expected) def test_fillop_down_type_cast(): @@ -44,7 +44,7 @@ def test_fillop_down_type_cast(): data = data.map(operations=fill_op, input_columns=["col"]) expected = np.array([253, 253, 253, 253], dtype=np.uint8) for data_row in data: - np.testing.assert_array_equal(data_row[0], expected) + np.testing.assert_array_equal(data_row[0].asnumpy(), expected) def test_fillop_up_type_cast(): @@ -57,7 +57,7 @@ def test_fillop_up_type_cast(): data = data.map(operations=fill_op, input_columns=["col"]) expected = np.array([3., 3., 3., 3.], dtype=np.float) for data_row in data: - np.testing.assert_array_equal(data_row[0], expected) + np.testing.assert_array_equal(data_row[0].asnumpy(), expected) def test_fillop_string(): @@ -69,7 +69,7 @@ def test_fillop_string(): data = data.map(operations=fill_op, input_columns=["col"]) expected = np.array(['error', 'error'], dtype='S') - for data_row in data: + for data_row in data.create_tuple_iterator(output_numpy=True): np.testing.assert_array_equal(data_row[0], expected) diff --git a/tests/ut/python/dataset/test_filterop.py b/tests/ut/python/dataset/test_filterop.py index 7d6cc3ae8f7..b0a168aa9ef 100644 --- a/tests/ut/python/dataset/test_filterop.py +++ b/tests/ut/python/dataset/test_filterop.py @@ -35,7 +35,7 @@ def test_diff_predicate_func(): num_iter = 0 label_list = [] - for data in dataset.create_dict_iterator(num_epochs=1): + for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): num_iter += 1 label = data["label"] label_list.append(label) @@ -64,7 +64,7 @@ def test_filter_by_generator_with_no(): dataset_f = dataset.filter(predicate=lambda data: data < 11, num_parallel_workers=4) num_iter = 0 expected_rs = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - for item in dataset_f.create_dict_iterator(num_epochs=1): + for item in dataset_f.create_dict_iterator(num_epochs=1, output_numpy=True): assert item["data"] == expected_rs[num_iter] num_iter += 1 @@ -77,7 +77,7 @@ def test_filter_by_generator_with_repeat(): num_iter = 0 ret_data = [] expected_rs = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - for item in dataset_f.create_dict_iterator(num_epochs=1): + for item in dataset_f.create_dict_iterator(num_epochs=1, output_numpy=True): num_iter += 1 ret_data.append(item["data"]) assert num_iter == 44 @@ -95,7 +95,7 @@ def test_filter_by_generator_with_repeat_after(): num_iter = 0 ret_data = [] expected_rs = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10] - for item in dataset_r.create_dict_iterator(num_epochs=1): + for item in dataset_r.create_dict_iterator(num_epochs=1, output_numpy=True): num_iter += 1 ret_data.append(item["data"]) assert num_iter == 44 @@ -120,7 +120,7 @@ def test_filter_by_generator_with_batch(): dataset_f = dataset_b.filter(predicate=filter_func_batch, num_parallel_workers=4) num_iter = 0 ret_data = [] - for item in dataset_f.create_dict_iterator(num_epochs=1): + for item in dataset_f.create_dict_iterator(num_epochs=1, output_numpy=True): num_iter += 1 ret_data.append(item["data"]) assert num_iter == 3 @@ -136,7 +136,7 @@ def test_filter_by_generator_with_batch_after(): dataset_b = dataset_f.batch(4) num_iter = 0 ret_data = [] - for item in dataset_b.create_dict_iterator(num_epochs=1): + for item in dataset_b.create_dict_iterator(num_epochs=1, output_numpy=True): num_iter += 1 ret_data.append(item["data"]) assert num_iter == 6 @@ -202,7 +202,7 @@ def test_filter_by_generator_with_zip(): dataset_f = dataz.filter(predicate=filter_func_zip, num_parallel_workers=1) num_iter = 0 ret_data = [] - for item in dataset_f.create_dict_iterator(num_epochs=1): + for item in dataset_f.create_dict_iterator(num_epochs=1, output_numpy=True): num_iter += 1 ret_data.append({"data1": item["data1"], "data2": item["data2"]}) assert num_iter == 21 @@ -221,7 +221,7 @@ def test_filter_by_generator_with_zip_after(): dataz = ds.zip((dt1, dt2)) num_iter = 0 ret_data = [] - for item in dataz.create_dict_iterator(num_epochs=1): + for item in dataz.create_dict_iterator(num_epochs=1, output_numpy=True): num_iter += 1 ret_data.append({"data1": item["data1"], "data2": item["data2"]}) assert num_iter == 21 @@ -266,7 +266,7 @@ def test_filter_by_generator_with_map_all_col(): dataset_f = dataset_map.filter(input_columns=["col1"], predicate=filter_func_map_part, num_parallel_workers=1) num_iter = 0 ret_data = [] - for item in dataset_f.create_dict_iterator(num_epochs=1): + for item in dataset_f.create_dict_iterator(num_epochs=1, output_numpy=True): num_iter += 1 ret_data.append(item["col1"]) assert num_iter == 3 @@ -282,7 +282,7 @@ def test_filter_by_generator_with_map_part_col(): dataset_f = dataset_map.filter(input_columns=["out1", "col2"], predicate=filter_func_map, num_parallel_workers=4) num_iter = 0 ret_data = [] - for item in dataset_f.create_dict_iterator(num_epochs=1): + for item in dataset_f.create_dict_iterator(num_epochs=1, output_numpy=True): num_iter += 1 print(item) ret_data.append(item["out1"]) @@ -302,7 +302,7 @@ def test_filter_by_generator_with_rename(): dataset_f = dataset_b.filter(predicate=filter_func_rename, num_parallel_workers=4) num_iter = 0 ret_data = [] - for item in dataset_f.create_dict_iterator(num_epochs=1): + for item in dataset_f.create_dict_iterator(num_epochs=1, output_numpy=True): num_iter += 1 ret_data.append(item["col1"]) assert num_iter == 55 @@ -336,7 +336,7 @@ def test_filter_by_generator_with_input_column(): dataset_f4 = dataset_f3.filter(predicate=filter_func_input_column1, num_parallel_workers=4) num_iter = 0 ret_data = [] - for item in dataset_f4.create_dict_iterator(num_epochs=1): + for item in dataset_f4.create_dict_iterator(num_epochs=1, output_numpy=True): num_iter += 1 ret_data.append(item["out1"]) assert num_iter == 8 @@ -370,7 +370,7 @@ def test_filter_by_generator_Partial0(): dataset_zip = ds.zip((dataset1, dataset2)) dataset_f1 = dataset_zip.filter(predicate=filter_func_Partial_0, num_parallel_workers=2) ret = [] - for item in dataset_f1.create_dict_iterator(num_epochs=1): + for item in dataset_f1.create_dict_iterator(num_epochs=1, output_numpy=True): ret.append(item["col1"]) assert ret[0] == 5 assert ret[6] == 12 @@ -384,7 +384,7 @@ def test_filter_by_generator_Partial1(): dataset_f1 = dataset_zip.filter(predicate=filter_func_Partial_0, num_parallel_workers=2) dataset_map = dataset_f1.map(operations=lambda x1: x1 + 400, input_columns=["col1"], output_columns=["out1"]) ret = [] - for item in dataset_map.create_dict_iterator(num_epochs=1): + for item in dataset_map.create_dict_iterator(num_epochs=1, output_numpy=True): ret.append(item["out1"]) assert ret[0] == 405 assert ret[6] == 412 @@ -403,7 +403,7 @@ def test_filter_by_generator_Partial2(): output_columns=["out1", "out3"]) ret1 = [] ret3 = [] - for item in dataset_map.create_dict_iterator(num_epochs=1): + for item in dataset_map.create_dict_iterator(num_epochs=1, output_numpy=True): ret1.append(item["out1"]) ret3.append(item["out3"]) assert ret1[0] == 400 @@ -428,7 +428,7 @@ def test_filter_by_generator_Partial(): dataset_s = dataset.shuffle(4) dataset_f1 = dataset_s.filter(input_columns=["col1", "col2"], predicate=filter_func_Partial, num_parallel_workers=1) - for item in dataset_f1.create_dict_iterator(num_epochs=1): + for item in dataset_f1.create_dict_iterator(num_epochs=1, output_numpy=True): assert item["col1"] % 3 == 0 @@ -442,7 +442,7 @@ def test_filte_case_dataset_cifar10(): DATA_DIR_10 = "../data/dataset/testCifar10Data" dataset_c = ds.Cifar10Dataset(dataset_dir=DATA_DIR_10, num_samples=100000, shuffle=False) dataset_f1 = dataset_c.filter(input_columns=["image", "label"], predicate=filter_func_cifar, num_parallel_workers=1) - for item in dataset_f1.create_dict_iterator(num_epochs=1): + for item in dataset_f1.create_dict_iterator(num_epochs=1, output_numpy=True): # in this example, each dictionary has keys "image" and "label" assert item["label"] % 3 == 0 @@ -476,7 +476,7 @@ def test_filter_by_generator_with_map_all_sort(): dataset_f = dataz.filter(predicate=filter_func_part_sort, num_parallel_workers=1) num_iter = 0 ret_data = [] - for item in dataset_f.create_dict_iterator(num_epochs=1): + for item in dataset_f.create_dict_iterator(num_epochs=1, output_numpy=True): num_iter += 1 ret_data.append(item) diff --git a/tests/ut/python/dataset/test_five_crop.py b/tests/ut/python/dataset/test_five_crop.py index 601426a0524..156d76c2df2 100644 --- a/tests/ut/python/dataset/test_five_crop.py +++ b/tests/ut/python/dataset/test_five_crop.py @@ -54,7 +54,8 @@ def test_five_crop_op(plot=False): data2 = data2.map(operations=transform_2, input_columns=["image"]) num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): num_iter += 1 image_1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image_2 = item2["image"] diff --git a/tests/ut/python/dataset/test_flat_map.py b/tests/ut/python/dataset/test_flat_map.py index e8161bbbea9..a154ae1ecad 100644 --- a/tests/ut/python/dataset/test_flat_map.py +++ b/tests/ut/python/dataset/test_flat_map.py @@ -34,7 +34,7 @@ def test_flat_map_1(): data = data.flat_map(flat_map_func) count = 0 - for d in data: + for d in data.create_tuple_iterator(output_numpy=True): assert isinstance(d[0], np.ndarray) count += 1 assert count == 52 @@ -60,7 +60,7 @@ def test_flat_map_2(): data = data.flat_map(flat_map_func_2) count = 0 - for d in data: + for d in data.create_tuple_iterator(output_numpy=True): assert isinstance(d[0], np.ndarray) count += 1 assert count == 104 diff --git a/tests/ut/python/dataset/test_from_dataset.py b/tests/ut/python/dataset/test_from_dataset.py index 95c4b32daa4..ef8653c6eae 100644 --- a/tests/ut/python/dataset/test_from_dataset.py +++ b/tests/ut/python/dataset/test_from_dataset.py @@ -28,7 +28,7 @@ def test_demo_basic_from_dataset(): special_first=True) data = data.map(operations=text.Lookup(vocab, ""), input_columns=["text"]) res = [] - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): res.append(d["text"].item()) assert res == [4, 5, 3, 6, 7, 2], res @@ -41,7 +41,7 @@ def test_demo_basic_from_dataset_with_tokenizer(): special_first=True) data = data.map(operations=text.Lookup(vocab, ""), input_columns=["text"]) res = [] - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): res.append(list(d["text"])) assert res == [[13, 3, 7, 14, 9, 17, 3, 2, 19, 9, 2, 11, 3, 4, 16, 4, 8, 6, 5], [21, 20, 10, 25, 23, 26], [24, 22, 10, 12, 8, 6, 7, 4, 18, 15, 5], [2, 2]] @@ -62,7 +62,7 @@ def test_from_dataset(): special_first=True) corpus_dataset = corpus_dataset.map(operations=text.Lookup(vocab, ""), input_columns="text") res = [] - for d in corpus_dataset.create_dict_iterator(num_epochs=1): + for d in corpus_dataset.create_dict_iterator(num_epochs=1, output_numpy=True): res.append(list(d["text"])) return res @@ -110,7 +110,7 @@ def test_from_dataset_special_token(): data = ds.GeneratorDataset(gen_input(texts), column_names=["text"]) data = data.map(operations=text.Lookup(vocab, ""), input_columns="text") res = [] - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): res.append(d["text"].item()) return res diff --git a/tests/ut/python/dataset/test_graphdata.py b/tests/ut/python/dataset/test_graphdata.py index 7bee945df0d..053f232cfb1 100644 --- a/tests/ut/python/dataset/test_graphdata.py +++ b/tests/ut/python/dataset/test_graphdata.py @@ -186,7 +186,7 @@ def test_graphdata_generatordataset(): dataset = ds.GeneratorDataset(source=GNNGraphDataset(g, batch_num), column_names=out_column_names, sampler=RandomBatchedSampler(edge_num, batch_num), num_parallel_workers=4) dataset = dataset.repeat(2) - itr = dataset.create_dict_iterator(num_epochs=1) + itr = dataset.create_dict_iterator(num_epochs=1, output_numpy=True) i = 0 for data in itr: assert data['neighbors'].shape == (2, 7) diff --git a/tests/ut/python/dataset/test_graphdata_distributed.py b/tests/ut/python/dataset/test_graphdata_distributed.py index 5e695c02139..5fa9efc57d8 100644 --- a/tests/ut/python/dataset/test_graphdata_distributed.py +++ b/tests/ut/python/dataset/test_graphdata_distributed.py @@ -112,7 +112,7 @@ def test_graphdata_distributed(): sampler=RandomBatchedSampler(edge_num, batch_num), num_parallel_workers=4, python_multiprocessing=False) dataset = dataset.repeat(2) - itr = dataset.create_dict_iterator(num_epochs=1) + itr = dataset.create_dict_iterator(num_epochs=1, output_numpy=True) i = 0 for data in itr: assert data['neighbors'].shape == (2, 7) diff --git a/tests/ut/python/dataset/test_invert.py b/tests/ut/python/dataset/test_invert.py index 7ec5c79e493..d3d8dc98599 100644 --- a/tests/ut/python/dataset/test_invert.py +++ b/tests/ut/python/dataset/test_invert.py @@ -48,10 +48,10 @@ def test_invert_py(plot=False): for idx, (image, _) in enumerate(ds_original): if idx == 0: - images_original = np.transpose(image, (0, 2, 3, 1)) + images_original = np.transpose(image.asnumpy(), (0, 2, 3, 1)) else: images_original = np.append(images_original, - np.transpose(image, (0, 2, 3, 1)), + np.transpose(image.asnumpy(), (0, 2, 3, 1)), axis=0) # Color Inverted Images @@ -68,10 +68,10 @@ def test_invert_py(plot=False): for idx, (image, _) in enumerate(ds_invert): if idx == 0: - images_invert = np.transpose(image, (0, 2, 3, 1)) + images_invert = np.transpose(image.asnumpy(), (0, 2, 3, 1)) else: images_invert = np.append(images_invert, - np.transpose(image, (0, 2, 3, 1)), + np.transpose(image.asnumpy(), (0, 2, 3, 1)), axis=0) num_samples = images_original.shape[0] @@ -101,10 +101,10 @@ def test_invert_c(plot=False): for idx, (image, _) in enumerate(ds_original): if idx == 0: - images_original = image + images_original = image.asnumpy() else: images_original = np.append(images_original, - image, + image.asnumpy(), axis=0) # Invert Images @@ -119,10 +119,10 @@ def test_invert_c(plot=False): for idx, (image, _) in enumerate(ds_invert): if idx == 0: - images_invert = image + images_invert = image.asnumpy() else: images_invert = np.append(images_invert, - image, + image.asnumpy(), axis=0) if plot: visualize_list(images_original, images_invert) @@ -150,10 +150,10 @@ def test_invert_py_c(plot=False): for idx, (image, _) in enumerate(ds_c_invert): if idx == 0: - images_c_invert = image + images_c_invert = image.asnumpy() else: images_c_invert = np.append(images_c_invert, - image, + image.asnumpy(), axis=0) # invert images in python @@ -171,10 +171,10 @@ def test_invert_py_c(plot=False): for idx, (image, _) in enumerate(ds_p_invert): if idx == 0: - images_p_invert = image + images_p_invert = image.asnumpy() else: images_p_invert = np.append(images_p_invert, - image, + image.asnumpy(), axis=0) num_samples = images_c_invert.shape[0] diff --git a/tests/ut/python/dataset/test_iterator.py b/tests/ut/python/dataset/test_iterator.py index 86b50c3f516..32272ea9fe6 100644 --- a/tests/ut/python/dataset/test_iterator.py +++ b/tests/ut/python/dataset/test_iterator.py @@ -15,6 +15,8 @@ import numpy as np import pytest +import mindspore.common.dtype as mstype +from mindspore.common.tensor import Tensor import mindspore.dataset as ds from mindspore.dataset.engine.iterators import ITERATORS_LIST, _cleanup @@ -28,15 +30,15 @@ def check(project_columns): data1 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=COLUMNS, shuffle=False) data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=project_columns, shuffle=False) - for data_actual, data_expected in zip(data1.create_tuple_iterator(project_columns, num_epochs=1), - data2.create_tuple_iterator(num_epochs=1)): + for data_actual, data_expected in zip(data1.create_tuple_iterator(project_columns, num_epochs=1, output_numpy=True), + data2.create_tuple_iterator(num_epochs=1, output_numpy=True)): assert len(data_actual) == len(data_expected) assert all([np.array_equal(d1, d2) for d1, d2 in zip(data_actual, data_expected)]) -def test_iterator_create_tuple(): +def test_iterator_create_tuple_numpy(): """ - Test creating tuple iterator + Test creating tuple iterator with output NumPy """ check(COLUMNS) check(COLUMNS[0:1]) @@ -45,6 +47,46 @@ def test_iterator_create_tuple(): check(COLUMNS[7:8]) check(COLUMNS[0:2:8]) +def test_iterator_create_dict_mstensor(): + """ + Test creating dict iterator with output MSTensor + """ + def generator(): + for i in range(64): + yield (np.array([i], dtype=np.float32),) + + # apply dataset operations + data1 = ds.GeneratorDataset(generator, ["data"]) + + i = 0 + for item in data1.create_dict_iterator(num_epochs=1): + golden = np.array([i], dtype=np.float32) + np.testing.assert_array_equal(item["data"].asnumpy(), golden) + assert isinstance(item["data"], Tensor) + assert item["data"].dtype == mstype.float32 + i += 1 + assert i == 64 + +def test_iterator_create_tuple_mstensor(): + """ + Test creating tuple iterator with output MSTensor + """ + def generator(): + for i in range(64): + yield (np.array([i], dtype=np.float32),) + + # apply dataset operations + data1 = ds.GeneratorDataset(generator, ["data"]) + + i = 0 + for item in data1.create_tuple_iterator(num_epochs=1): + golden = np.array([i], dtype=np.float32) + np.testing.assert_array_equal(item[0].asnumpy(), golden) + assert isinstance(item[0], Tensor) + assert item[0].dtype == mstype.float32 + i += 1 + assert i == 64 + def test_iterator_weak_ref(): ITERATORS_LIST.clear() @@ -113,6 +155,6 @@ def test_tree_copy(): if __name__ == '__main__': - test_iterator_create_tuple() + test_iterator_create_tuple_numpy() test_iterator_weak_ref() test_tree_copy() diff --git a/tests/ut/python/dataset/test_linear_transformation.py b/tests/ut/python/dataset/test_linear_transformation.py index 488cafa686a..17a7f386fc8 100644 --- a/tests/ut/python/dataset/test_linear_transformation.py +++ b/tests/ut/python/dataset/test_linear_transformation.py @@ -63,7 +63,8 @@ def test_linear_transformation_op(plot=False): image_transformed = [] image = [] - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): image1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image2 = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image_transformed.append(image1) diff --git a/tests/ut/python/dataset/test_mask_op.py b/tests/ut/python/dataset/test_mask_op.py index 54f2cc65be5..95378226fa5 100644 --- a/tests/ut/python/dataset/test_mask_op.py +++ b/tests/ut/python/dataset/test_mask_op.py @@ -59,7 +59,7 @@ def mask_compare(array, op, constant, dtype=mstype.bool_): array = array.astype(dtype=mstype_to_np_type[dtype]) - np.testing.assert_array_equal(array, d[0]) + np.testing.assert_array_equal(array, d[0].asnumpy()) def test_mask_int_comparison(): diff --git a/tests/ut/python/dataset/test_minddataset.py b/tests/ut/python/dataset/test_minddataset.py index 1a36bdd766d..a08fb409567 100644 --- a/tests/ut/python/dataset/test_minddataset.py +++ b/tests/ut/python/dataset/test_minddataset.py @@ -187,7 +187,7 @@ def test_nlp_compress_data(add_and_remove_nlp_compress_file): NLP_FILE_NAME + "0", None, num_readers, shuffle=False) assert data_set.get_dataset_size() == 16 num_iter = 0 - for x, item in zip(data, data_set.create_dict_iterator(num_epochs=1)): + for x, item in zip(data, data_set.create_dict_iterator(num_epochs=1, output_numpy=True)): assert (item["array_a"] == x["array_a"]).all() assert (item["array_b"] == x["array_b"]).all() assert item["array_c"].tobytes() == x["array_c"] @@ -206,7 +206,8 @@ def test_nlp_compress_data_old_version(add_and_remove_nlp_compress_file): OLD_NLP_FILE_NAME + "0", None, num_readers, shuffle=False) assert old_data_set.get_dataset_size() == 16 num_iter = 0 - for x, item in zip(old_data_set.create_dict_iterator(num_epochs=1), data_set.create_dict_iterator(num_epochs=1)): + for x, item in zip(old_data_set.create_dict_iterator(num_epochs=1, output_numpy=True), + data_set.create_dict_iterator(num_epochs=1, output_numpy=True)): assert (item["array_a"] == x["array_a"]).all() assert (item["array_b"] == x["array_b"]).all() assert (item["array_c"] == x["array_c"]).all() @@ -255,7 +256,7 @@ def test_cv_minddataset_partition_tutorial(add_and_remove_cv_file): data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers, num_shards=num_shards, shard_id=partition_id) num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info("-------------- partition : {} ------------------------".format(partition_id)) logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) @@ -278,7 +279,7 @@ def test_cv_minddataset_partition_num_samples_0(add_and_remove_cv_file): num_shards=num_shards, shard_id=partition_id, num_samples=1) num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info("-------------- partition : {} ------------------------".format(partition_id)) logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) @@ -301,7 +302,7 @@ def test_cv_minddataset_partition_num_samples_1(add_and_remove_cv_file): num_shards=num_shards, shard_id=partition_id, num_samples=2) num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info("-------------- partition : {} ------------------------".format(partition_id)) logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) @@ -324,7 +325,7 @@ def test_cv_minddataset_partition_num_samples_2(add_and_remove_cv_file): num_shards=num_shards, shard_id=partition_id, num_samples=3) num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info("-------------- partition : {} ------------------------".format(partition_id)) logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) @@ -352,7 +353,7 @@ def test_cv_minddataset_partition_tutorial_check_shuffle_result(add_and_remove_c data_set = data_set.repeat(3) num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info("-------------- partition : {} ------------------------".format(partition_id)) logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) @@ -391,7 +392,7 @@ def test_cv_minddataset_partition_tutorial_check_whole_reshuffle_result_per_epoc data_set = data_set.repeat(3) num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info("-------------- partition : {} ------------------------".format(partition_id)) logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) @@ -424,7 +425,7 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file): data_set = data_set.repeat(3) num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) num_iter += 1 @@ -450,7 +451,7 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file): data_set2 = data_set2.repeat(3) num_iter = 0 - for item in data_set2.create_dict_iterator(num_epochs=1): + for item in data_set2.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) num_iter += 1 @@ -481,7 +482,7 @@ def test_cv_minddataset_check_shuffle_result(add_and_remove_cv_file): data_set3 = data_set3.repeat(3) num_iter = 0 - for item in data_set3.create_dict_iterator(num_epochs=1): + for item in data_set3.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info("-------------- item[file_name]: {}-----------------------".format(item["file_name"])) logger.info("-------------- item[label]: {} -----------------------".format(item["label"])) num_iter += 1 @@ -513,7 +514,7 @@ def test_cv_minddataset_dataset_size(add_and_remove_cv_file): repeat_num = 2 data_set = data_set.repeat(repeat_num) num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- get dataset size {} -----------------".format(num_iter)) logger.info( @@ -542,7 +543,7 @@ def test_cv_minddataset_repeat_reshuffle(add_and_remove_cv_file): data_set = data_set.repeat(2) num_iter = 0 labels = [] - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- get dataset size {} -----------------".format(num_iter)) logger.info( @@ -571,7 +572,7 @@ def test_cv_minddataset_batch_size_larger_than_records(add_and_remove_cv_file): num_parallel_workers=2) data_set = data_set.batch(32, drop_remainder=True) num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- get dataset size {} -----------------".format(num_iter)) logger.info( @@ -603,7 +604,7 @@ def test_cv_minddataset_reader_file_list(add_and_remove_cv_file): for x in range(FILES_NUM)], columns_list, num_readers) assert data_set.get_dataset_size() == 10 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info( @@ -625,7 +626,7 @@ def test_cv_minddataset_reader_one_partition(add_and_remove_cv_file): data_set = ds.MindDataset([CV_FILE_NAME + "0"], columns_list, num_readers) assert data_set.get_dataset_size() < 10 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info( @@ -678,7 +679,7 @@ def test_cv_minddataset_reader_two_dataset(add_and_remove_cv_file): columns_list, num_readers) assert data_set.get_dataset_size() == 30 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info( @@ -739,7 +740,7 @@ def test_cv_minddataset_reader_two_dataset_partition(add_and_remove_cv_file): columns_list, num_readers) assert data_set.get_dataset_size() < 20 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info( @@ -770,7 +771,7 @@ def test_cv_minddataset_reader_basic_tutorial(add_and_remove_cv_file): data_set = ds.MindDataset(CV_FILE_NAME + "0", columns_list, num_readers) assert data_set.get_dataset_size() == 10 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info( @@ -791,7 +792,7 @@ def test_nlp_minddataset_reader_basic_tutorial(add_and_remove_nlp_file): data_set = ds.MindDataset(NLP_FILE_NAME + "0", None, num_readers) assert data_set.get_dataset_size() == 10 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info( @@ -821,7 +822,7 @@ def test_cv_minddataset_reader_basic_tutorial_5_epoch(add_and_remove_cv_file): assert data_set.get_dataset_size() == 10 for _ in range(5): num_iter = 0 - for data in data_set: + for data in data_set.create_tuple_iterator(output_numpy=True): logger.info("data is {}".format(data)) num_iter += 1 assert num_iter == 10 @@ -852,7 +853,7 @@ def test_cv_minddataset_reader_basic_tutorial_5_epoch_with_batch(add_and_remove_ assert data_set.get_dataset_size() == 5 for _ in range(5): num_iter = 0 - for data in data_set: + for data in data_set.create_tuple_iterator(output_numpy=True): logger.info("data is {}".format(data)) num_iter += 1 assert num_iter == 5 @@ -865,7 +866,7 @@ def test_cv_minddataset_reader_no_columns(add_and_remove_cv_file): data_set = ds.MindDataset(CV_FILE_NAME + "0") assert data_set.get_dataset_size() == 10 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info( @@ -888,7 +889,7 @@ def test_cv_minddataset_reader_repeat_tutorial(add_and_remove_cv_file): repeat_num = 2 data_set = data_set.repeat(repeat_num) num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- repeat two test {} ------------------------".format(num_iter)) logger.info( @@ -1217,7 +1218,7 @@ def test_write_with_multi_bytes_and_array_and_read_by_MindDataset(): shuffle=False) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): assert len(item) == 13 for field in item: if isinstance(item[field], np.ndarray): @@ -1236,7 +1237,7 @@ def test_write_with_multi_bytes_and_array_and_read_by_MindDataset(): shuffle=False) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): assert len(item) == 3 for field in item: if isinstance(item[field], np.ndarray): @@ -1253,7 +1254,7 @@ def test_write_with_multi_bytes_and_array_and_read_by_MindDataset(): shuffle=False) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): assert len(item) == 4 for field in item: if isinstance(item[field], np.ndarray): @@ -1272,7 +1273,7 @@ def test_write_with_multi_bytes_and_array_and_read_by_MindDataset(): shuffle=False) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): assert len(item) == 3 for field in item: if isinstance(item[field], np.ndarray): @@ -1291,7 +1292,7 @@ def test_write_with_multi_bytes_and_array_and_read_by_MindDataset(): shuffle=False) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): assert len(item) == 5 for field in item: if isinstance(item[field], np.ndarray): @@ -1310,7 +1311,7 @@ def test_write_with_multi_bytes_and_array_and_read_by_MindDataset(): shuffle=False) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): assert len(item) == 5 for field in item: if isinstance(item[field], np.ndarray): @@ -1330,7 +1331,7 @@ def test_write_with_multi_bytes_and_array_and_read_by_MindDataset(): shuffle=False) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): assert len(item) == 11 for field in item: if isinstance(item[field], np.ndarray): @@ -1420,7 +1421,7 @@ def test_write_with_multi_bytes_and_MindDataset(): shuffle=False) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): assert len(item) == 7 for field in item: if isinstance(item[field], np.ndarray): @@ -1438,7 +1439,7 @@ def test_write_with_multi_bytes_and_MindDataset(): shuffle=False) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): assert len(item) == 3 for field in item: if isinstance(item[field], np.ndarray): @@ -1456,7 +1457,7 @@ def test_write_with_multi_bytes_and_MindDataset(): shuffle=False) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): assert len(item) == 2 for field in item: if isinstance(item[field], np.ndarray): @@ -1474,7 +1475,7 @@ def test_write_with_multi_bytes_and_MindDataset(): shuffle=False) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): assert len(item) == 2 for field in item: if isinstance(item[field], np.ndarray): @@ -1492,7 +1493,7 @@ def test_write_with_multi_bytes_and_MindDataset(): shuffle=False) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): assert len(item) == 3 for field in item: if isinstance(item[field], np.ndarray): @@ -1511,7 +1512,7 @@ def test_write_with_multi_bytes_and_MindDataset(): shuffle=False) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): assert len(item) == 5 for field in item: if isinstance(item[field], np.ndarray): @@ -1615,7 +1616,7 @@ def test_write_with_multi_array_and_MindDataset(): shuffle=False) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): assert len(item) == 8 for field in item: if isinstance(item[field], np.ndarray): @@ -1635,7 +1636,7 @@ def test_write_with_multi_array_and_MindDataset(): shuffle=False) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): assert len(item) == 6 for field in item: if isinstance(item[field], np.ndarray): @@ -1655,7 +1656,7 @@ def test_write_with_multi_array_and_MindDataset(): shuffle=False) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): assert len(item) == 3 for field in item: if isinstance(item[field], np.ndarray): @@ -1675,7 +1676,7 @@ def test_write_with_multi_array_and_MindDataset(): shuffle=False) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): assert len(item) == 3 for field in item: if isinstance(item[field], np.ndarray): @@ -1693,7 +1694,7 @@ def test_write_with_multi_array_and_MindDataset(): shuffle=False) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): assert len(item) == 1 for field in item: if isinstance(item[field], np.ndarray): @@ -1714,7 +1715,7 @@ def test_write_with_multi_array_and_MindDataset(): shuffle=False) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): assert len(item) == 8 for field in item: if isinstance(item[field], np.ndarray): @@ -1761,7 +1762,7 @@ def test_numpy_generic(): data_set = ds.MindDataset(CV_FILE_NAME + "0", None, num_readers, shuffle=False) assert data_set.get_dataset_size() == 10 idx = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): assert item['label1'] == item['label1'] assert item['label2'] == item['label2'] assert item['label3'] == item['label3'] @@ -1861,7 +1862,7 @@ def test_write_with_float32_float64_float32_array_float64_array_and_MindDataset( shuffle=False) assert data_set.get_dataset_size() == 5 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): assert len(item) == 8 for field in item: if isinstance(item[field], np.ndarray): @@ -1883,7 +1884,7 @@ def test_write_with_float32_float64_float32_array_float64_array_and_MindDataset( shuffle=False) assert data_set.get_dataset_size() == 5 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): assert len(item) == 2 for field in item: if isinstance(item[field], np.ndarray): @@ -1905,7 +1906,7 @@ def test_write_with_float32_float64_float32_array_float64_array_and_MindDataset( shuffle=False) assert data_set.get_dataset_size() == 5 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): assert len(item) == 2 for field in item: if isinstance(item[field], np.ndarray): diff --git a/tests/ut/python/dataset/test_minddataset_exception.py b/tests/ut/python/dataset/test_minddataset_exception.py index 4c7a8e7078f..022c84cca71 100644 --- a/tests/ut/python/dataset/test_minddataset_exception.py +++ b/tests/ut/python/dataset/test_minddataset_exception.py @@ -97,7 +97,7 @@ def test_invalid_mindrecord(): with pytest.raises(Exception, match="MindRecordOp init failed"): data_set = ds.MindDataset('dummy.mindrecord', columns_list, num_readers) num_iter = 0 - for _ in data_set.create_dict_iterator(num_epochs=1): + for _ in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): num_iter += 1 try: assert num_iter == 0 @@ -116,7 +116,7 @@ def test_minddataset_lack_db(): with pytest.raises(Exception, match="MindRecordOp init failed"): data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers) num_iter = 0 - for _ in data_set.create_dict_iterator(num_epochs=1): + for _ in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): num_iter += 1 try: assert num_iter == 0 @@ -135,7 +135,7 @@ def test_cv_minddataset_pk_sample_error_class_column(): with pytest.raises(Exception, match="MindRecordOp launch failed"): data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers, sampler=sampler) num_iter = 0 - for _ in data_set.create_dict_iterator(num_epochs=1): + for _ in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): num_iter += 1 os.remove(CV_FILE_NAME) os.remove("{}.db".format(CV_FILE_NAME)) @@ -150,7 +150,7 @@ def test_cv_minddataset_pk_sample_exclusive_shuffle(): data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers, sampler=sampler, shuffle=False) num_iter = 0 - for _ in data_set.create_dict_iterator(num_epochs=1): + for _ in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): num_iter += 1 os.remove(CV_FILE_NAME) os.remove("{}.db".format(CV_FILE_NAME)) diff --git a/tests/ut/python/dataset/test_minddataset_multi_images.py b/tests/ut/python/dataset/test_minddataset_multi_images.py index e1dc485fdac..d2b7bd1ab11 100644 --- a/tests/ut/python/dataset/test_minddataset_multi_images.py +++ b/tests/ut/python/dataset/test_minddataset_multi_images.py @@ -29,7 +29,7 @@ def test_cv_minddataset_reader_two_png_tutorial(): data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers) assert data_set.get_dataset_size() == 5 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): assert len(item) == 5 logger.info("-------------- cv reader basic is {} -----------------".format(num_iter)) logger.info("-------------- item[id] is {} ------------------------".format(item["id"])) @@ -50,7 +50,7 @@ def test_cv_minddataset_reader_two_png_tutorial_just_image2(): data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers) assert data_set.get_dataset_size() == 5 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): assert len(item) == 2 logger.info("-------------- cv reader basic is {} -----------------".format(num_iter)) logger.info("-------------- item[img_data] is {} ------------------".format(item["img_data"])) diff --git a/tests/ut/python/dataset/test_minddataset_multi_images_and_ndarray.py b/tests/ut/python/dataset/test_minddataset_multi_images_and_ndarray.py index a0d206d8d00..2d79ae9a273 100644 --- a/tests/ut/python/dataset/test_minddataset_multi_images_and_ndarray.py +++ b/tests/ut/python/dataset/test_minddataset_multi_images_and_ndarray.py @@ -57,7 +57,7 @@ def test_cv_minddataset_reader_multi_image_and_ndarray_tutorial(): data_set = ds.MindDataset(CV_FILE_NAME, columns_list, num_readers) assert data_set.get_dataset_size() == 5 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): assert len(item) == 7 logger.info("item: {}".format(item)) assert item["image_0"].dtype == np.uint8 diff --git a/tests/ut/python/dataset/test_minddataset_padded.py b/tests/ut/python/dataset/test_minddataset_padded.py index 53ea6564e7f..b87e0500543 100644 --- a/tests/ut/python/dataset/test_minddataset_padded.py +++ b/tests/ut/python/dataset/test_minddataset_padded.py @@ -122,7 +122,7 @@ def test_cv_minddataset_reader_basic_padded_samples(add_and_remove_cv_file): assert data_set.get_dataset_size() == 15 num_iter = 0 num_padded_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info("-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info("-------------- item[file_name]: {} ------------------------".format(item["file_name"])) logger.info("-------------- item[label]: {} ----------------------------".format(item["label"])) @@ -157,7 +157,7 @@ def test_cv_minddataset_partition_padded_samples(add_and_remove_cv_file): padded_sample=padded_sample, num_padded=num_padded) assert data_set.get_dataset_size() == dataset_size - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info("-------------- partition : {} ------------------------".format(partition_id)) logger.info("-------------- len(item[data]): {} ------------------------".format(len(item["data"]))) logger.info("-------------- item[data]: {} -----------------------------".format(item["data"])) @@ -205,7 +205,7 @@ def test_cv_minddataset_partition_padded_samples_multi_epoch(add_and_remove_cv_f assert data_set.get_dataset_size() == dataset_size data_set = data_set.repeat(repeat_size) local_index = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info("-------------- partition : {} ------------------------".format(partition_id)) logger.info("-------------- len(item[data]): {} ------------------------".format(len(item["data"]))) logger.info("-------------- item[data]: {} -----------------------------".format(item["data"])) @@ -267,7 +267,7 @@ def test_cv_minddataset_partition_padded_samples_no_dividsible(add_and_remove_cv padded_sample=padded_sample, num_padded=num_padded) num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): num_iter += 1 return num_iter @@ -313,7 +313,7 @@ def test_cv_minddataset_partition_padded_samples_no_equal_column_list(add_and_re shard_id=partition_id, padded_sample=padded_sample, num_padded=num_padded) - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info("-------------- partition : {} ------------------------".format(partition_id)) logger.info("-------------- len(item[data]): {} ------------------------".format(len(item["data"]))) logger.info("-------------- item[data]: {} -----------------------------".format(item["data"])) @@ -337,7 +337,7 @@ def test_cv_minddataset_partition_padded_samples_no_column_list(add_and_remove_c shard_id=partition_id, padded_sample=padded_sample, num_padded=num_padded) - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info("-------------- partition : {} ------------------------".format(partition_id)) logger.info("-------------- len(item[data]): {} ------------------------".format(len(item["data"]))) logger.info("-------------- item[data]: {} -----------------------------".format(item["data"])) @@ -360,7 +360,7 @@ def test_cv_minddataset_partition_padded_samples_no_num_padded(add_and_remove_cv num_shards=num_shards, shard_id=partition_id, padded_sample=padded_sample) - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info("-------------- partition : {} ------------------------".format(partition_id)) logger.info("-------------- len(item[data]): {} ------------------------".format(len(item["data"]))) logger.info("-------------- item[data]: {} -----------------------------".format(item["data"])) @@ -383,7 +383,7 @@ def test_cv_minddataset_partition_padded_samples_no_padded_samples(add_and_remov num_shards=num_shards, shard_id=partition_id, num_padded=num_padded) - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info("-------------- partition : {} ------------------------".format(partition_id)) logger.info("-------------- len(item[data]): {} ------------------------".format(len(item["data"]))) logger.info("-------------- item[data]: {} -----------------------------".format(item["data"])) @@ -413,7 +413,7 @@ def test_nlp_minddataset_reader_basic_padded_samples(add_and_remove_nlp_file): padded_sample=padded_sample, num_padded=num_padded) assert data_set.get_dataset_size() == dataset_size - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info("-------------- item[id]: {} ------------------------".format(item["id"])) logger.info("-------------- item[rating]: {} --------------------".format(item["rating"])) logger.info("-------------- item[input_ids]: {}, shape: {} -----------------".format( @@ -461,7 +461,7 @@ def test_nlp_minddataset_reader_basic_padded_samples_multi_epoch(add_and_remove_ data_set = data_set.repeat(repeat_size) local_index = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info("-------------- item[id]: {} ------------------------".format(item["id"])) logger.info("-------------- item[rating]: {} --------------------".format(item["rating"])) logger.info("-------------- item[input_ids]: {}, shape: {} -----------------".format( @@ -523,7 +523,7 @@ def test_nlp_minddataset_reader_basic_padded_samples_check_whole_reshuffle_resul assert data_set.get_dataset_size() == dataset_size data_set = data_set.repeat(repeat_size) inner_num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info("-------------- item[id]: {} ------------------------".format(item["id"])) logger.info("-------------- item[rating]: {} --------------------".format(item["rating"])) logger.info("-------------- item[input_ids]: {}, shape: {} -----------------" diff --git a/tests/ut/python/dataset/test_minddataset_sampler.py b/tests/ut/python/dataset/test_minddataset_sampler.py index 19604bad8bf..d096b3fde08 100644 --- a/tests/ut/python/dataset/test_minddataset_sampler.py +++ b/tests/ut/python/dataset/test_minddataset_sampler.py @@ -70,7 +70,7 @@ def test_cv_minddataset_pk_sample_no_column(add_and_remove_cv_file): assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info("-------------- item[file_name]: \ @@ -90,7 +90,7 @@ def test_cv_minddataset_pk_sample_basic(add_and_remove_cv_file): assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info("-------------- item[data]: \ @@ -111,7 +111,7 @@ def test_cv_minddataset_pk_sample_shuffle(add_and_remove_cv_file): assert data_set.get_dataset_size() == 9 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info("-------------- item[file_name]: \ @@ -132,7 +132,7 @@ def test_cv_minddataset_pk_sample_shuffle_1(add_and_remove_cv_file): assert data_set.get_dataset_size() == 5 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info("-------------- item[file_name]: \ @@ -152,7 +152,7 @@ def test_cv_minddataset_pk_sample_shuffle_2(add_and_remove_cv_file): assert data_set.get_dataset_size() == 9 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info("-------------- item[file_name]: \ @@ -172,7 +172,7 @@ def test_cv_minddataset_pk_sample_out_of_range_0(add_and_remove_cv_file): sampler=sampler) assert data_set.get_dataset_size() == 15 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info("-------------- item[file_name]: \ @@ -191,7 +191,7 @@ def test_cv_minddataset_pk_sample_out_of_range_1(add_and_remove_cv_file): sampler=sampler) assert data_set.get_dataset_size() == 15 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info("-------------- item[file_name]: \ @@ -210,7 +210,7 @@ def test_cv_minddataset_pk_sample_out_of_range_2(add_and_remove_cv_file): sampler=sampler) assert data_set.get_dataset_size() == 10 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info("-------------- item[file_name]: \ @@ -231,7 +231,7 @@ def test_cv_minddataset_subset_random_sample_basic(add_and_remove_cv_file): sampler=sampler) assert data_set.get_dataset_size() == 5 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info( @@ -254,7 +254,7 @@ def test_cv_minddataset_subset_random_sample_replica(add_and_remove_cv_file): sampler=sampler) assert data_set.get_dataset_size() == 6 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info( @@ -277,7 +277,7 @@ def test_cv_minddataset_subset_random_sample_empty(add_and_remove_cv_file): sampler=sampler) assert data_set.get_dataset_size() == 0 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info( @@ -300,7 +300,7 @@ def test_cv_minddataset_subset_random_sample_out_of_range(add_and_remove_cv_file sampler=sampler) assert data_set.get_dataset_size() == 5 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info( @@ -322,7 +322,7 @@ def test_cv_minddataset_subset_random_sample_negative(add_and_remove_cv_file): sampler=sampler) assert data_set.get_dataset_size() == 5 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info( @@ -345,7 +345,7 @@ def test_cv_minddataset_random_sampler_basic(add_and_remove_cv_file): assert data_set.get_dataset_size() == 10 num_iter = 0 new_dataset = [] - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info( @@ -371,7 +371,7 @@ def test_cv_minddataset_random_sampler_repeat(add_and_remove_cv_file): epoch1_dataset = [] epoch2_dataset = [] epoch3_dataset = [] - for item in ds1.create_dict_iterator(num_epochs=1): + for item in ds1.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info( @@ -400,7 +400,7 @@ def test_cv_minddataset_random_sampler_replacement(add_and_remove_cv_file): sampler=sampler) assert data_set.get_dataset_size() == 5 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info( @@ -422,7 +422,7 @@ def test_cv_minddataset_sequential_sampler_basic(add_and_remove_cv_file): sampler=sampler) assert data_set.get_dataset_size() == 4 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info( @@ -447,7 +447,7 @@ def test_cv_minddataset_sequential_sampler_exceed_size(add_and_remove_cv_file): dataset_size = data_set.get_dataset_size() assert dataset_size == 10 num_iter = 0 - for item in data_set.create_dict_iterator(num_epochs=1): + for item in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- cv reader basic: {} ------------------------".format(num_iter)) logger.info( @@ -473,7 +473,7 @@ def test_cv_minddataset_split_basic(add_and_remove_cv_file): assert d1.get_dataset_size() == 8 assert d2.get_dataset_size() == 2 num_iter = 0 - for item in d1.create_dict_iterator(num_epochs=1): + for item in d1.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- item[data]: {} -----------------------------".format(item["data"])) logger.info( @@ -485,7 +485,7 @@ def test_cv_minddataset_split_basic(add_and_remove_cv_file): num_iter += 1 assert num_iter == 8 num_iter = 0 - for item in d2.create_dict_iterator(num_epochs=1): + for item in d2.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- item[data]: {} -----------------------------".format(item["data"])) logger.info( @@ -509,7 +509,7 @@ def test_cv_minddataset_split_exact_percent(add_and_remove_cv_file): assert d1.get_dataset_size() == 8 assert d2.get_dataset_size() == 2 num_iter = 0 - for item in d1.create_dict_iterator(num_epochs=1): + for item in d1.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- item[data]: {} -----------------------------".format(item["data"])) logger.info( @@ -521,7 +521,7 @@ def test_cv_minddataset_split_exact_percent(add_and_remove_cv_file): num_iter += 1 assert num_iter == 8 num_iter = 0 - for item in d2.create_dict_iterator(num_epochs=1): + for item in d2.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- item[data]: {} -----------------------------".format(item["data"])) logger.info( @@ -545,7 +545,7 @@ def test_cv_minddataset_split_fuzzy_percent(add_and_remove_cv_file): assert d1.get_dataset_size() == 4 assert d2.get_dataset_size() == 6 num_iter = 0 - for item in d1.create_dict_iterator(num_epochs=1): + for item in d1.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- item[data]: {} -----------------------------".format(item["data"])) logger.info( @@ -557,7 +557,7 @@ def test_cv_minddataset_split_fuzzy_percent(add_and_remove_cv_file): num_iter += 1 assert num_iter == 4 num_iter = 0 - for item in d2.create_dict_iterator(num_epochs=1): + for item in d2.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- item[data]: {} -----------------------------".format(item["data"])) logger.info( @@ -585,7 +585,7 @@ def test_cv_minddataset_split_deterministic(add_and_remove_cv_file): d1_dataset = [] d2_dataset = [] num_iter = 0 - for item in d1.create_dict_iterator(num_epochs=1): + for item in d1.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- item[data]: {} -----------------------------".format(item["data"])) logger.info( @@ -596,7 +596,7 @@ def test_cv_minddataset_split_deterministic(add_and_remove_cv_file): num_iter += 1 assert num_iter == 8 num_iter = 0 - for item in d2.create_dict_iterator(num_epochs=1): + for item in d2.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- item[data]: {} -----------------------------".format(item["data"])) logger.info( @@ -628,7 +628,7 @@ def test_cv_minddataset_split_sharding(add_and_remove_cv_file): num_iter = 0 d1_shard1 = [] - for item in d1.create_dict_iterator(num_epochs=1): + for item in d1.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- item[data]: {} -----------------------------".format(item["data"])) logger.info( @@ -649,7 +649,7 @@ def test_cv_minddataset_split_sharding(add_and_remove_cv_file): epoch2_dataset = [] epoch3_dataset = [] num_iter = 0 - for item in d1s.create_dict_iterator(num_epochs=1): + for item in d1s.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info( "-------------- item[data]: {} -----------------------------".format(item["data"])) logger.info( diff --git a/tests/ut/python/dataset/test_mixup_label_smoothing.py b/tests/ut/python/dataset/test_mixup_label_smoothing.py index 9786707e6ec..227b9cd63ac 100644 --- a/tests/ut/python/dataset/test_mixup_label_smoothing.py +++ b/tests/ut/python/dataset/test_mixup_label_smoothing.py @@ -44,7 +44,7 @@ def test_one_hot_op(): golden_label = np.ones(num_classes) * epsilon_para / num_classes golden_label[1] = 1 - epsilon_para / num_classes - for data in dataset.create_dict_iterator(num_epochs=1): + for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): label = data["label"] logger.info("label is {}".format(label)) logger.info("golden_label is {}".format(golden_label)) @@ -83,7 +83,8 @@ def test_mix_up_single(): ] ds1 = ds1.map(operations=transforms, input_columns=["image", "label"]) - for data1, data2 in zip(ds1.create_dict_iterator(num_epochs=1), ds2.create_dict_iterator(num_epochs=1)): + for data1, data2 in zip(ds1.create_dict_iterator(num_epochs=1, output_numpy=True), + ds2.create_dict_iterator(num_epochs=1, output_numpy=True)): image1 = data1["image"] label = data1["label"] logger.info("label is {}".format(label)) @@ -133,7 +134,8 @@ def test_mix_up_multi(): ds1 = ds1.map(operations=transforms, input_columns=["image", "label"]) num_iter = 0 batch1_image1 = 0 - for data1, data2 in zip(ds1.create_dict_iterator(num_epochs=1), ds2.create_dict_iterator(num_epochs=1)): + for data1, data2 in zip(ds1.create_dict_iterator(num_epochs=1, output_numpy=True), + ds2.create_dict_iterator(num_epochs=1, output_numpy=True)): image1 = data1["image"] label1 = data1["label"] logger.info("label: {}".format(label1)) diff --git a/tests/ut/python/dataset/test_mixup_op.py b/tests/ut/python/dataset/test_mixup_op.py index 6357dc8f2a2..cb67aeeafd8 100644 --- a/tests/ut/python/dataset/test_mixup_op.py +++ b/tests/ut/python/dataset/test_mixup_op.py @@ -44,9 +44,9 @@ def test_mixup_batch_success1(plot=False): images_original = None for idx, (image, _) in enumerate(ds_original): if idx == 0: - images_original = image + images_original = image.asnumpy() else: - images_original = np.append(images_original, image, axis=0) + images_original = np.append(images_original, image.asnumpy(), axis=0) # MixUp Images data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) @@ -60,9 +60,9 @@ def test_mixup_batch_success1(plot=False): images_mixup = None for idx, (image, _) in enumerate(data1): if idx == 0: - images_mixup = image + images_mixup = image.asnumpy() else: - images_mixup = np.append(images_mixup, image, axis=0) + images_mixup = np.append(images_mixup, image.asnumpy(), axis=0) if plot: visualize_list(images_original, images_mixup) @@ -88,9 +88,9 @@ def test_mixup_batch_success2(plot=False): images_original = None for idx, (image, _) in enumerate(ds_original): if idx == 0: - images_original = image + images_original = image.asnumpy() else: - images_original = np.append(images_original, image, axis=0) + images_original = np.append(images_original, image.asnumpy(), axis=0) # MixUp Images data1 = ds.ImageFolderDataset(dataset_dir=DATA_DIR2, shuffle=False) @@ -108,9 +108,9 @@ def test_mixup_batch_success2(plot=False): images_mixup = None for idx, (image, _) in enumerate(data1): if idx == 0: - images_mixup = image + images_mixup = image.asnumpy() else: - images_mixup = np.append(images_mixup, image, axis=0) + images_mixup = np.append(images_mixup, image.asnumpy(), axis=0) if plot: visualize_list(images_original, images_mixup) @@ -135,9 +135,9 @@ def test_mixup_batch_success3(plot=False): images_original = None for idx, (image, _) in enumerate(ds_original): if idx == 0: - images_original = image + images_original = image.asnumpy() else: - images_original = np.append(images_original, image, axis=0) + images_original = np.append(images_original, image.asnumpy(), axis=0) # MixUp Images data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) @@ -151,9 +151,9 @@ def test_mixup_batch_success3(plot=False): images_mixup = np.array([]) for idx, (image, _) in enumerate(data1): if idx == 0: - images_mixup = image + images_mixup = image.asnumpy() else: - images_mixup = np.append(images_mixup, image, axis=0) + images_mixup = np.append(images_mixup, image.asnumpy(), axis=0) if plot: visualize_list(images_original, images_mixup) @@ -180,9 +180,9 @@ def test_mixup_batch_success4(plot=False): images_original = None for idx, (image, _) in enumerate(ds_original): if idx == 0: - images_original = image + images_original = image.asnumpy() else: - images_original = np.append(images_original, image, axis=0) + images_original = np.append(images_original, image.asnumpy(), axis=0) # MixUp Images data1 = ds.CelebADataset(DATA_DIR3, shuffle=False) @@ -200,9 +200,9 @@ def test_mixup_batch_success4(plot=False): images_mixup = np.array([]) for idx, (image, _) in enumerate(data1): if idx == 0: - images_mixup = image + images_mixup = image.asnumpy() else: - images_mixup = np.append(images_mixup, image, axis=0) + images_mixup = np.append(images_mixup, image.asnumpy(), axis=0) if plot: visualize_list(images_original, images_mixup) @@ -252,9 +252,9 @@ def test_mixup_batch_fail1(): images_original = np.array([]) for idx, (image, _) in enumerate(ds_original): if idx == 0: - images_original = image + images_original = image.asnumpy() else: - images_original = np.append(images_original, image, axis=0) + images_original = np.append(images_original, image.asnumpy(), axis=0) # MixUp Images data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) @@ -266,9 +266,9 @@ def test_mixup_batch_fail1(): data1 = data1.map(operations=mixup_batch_op, input_columns=["image", "label"]) for idx, (image, _) in enumerate(data1): if idx == 0: - images_mixup = image + images_mixup = image.asnumpy() else: - images_mixup = np.append(images_mixup, image, axis=0) + images_mixup = np.append(images_mixup, image.asnumpy(), axis=0) error_message = "You must make sure images are HWC or CHW and batched" assert error_message in str(error.value) @@ -287,9 +287,9 @@ def test_mixup_batch_fail2(): images_original = np.array([]) for idx, (image, _) in enumerate(ds_original): if idx == 0: - images_original = image + images_original = image.asnumpy() else: - images_original = np.append(images_original, image, axis=0) + images_original = np.append(images_original, image.asnumpy(), axis=0) # MixUp Images data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) @@ -315,9 +315,9 @@ def test_mixup_batch_fail3(): images_original = None for idx, (image, _) in enumerate(ds_original): if idx == 0: - images_original = image + images_original = image.asnumpy() else: - images_original = np.append(images_original, image, axis=0) + images_original = np.append(images_original, image.asnumpy(), axis=0) # MixUp Images data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) @@ -332,9 +332,9 @@ def test_mixup_batch_fail3(): images_mixup = np.array([]) for idx, (image, _) in enumerate(data1): if idx == 0: - images_mixup = image + images_mixup = image.asnumpy() else: - images_mixup = np.append(images_mixup, image, axis=0) + images_mixup = np.append(images_mixup, image.asnumpy(), axis=0) error_message = "Both images and labels columns are required" assert error_message in str(error.value) @@ -353,9 +353,9 @@ def test_mixup_batch_fail4(): images_original = np.array([]) for idx, (image, _) in enumerate(ds_original): if idx == 0: - images_original = image + images_original = image.asnumpy() else: - images_original = np.append(images_original, image, axis=0) + images_original = np.append(images_original, image.asnumpy(), axis=0) # MixUp Images data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) @@ -382,9 +382,9 @@ def test_mixup_batch_fail5(): images_original = np.array([]) for idx, (image, _) in enumerate(ds_original): if idx == 0: - images_original = image + images_original = image.asnumpy() else: - images_original = np.append(images_original, image, axis=0) + images_original = np.append(images_original, image.asnumpy(), axis=0) # MixUp Images data1 = ds.Cifar10Dataset(DATA_DIR, num_samples=10, shuffle=False) @@ -397,9 +397,9 @@ def test_mixup_batch_fail5(): images_mixup = np.array([]) for idx, (image, _) in enumerate(data1): if idx == 0: - images_mixup = image + images_mixup = image.asnumpy() else: - images_mixup = np.append(images_mixup, image, axis=0) + images_mixup = np.append(images_mixup, image.asnumpy(), axis=0) error_message = "MixUpBatch: Wrong labels shape. The second column (labels) must have a shape of NC or NLC" assert error_message in str(error.value) diff --git a/tests/ut/python/dataset/test_ngram_op.py b/tests/ut/python/dataset/test_ngram_op.py index ef65a8e561d..ec378b44555 100644 --- a/tests/ut/python/dataset/test_ngram_op.py +++ b/tests/ut/python/dataset/test_ngram_op.py @@ -42,7 +42,7 @@ def test_multiple_ngrams(): dataset = dataset.map(operations=text.Ngram([1, 2, 3], ("_", 2), ("_", 2), " "), input_columns="text") i = 0 - for data in dataset.create_dict_iterator(num_epochs=1): + for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): assert [d.decode("utf8") for d in data["text"]] == n_gram_mottos[i] i += 1 @@ -64,7 +64,7 @@ def test_simple_ngram(): dataset = dataset.map(operations=text.Ngram(3, separator=" "), input_columns="text") i = 0 - for data in dataset.create_dict_iterator(num_epochs=1): + for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): assert [d.decode("utf8") for d in data["text"]] == n_gram_mottos[i], i i += 1 @@ -79,7 +79,7 @@ def test_corner_cases(): try: dataset = ds.GeneratorDataset(gen(input_line), column_names=["text"]) dataset = dataset.map(operations=text.Ngram(n, l_pad, r_pad, separator=sep), input_columns=["text"]) - for data in dataset.create_dict_iterator(num_epochs=1): + for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): return [d.decode("utf8") for d in data["text"]] except (ValueError, TypeError) as e: return str(e) diff --git a/tests/ut/python/dataset/test_nlp.py b/tests/ut/python/dataset/test_nlp.py index 6fe2b029c5a..ad54672c297 100644 --- a/tests/ut/python/dataset/test_nlp.py +++ b/tests/ut/python/dataset/test_nlp.py @@ -38,7 +38,7 @@ def test_on_tokenized_line(): data = data.map(operations=lookup, input_columns=["text"]) res = np.array([[10, 1, 11, 1, 12, 1, 15, 1, 13, 1, 14], [11, 1, 12, 1, 10, 1, 14, 1, 13, 1, 15]], dtype=np.int32) - for i, d in enumerate(data.create_dict_iterator(num_epochs=1)): + for i, d in enumerate(data.create_dict_iterator(num_epochs=1, output_numpy=True)): np.testing.assert_array_equal(d["text"], res[i]) @@ -56,7 +56,7 @@ def test_on_tokenized_line_with_no_special_tokens(): data = data.map(operations=lookup, input_columns=["text"]) res = np.array([[8, 0, 9, 0, 10, 0, 13, 0, 11, 0, 12], [9, 0, 10, 0, 8, 0, 12, 0, 11, 0, 13]], dtype=np.int32) - for i, d in enumerate(data.create_dict_iterator(num_epochs=1)): + for i, d in enumerate(data.create_dict_iterator(num_epochs=1, output_numpy=True)): np.testing.assert_array_equal(d["text"], res[i]) diff --git a/tests/ut/python/dataset/test_normalizeOp.py b/tests/ut/python/dataset/test_normalizeOp.py index bfd585969d1..c4290c998d6 100644 --- a/tests/ut/python/dataset/test_normalizeOp.py +++ b/tests/ut/python/dataset/test_normalizeOp.py @@ -107,7 +107,8 @@ def test_normalize_op_c(plot=False): data2 = data2.map(operations=decode_op, input_columns=["image"]) num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): image_de_normalized = item1["image"] image_original = item2["image"] image_np_normalized = normalize_np(image_original, mean, std) @@ -144,7 +145,8 @@ def test_normalize_op_py(plot=False): data2 = data2.map(operations=transform, input_columns=["image"]) num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): image_de_normalized = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image_np_normalized = (normalize_np(item2["image"].transpose(1, 2, 0), mean, std) * 255).astype(np.uint8) image_original = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) diff --git a/tests/ut/python/dataset/test_opt.py b/tests/ut/python/dataset/test_opt.py index f4dfe189ea5..579418c9121 100644 --- a/tests/ut/python/dataset/test_opt.py +++ b/tests/ut/python/dataset/test_opt.py @@ -39,7 +39,7 @@ def test_case_0(): data1 = data1.batch(2) expected_data = np.array([[[1], [2]], [[3], [0]]]) - for i, data_row in enumerate(data1): + for i, data_row in enumerate(data1.create_tuple_iterator(output_numpy=True)): np.testing.assert_array_equal(data_row[0], expected_data[i]) # Restore configuration diff --git a/tests/ut/python/dataset/test_opt_pass.py b/tests/ut/python/dataset/test_opt_pass.py index e1a519f680d..710254c6ca3 100644 --- a/tests/ut/python/dataset/test_opt_pass.py +++ b/tests/ut/python/dataset/test_opt_pass.py @@ -31,7 +31,7 @@ def test_map_reorder0(): data0 = data0.map(operations=(lambda x: x), input_columns="col0", output_columns="out", column_order=["col1", "out"]) - for item in data0.create_tuple_iterator(num_epochs=1): # each data is a dictionary + for item in data0.create_tuple_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary assert item == [np.array(1), np.array(0)] @@ -51,7 +51,7 @@ def test_map_reorder1(): data2 = ds.zip((data0, data1)) data2 = data2.map(operations=(lambda x: x), input_columns="a0", column_order=["b2", "a2", "b1", "a1", "b0", "a0"]) - for item in data2.create_tuple_iterator(num_epochs=1): + for item in data2.create_tuple_iterator(num_epochs=1, output_numpy=True): assert item == [np.array(2), np.array(2), np.array(1), np.array(1), np.array(0), np.array(0)] @@ -67,7 +67,7 @@ def test_shuffle(): data2 = ds.TFRecordDataset(FILES, schema=SCHEMA_FILE, shuffle=ds.Shuffle.FILES) data2 = data2.shuffle(10000) - for d1, d2 in zip(data1, data2): + for d1, d2 in zip(data1.create_tuple_iterator(output_numpy=True), data2.create_tuple_iterator(output_numpy=True)): for t1, t2 in zip(d1, d2): np.testing.assert_array_equal(t1, t2) @@ -77,7 +77,7 @@ def test_shuffle(): data2 = ds.TextFileDataset(DATA_ALL_FILE, shuffle=ds.Shuffle.FILES) data2 = data2.shuffle(10000) - for d1, d2 in zip(data1, data2): + for d1, d2 in zip(data1.create_tuple_iterator(output_numpy=True), data2.create_tuple_iterator(output_numpy=True)): for t1, t2 in zip(d1, d2): np.testing.assert_array_equal(t1, t2) @@ -87,7 +87,7 @@ def test_shuffle(): data2 = ds.CLUEDataset(TRAIN_FILE, task='AFQMC', usage='train', shuffle=ds.Shuffle.FILES) data2 = data2.shuffle(10000) - for d1, d2 in zip(data1, data2): + for d1, d2 in zip(data1.create_tuple_iterator(output_numpy=True), data2.create_tuple_iterator(output_numpy=True)): for t1, t2 in zip(d1, d2): np.testing.assert_array_equal(t1, t2) diff --git a/tests/ut/python/dataset/test_pad.py b/tests/ut/python/dataset/test_pad.py index 803ea1e8d06..5a8b699a9a6 100644 --- a/tests/ut/python/dataset/test_pad.py +++ b/tests/ut/python/dataset/test_pad.py @@ -56,7 +56,8 @@ def test_pad_op(): data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"], shuffle=False) data2 = data2.map(operations=transform, input_columns=["image"]) - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): c_image = item1["image"] py_image = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) @@ -94,7 +95,7 @@ def test_pad_grayscale(): pad_gray = c_vision.Pad(100, fill_value=(20, 20, 20)) data1 = data1.map(operations=pad_gray, input_columns=["image"]) dataset_shape_1 = [] - for item1 in data1.create_dict_iterator(num_epochs=1): + for item1 in data1.create_dict_iterator(num_epochs=1, output_numpy=True): c_image = item1["image"] dataset_shape_1.append(c_image.shape) @@ -108,7 +109,7 @@ def test_pad_grayscale(): data2 = data2.map(operations=ctrans, input_columns=["image"]) - for item2 in data2.create_dict_iterator(num_epochs=1): + for item2 in data2.create_dict_iterator(num_epochs=1, output_numpy=True): c_image = item2["image"] dataset_shape_2.append(c_image.shape) diff --git a/tests/ut/python/dataset/test_pad_batch.py b/tests/ut/python/dataset/test_pad_batch.py index e63c4a9ab73..c33e97fd2c3 100644 --- a/tests/ut/python/dataset/test_pad_batch.py +++ b/tests/ut/python/dataset/test_pad_batch.py @@ -62,7 +62,7 @@ def test_batch_padding_01(): data1 = ds.GeneratorDataset((lambda: gen_2cols(2)), ["col1d", "col2d"]) data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={"col2d": ([2, 2], -2), "col1d": ([2], -1)}) data1 = data1.repeat(2) - for data in data1.create_dict_iterator(num_epochs=1): + for data in data1.create_dict_iterator(num_epochs=1, output_numpy=True): np.testing.assert_array_equal([[0, -1], [1, -1]], data["col1d"]) np.testing.assert_array_equal([[[100, -2], [200, -2]], [[101, -2], [201, -2]]], data["col2d"]) @@ -71,7 +71,7 @@ def test_batch_padding_02(): data1 = ds.GeneratorDataset((lambda: gen_2cols(2)), ["col1d", "col2d"]) data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={"col2d": ([1, 2], -2)}) data1 = data1.repeat(2) - for data in data1.create_dict_iterator(num_epochs=1): + for data in data1.create_dict_iterator(num_epochs=1, output_numpy=True): np.testing.assert_array_equal([[0], [1]], data["col1d"]) np.testing.assert_array_equal([[[100, -2]], [[101, -2]]], data["col2d"]) @@ -81,7 +81,7 @@ def test_batch_padding_03(): data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={"col": (None, -1)}) # pad automatically data1 = data1.repeat(2) res = dict() - for ind, data in enumerate(data1.create_dict_iterator(num_epochs=1)): + for ind, data in enumerate(data1.create_dict_iterator(num_epochs=1, output_numpy=True)): res[ind] = data["col"].copy() np.testing.assert_array_equal(res[0], [[0, -1], [0, 1]]) np.testing.assert_array_equal(res[1], [[0, 1, 2, -1], [0, 1, 2, 3]]) @@ -93,7 +93,7 @@ def test_batch_padding_04(): data1 = ds.GeneratorDataset((lambda: gen_var_cols(2)), ["col1", "col2"]) data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={}) # pad automatically data1 = data1.repeat(2) - for data in data1.create_dict_iterator(num_epochs=1): + for data in data1.create_dict_iterator(num_epochs=1, output_numpy=True): np.testing.assert_array_equal(data["col1"], [[0, 0], [0, 1]]) np.testing.assert_array_equal(data["col2"], [[100, 0], [100, 101]]) @@ -102,7 +102,7 @@ def test_batch_padding_05(): data1 = ds.GeneratorDataset((lambda: gen_var_cols_2d(3)), ["col1", "col2"]) data1 = data1.batch(batch_size=3, drop_remainder=False, pad_info={"col2": ([2, None], -2), "col1": (None, -1)}) # pad automatically - for data in data1.create_dict_iterator(num_epochs=1): + for data in data1.create_dict_iterator(num_epochs=1, output_numpy=True): np.testing.assert_array_equal(data["col1"], [[[0, -1, -1]], [[0, 1, -1]], [[0, 1, 2]]]) np.testing.assert_array_equal(data["col2"], [[[100, -2, -2], [-2, -2, -2]], [[100, 101, -2], [-2, -2, -2]], [[100, 101, 102], [-2, -2, -2]]]) @@ -180,7 +180,7 @@ def test_pad_via_map(): data1 = data1.map(operations=(lambda x: np.pad(x, (0, 816))), input_columns="image") data1 = data1.batch(batch_size=25, drop_remainder=True) res = [] - for data in data1.create_dict_iterator(num_epochs=1): + for data in data1.create_dict_iterator(num_epochs=1, output_numpy=True): res.append(data["image"]) return res @@ -189,7 +189,7 @@ def test_pad_via_map(): data2 = data2.map(operations=(lambda x: x.reshape(-1)), input_columns="image") # reshape to 1d data2 = data2.batch(batch_size=25, drop_remainder=True, pad_info={"image": ([3888], 0)}) res = [] - for data in data2.create_dict_iterator(num_epochs=1): + for data in data2.create_dict_iterator(num_epochs=1, output_numpy=True): res.append(data["image"]) return res diff --git a/tests/ut/python/dataset/test_pad_end_op.py b/tests/ut/python/dataset/test_pad_end_op.py index c25d6b9a95b..3649ed29140 100644 --- a/tests/ut/python/dataset/test_pad_end_op.py +++ b/tests/ut/python/dataset/test_pad_end_op.py @@ -30,7 +30,7 @@ def pad_compare(array, pad_shape, pad_value, res): data = data.map(operations=ops.PadEnd(pad_shape, pad_value)) else: data = data.map(operations=ops.PadEnd(pad_shape)) - for d in data: + for d in data.create_tuple_iterator(output_numpy=True): np.testing.assert_array_equal(res, d[0]) diff --git a/tests/ut/python/dataset/test_paddeddataset.py b/tests/ut/python/dataset/test_paddeddataset.py index 4f3e7da187a..cd7ef07ae7f 100644 --- a/tests/ut/python/dataset/test_paddeddataset.py +++ b/tests/ut/python/dataset/test_paddeddataset.py @@ -57,7 +57,7 @@ def test_TFRecord_Padded(): testsampler = ds.DistributedSampler(num_shards=shard_num, shard_id=i, shuffle=False, num_samples=None) concat_ds.use_sampler(testsampler) shard_list = [] - for item in concat_ds.create_dict_iterator(num_epochs=1): + for item in concat_ds.create_dict_iterator(num_epochs=1, output_numpy=True): shard_list.append(len(item['image'])) verify_list.append(shard_list) assert verify_list == result_list @@ -80,7 +80,7 @@ def test_GeneratorDataSet_Padded(): distributed_sampler = ds.DistributedSampler(num_shards=shard_num, shard_id=i, shuffle=False, num_samples=None) data3.use_sampler(distributed_sampler) tem_list = [] - for ele in data3.create_dict_iterator(num_epochs=1): + for ele in data3.create_dict_iterator(num_epochs=1, output_numpy=True): tem_list.append(ele['col1'][0]) verify_list.append(tem_list) @@ -105,7 +105,7 @@ def test_Reapeat_afterPadded(): ds3.use_sampler(testsampler) repeat_num = 2 ds3 = ds3.repeat(repeat_num) - for item in ds3.create_dict_iterator(num_epochs=1): + for item in ds3.create_dict_iterator(num_epochs=1, output_numpy=True): verify_list.append(len(item['image'])) assert verify_list == result_list * repeat_num @@ -149,7 +149,7 @@ def test_Unevenly_distributed(): tem_list = [] testsampler = ds.DistributedSampler(num_shards=numShard, shard_id=i, shuffle=False, num_samples=None) ds3.use_sampler(testsampler) - for item in ds3.create_dict_iterator(num_epochs=1): + for item in ds3.create_dict_iterator(num_epochs=1, output_numpy=True): tem_list.append(len(item['image'])) verify_list.append(tem_list) assert verify_list == result_list @@ -174,7 +174,7 @@ def test_three_datasets_connected(): distributed_sampler = ds.DistributedSampler(num_shards=shard_num, shard_id=i, shuffle=False, num_samples=None) data4.use_sampler(distributed_sampler) tem_list = [] - for ele in data4.create_dict_iterator(num_epochs=1): + for ele in data4.create_dict_iterator(num_epochs=1, output_numpy=True): tem_list.append(ele['col1'][0]) verify_list.append(tem_list) @@ -232,7 +232,7 @@ def test_imagefolder_padded(): assert sum([1 for _ in data3]) == 10 verify_list = [] - for ele in data3.create_dict_iterator(num_epochs=1): + for ele in data3.create_dict_iterator(num_epochs=1, output_numpy=True): verify_list.append(len(ele['image'])) assert verify_list[8] == 1 assert verify_list[9] == 6 @@ -259,7 +259,7 @@ def test_imagefolder_padded_with_decode(): data3.use_sampler(testsampler) data3 = data3.map(operations=V_C.Decode(), input_columns="image") shard_sample_count = 0 - for ele in data3.create_dict_iterator(num_epochs=1): + for ele in data3.create_dict_iterator(num_epochs=1, output_numpy=True): print("label: {}".format(ele['label'])) count += 1 shard_sample_count += 1 @@ -289,7 +289,7 @@ def test_imagefolder_padded_with_decode_and_get_dataset_size(): shard_dataset_size = data3.get_dataset_size() data3 = data3.map(operations=V_C.Decode(), input_columns="image") shard_sample_count = 0 - for ele in data3.create_dict_iterator(num_epochs=1): + for ele in data3.create_dict_iterator(num_epochs=1, output_numpy=True): print("label: {}".format(ele['label'])) count += 1 shard_sample_count += 1 @@ -313,7 +313,7 @@ def test_more_shard_padded(): tem_list = [] testsampler = ds.DistributedSampler(num_shards=numShard, shard_id=i, shuffle=False, num_samples=None) data3.use_sampler(testsampler) - for item in data3.create_dict_iterator(num_epochs=1): + for item in data3.create_dict_iterator(num_epochs=1, output_numpy=True): tem_list.append(item['col1']) vertifyList.append(tem_list) @@ -339,7 +339,7 @@ def test_more_shard_padded(): tem_list = [] testsampler = ds.DistributedSampler(num_shards=numShard, shard_id=i, shuffle=False, num_samples=None) ds3.use_sampler(testsampler) - for item in ds3.create_dict_iterator(num_epochs=1): + for item in ds3.create_dict_iterator(num_epochs=1, output_numpy=True): tem_list.append(len(item['image'])) vertifyList1.append(tem_list) @@ -426,7 +426,7 @@ def test_Mindrecord_Padded(remove_mindrecord_file): testsampler = ds.DistributedSampler(num_shards=shard_num, shard_id=i, shuffle=False, num_samples=None) ds2.use_sampler(testsampler) tem_list = [] - for ele in ds2.create_dict_iterator(num_epochs=1): + for ele in ds2.create_dict_iterator(num_epochs=1, output_numpy=True): tem_list.append(int(ele['file_name'].tostring().decode().lstrip('image_').rstrip('.jpg'))) result_list.append(tem_list) assert result_list == verify_list @@ -440,7 +440,7 @@ def test_clue_padded_and_skip_with_0_samples(): data = ds.CLUEDataset(TRAIN_FILE, task='AFQMC', usage='train') count = 0 - for _ in data.create_dict_iterator(num_epochs=1): + for _ in data.create_dict_iterator(num_epochs=1, output_numpy=True): count += 1 assert count == 3 @@ -456,20 +456,20 @@ def test_clue_padded_and_skip_with_0_samples(): dataset.use_sampler(testsampler) assert dataset.get_dataset_size() == 2 count = 0 - for data in dataset.create_dict_iterator(num_epochs=1): + for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): count += 1 assert count == 2 dataset = dataset.skip(count=2) # dataset2 has none samples count = 0 - for data in dataset.create_dict_iterator(num_epochs=1): + for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): count += 1 assert count == 0 with pytest.raises(ValueError, match="There is no samples in the "): dataset = dataset.concat(data_copy1) count = 0 - for data in dataset.create_dict_iterator(num_epochs=1): + for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): count += 1 assert count == 2 diff --git a/tests/ut/python/dataset/test_pair_truncate.py b/tests/ut/python/dataset/test_pair_truncate.py index c0733b997b4..da35ee1dd7e 100644 --- a/tests/ut/python/dataset/test_pair_truncate.py +++ b/tests/ut/python/dataset/test_pair_truncate.py @@ -24,7 +24,8 @@ import mindspore.dataset.text as text def compare(in1, in2, length, out1, out2): data = ds.NumpySlicesDataset({"s1": [in1], "s2": [in2]}) data = data.map(operations=text.TruncateSequencePair(length), input_columns=["s1", "s2"]) - for d in data.create_dict_iterator(num_epochs=1): + data = data.map(input_columns=["s1", "s2"], operations=text.TruncateSequencePair(length)) + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): np.testing.assert_array_equal(out1, d["s1"]) np.testing.assert_array_equal(out2, d["s2"]) diff --git a/tests/ut/python/dataset/test_pyfunc.py b/tests/ut/python/dataset/test_pyfunc.py index d7eb447a4c3..5fa177223b1 100644 --- a/tests/ut/python/dataset/test_pyfunc.py +++ b/tests/ut/python/dataset/test_pyfunc.py @@ -36,7 +36,7 @@ def test_case_0(): data1 = data1.map(operations=(lambda x: x + x), input_columns="col0", output_columns="out") i = 0 - for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary # In this test, the dataset is 2x2 sequential tensors golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]]) np.testing.assert_array_equal(item["out"], golden) @@ -57,7 +57,7 @@ def test_case_1(): column_order=["out0", "out1"]) i = 0 - for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary # In this test, the dataset is 2x2 sequential tensors golden = np.array([[i, i + 1], [i + 2, i + 3]]) np.testing.assert_array_equal(item["out0"], golden) @@ -81,7 +81,7 @@ def test_case_2(): column_order=["out"]) i = 0 - for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary # In this test, the dataset is 2x2 sequential tensors golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]]) np.testing.assert_array_equal(item["out"], golden) @@ -103,7 +103,7 @@ def test_case_3(): output_columns=["out0", "out1", "out2"], column_order=["out0", "out1", "out2"]) i = 0 - for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary # In this test, the dataset is 2x2 sequential tensors golden = np.array([[i, i + 1], [i + 2, i + 3]]) np.testing.assert_array_equal(item["out0"], golden) @@ -130,7 +130,7 @@ def test_case_4(): column_order=["out0", "out1", "out2"]) i = 0 - for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary # In this test, the dataset is 2x2 sequential tensors golden = np.array([[i, i + 1], [i + 2, i + 3]]) np.testing.assert_array_equal(item["out0"], golden) @@ -157,7 +157,7 @@ def test_case_5(): data1 = data1.map(operations=func_5, input_columns="col0", output_columns="out") - for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary # In this test, the dataset is 2x2 sequential tensors golden = np.array([[1, 1], [1, 1]]) np.testing.assert_array_equal(item["out"], golden) @@ -175,7 +175,7 @@ def test_case_6(): data1 = data1.map(operations=[(lambda x: x + x), (lambda x: x + x)], input_columns="col0", output_columns="out") i = 0 - for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary # In this test, the dataset is 2x2 sequential tensors golden = np.array([[i * 4, (i + 1) * 4], [(i + 2) * 4, (i + 3) * 4]]) np.testing.assert_array_equal(item["out"], golden) @@ -195,7 +195,7 @@ def test_case_7(): num_parallel_workers=4, python_multiprocessing=True) i = 0 - for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary # In this test, the dataset is 2x2 sequential tensors golden = np.array([[i * 2, (i + 1) * 2], [(i + 2) * 2, (i + 3) * 2]]) np.testing.assert_array_equal(item["out"], golden) @@ -219,7 +219,7 @@ def test_case_8(): python_multiprocessing=True) i = 0 - for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary # In this test, the dataset is 2x2 sequential tensors golden = np.array([[i, i + 1], [i + 2, i + 3]]) np.testing.assert_array_equal(item["out0"], golden) @@ -243,7 +243,7 @@ def test_case_9(): output_columns="out", num_parallel_workers=4, python_multiprocessing=True) i = 0 - for item in data1.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary # In this test, the dataset is 2x2 sequential tensors golden = np.array([[i * 2 + 3, (i + 1) * 2 + 3], [(i + 2) * 2 + 3, (i + 3) * 2 + 3]]) np.testing.assert_array_equal(item["out"], golden) diff --git a/tests/ut/python/dataset/test_python_tokenizer.py b/tests/ut/python/dataset/test_python_tokenizer.py index 77b13c837bc..50001793ecd 100644 --- a/tests/ut/python/dataset/test_python_tokenizer.py +++ b/tests/ut/python/dataset/test_python_tokenizer.py @@ -41,7 +41,7 @@ def test_whitespace_tokenizer_ch(): tokenizer = text.PythonTokenizer(my_tokenizer) dataset = dataset.map(operations=tokenizer, num_parallel_workers=1) tokens = [] - for i in dataset.create_dict_iterator(num_epochs=1): + for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): s = text.to_str(i['text']).tolist() tokens.append(s) logger.info("The out tokens is : {}".format(tokens)) diff --git a/tests/ut/python/dataset/test_random_affine.py b/tests/ut/python/dataset/test_random_affine.py index 407bec759db..4c5f986cc1a 100644 --- a/tests/ut/python/dataset/test_random_affine.py +++ b/tests/ut/python/dataset/test_random_affine.py @@ -59,7 +59,8 @@ def test_random_affine_op(plot=False): image_affine = [] image_original = [] - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): image1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image2 = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image_affine.append(image1) @@ -92,7 +93,8 @@ def test_random_affine_op_c(plot=False): image_affine = [] image_original = [] - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): image1 = item1["image"] image2 = item2["image"] image_affine.append(image1) diff --git a/tests/ut/python/dataset/test_random_apply.py b/tests/ut/python/dataset/test_random_apply.py index 44304be85d2..c84ccaefbbc 100644 --- a/tests/ut/python/dataset/test_random_apply.py +++ b/tests/ut/python/dataset/test_random_apply.py @@ -58,7 +58,8 @@ def test_random_apply_op(plot=False): image_apply = [] image_original = [] - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): image1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image2 = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image_apply.append(image1) diff --git a/tests/ut/python/dataset/test_random_choice.py b/tests/ut/python/dataset/test_random_choice.py index 729f1bb92d8..0a829fa0db6 100644 --- a/tests/ut/python/dataset/test_random_choice.py +++ b/tests/ut/python/dataset/test_random_choice.py @@ -55,7 +55,8 @@ def test_random_choice_op(plot=False): image_choice = [] image_original = [] - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): image1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image2 = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image_choice.append(image1) @@ -94,7 +95,8 @@ def test_random_choice_comp(plot=False): image_choice = [] image_original = [] - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): image1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image2 = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image_choice.append(image1) diff --git a/tests/ut/python/dataset/test_random_color.py b/tests/ut/python/dataset/test_random_color.py index c3701742a07..4ea0de372cb 100644 --- a/tests/ut/python/dataset/test_random_color.py +++ b/tests/ut/python/dataset/test_random_color.py @@ -56,10 +56,10 @@ def test_random_color_py(degrees=(0.1, 1.9), plot=False): for idx, (image, _) in enumerate(ds_original): if idx == 0: - images_original = np.transpose(image, (0, 2, 3, 1)) + images_original = np.transpose(image.asnumpy(), (0, 2, 3, 1)) else: images_original = np.append(images_original, - np.transpose(image, (0, 2, 3, 1)), + np.transpose(image.asnumpy(), (0, 2, 3, 1)), axis=0) # Random Color Adjusted Images @@ -76,10 +76,10 @@ def test_random_color_py(degrees=(0.1, 1.9), plot=False): for idx, (image, _) in enumerate(ds_random_color): if idx == 0: - images_random_color = np.transpose(image, (0, 2, 3, 1)) + images_random_color = np.transpose(image.asnumpy(), (0, 2, 3, 1)) else: images_random_color = np.append(images_random_color, - np.transpose(image, (0, 2, 3, 1)), + np.transpose(image.asnumpy(), (0, 2, 3, 1)), axis=0) num_samples = images_original.shape[0] @@ -117,7 +117,8 @@ def test_random_color_c(degrees=(0.1, 1.9), plot=False, run_golden=True): image_random_color_op = [] image = [] - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): actual = item1["image"] expected = item2["image"] image.append(actual) @@ -193,7 +194,8 @@ def test_compare_random_color_op(degrees=None, plot=False): image_random_color_op = [] image = [] - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): actual = item1["image"] expected = item2["image"] image_random_color_op.append(actual) diff --git a/tests/ut/python/dataset/test_random_color_adjust.py b/tests/ut/python/dataset/test_random_color_adjust.py index c8c9f76f6ba..638b74dd5de 100644 --- a/tests/ut/python/dataset/test_random_color_adjust.py +++ b/tests/ut/python/dataset/test_random_color_adjust.py @@ -93,7 +93,8 @@ def util_test_random_color_adjust_op(brightness=(1, 1), contrast=(1, 1), saturat data2 = data2.map(operations=transform, input_columns=["image"]) num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): num_iter += 1 c_image = item1["image"] py_image = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) diff --git a/tests/ut/python/dataset/test_random_crop.py b/tests/ut/python/dataset/test_random_crop.py index f1bddace300..acb1811c18b 100644 --- a/tests/ut/python/dataset/test_random_crop.py +++ b/tests/ut/python/dataset/test_random_crop.py @@ -53,7 +53,8 @@ def test_random_crop_op_c(plot=False): image_cropped = [] image = [] - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): image1 = item1["image"] image2 = item2["image"] image_cropped.append(image1) @@ -87,7 +88,8 @@ def test_random_crop_op_py(plot=False): crop_images = [] original_images = [] - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): crop = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) original = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) crop_images.append(crop) @@ -530,7 +532,8 @@ def test_random_crop_comp(plot=False): image_c_cropped = [] image_py_cropped = [] - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): c_image = item1["image"] py_image = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image_c_cropped.append(c_image) diff --git a/tests/ut/python/dataset/test_random_crop_and_resize.py b/tests/ut/python/dataset/test_random_crop_and_resize.py index d501af02b3c..023ed81f549 100644 --- a/tests/ut/python/dataset/test_random_crop_and_resize.py +++ b/tests/ut/python/dataset/test_random_crop_and_resize.py @@ -53,7 +53,8 @@ def test_random_crop_and_resize_op_c(plot=False): num_iter = 0 crop_and_resize_images = [] original_images = [] - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): crop_and_resize = item1["image"] original = item2["image"] # Note: resize the original image with the same size as the one applied RandomResizedCrop() @@ -95,7 +96,8 @@ def test_random_crop_and_resize_op_py(plot=False): num_iter = 0 crop_and_resize_images = [] original_images = [] - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): crop_and_resize = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) original = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) original = cv2.resize(original, (512, 256)) @@ -327,7 +329,8 @@ def test_random_crop_and_resize_comp(plot=False): image_c_cropped = [] image_py_cropped = [] - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): c_image = item1["image"] py_image = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image_c_cropped.append(c_image) diff --git a/tests/ut/python/dataset/test_random_crop_and_resize_with_bbox.py b/tests/ut/python/dataset/test_random_crop_and_resize_with_bbox.py index bb93a924f75..2727325e007 100644 --- a/tests/ut/python/dataset/test_random_crop_and_resize_with_bbox.py +++ b/tests/ut/python/dataset/test_random_crop_and_resize_with_bbox.py @@ -57,7 +57,8 @@ def test_random_resized_crop_with_bbox_op_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1, output_numpy=True), + dataVoc2.create_dict_iterator(num_epochs=1, output_numpy=True)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -90,7 +91,8 @@ def test_random_resized_crop_with_bbox_op_coco_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataCoco1.create_dict_iterator(num_epochs=1), dataCoco2.create_dict_iterator(num_epochs=1)): + for unAug, Aug in zip(dataCoco1.create_dict_iterator(num_epochs=1, output_numpy=True), + dataCoco2.create_dict_iterator(num_epochs=1, output_numpy=True)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -127,7 +129,8 @@ def test_random_resized_crop_with_bbox_op_edge_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1, output_numpy=True), + dataVoc2.create_dict_iterator(num_epochs=1, output_numpy=True)): unaugSamp.append(unAug) augSamp.append(Aug) diff --git a/tests/ut/python/dataset/test_random_crop_decode_resize.py b/tests/ut/python/dataset/test_random_crop_decode_resize.py index ffda69630ac..0ab1409a1b9 100644 --- a/tests/ut/python/dataset/test_random_crop_decode_resize.py +++ b/tests/ut/python/dataset/test_random_crop_decode_resize.py @@ -46,7 +46,8 @@ def test_random_crop_decode_resize_op(plot=False): data2 = data2.map(operations=random_crop_resize_op, input_columns=["image"]) num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): if num_iter > 0: break image1 = item1["image"] diff --git a/tests/ut/python/dataset/test_random_crop_with_bbox.py b/tests/ut/python/dataset/test_random_crop_with_bbox.py index 3cdec02f167..f887d146fc0 100644 --- a/tests/ut/python/dataset/test_random_crop_with_bbox.py +++ b/tests/ut/python/dataset/test_random_crop_with_bbox.py @@ -52,7 +52,8 @@ def test_random_crop_with_bbox_op_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1, output_numpy=True), + dataVoc2.create_dict_iterator(num_epochs=1, output_numpy=True)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -81,7 +82,8 @@ def test_random_crop_with_bbox_op_coco_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataCoco1.create_dict_iterator(num_epochs=1), dataCoco2.create_dict_iterator(num_epochs=1)): + for unAug, Aug in zip(dataCoco1.create_dict_iterator(num_epochs=1, output_numpy=True), + dataCoco2.create_dict_iterator(num_epochs=1, output_numpy=True)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -115,7 +117,8 @@ def test_random_crop_with_bbox_op2_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1, output_numpy=True), + dataVoc2.create_dict_iterator(num_epochs=1, output_numpy=True)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -148,7 +151,8 @@ def test_random_crop_with_bbox_op3_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1, output_numpy=True), + dataVoc2.create_dict_iterator(num_epochs=1, output_numpy=True)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -186,7 +190,8 @@ def test_random_crop_with_bbox_op_edge_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1, output_numpy=True), + dataVoc2.create_dict_iterator(num_epochs=1, output_numpy=True)): unaugSamp.append(unAug) augSamp.append(Aug) diff --git a/tests/ut/python/dataset/test_random_erasing.py b/tests/ut/python/dataset/test_random_erasing.py index 3ea212e29a4..fa77ff0d3f9 100644 --- a/tests/ut/python/dataset/test_random_erasing.py +++ b/tests/ut/python/dataset/test_random_erasing.py @@ -57,7 +57,8 @@ def test_random_erasing_op(plot=False): data2 = data2.map(operations=transform_2, input_columns=["image"]) num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): num_iter += 1 image_1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image_2 = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) diff --git a/tests/ut/python/dataset/test_random_grayscale.py b/tests/ut/python/dataset/test_random_grayscale.py index 30cf9bca411..6080869b476 100644 --- a/tests/ut/python/dataset/test_random_grayscale.py +++ b/tests/ut/python/dataset/test_random_grayscale.py @@ -58,7 +58,8 @@ def test_random_grayscale_valid_prob(plot=False): image_gray = [] image = [] - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): image1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image2 = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image_gray.append(image1) @@ -98,7 +99,8 @@ def test_random_grayscale_input_grayscale_images(): image_gray = [] image = [] - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): image1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image2 = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image_gray.append(image1) diff --git a/tests/ut/python/dataset/test_random_horizontal_flip.py b/tests/ut/python/dataset/test_random_horizontal_flip.py index 4e7163fb25d..1fe894601b0 100644 --- a/tests/ut/python/dataset/test_random_horizontal_flip.py +++ b/tests/ut/python/dataset/test_random_horizontal_flip.py @@ -59,7 +59,8 @@ def test_random_horizontal_op(plot=False): data2 = data2.map(operations=decode_op, input_columns=["image"]) num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): # with the seed value, we can only guarantee the first number generated if num_iter > 0: @@ -194,7 +195,8 @@ def test_random_horizontal_comp(plot=False): images_list_c = [] images_list_py = [] - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): image_c = item1["image"] image_py = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) images_list_c.append(image_c) diff --git a/tests/ut/python/dataset/test_random_horizontal_flip_with_bbox.py b/tests/ut/python/dataset/test_random_horizontal_flip_with_bbox.py index 22c1eed661e..e04cb33a17d 100644 --- a/tests/ut/python/dataset/test_random_horizontal_flip_with_bbox.py +++ b/tests/ut/python/dataset/test_random_horizontal_flip_with_bbox.py @@ -49,7 +49,8 @@ def test_random_horizontal_flip_with_bbox_op_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1, output_numpy=True), + dataVoc2.create_dict_iterator(num_epochs=1, output_numpy=True)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -78,7 +79,8 @@ def test_random_horizontal_flip_with_bbox_op_coco_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataCoco1.create_dict_iterator(num_epochs=1), dataCoco2.create_dict_iterator(num_epochs=1)): + for unAug, Aug in zip(dataCoco1.create_dict_iterator(num_epochs=1, output_numpy=True), + dataCoco2.create_dict_iterator(num_epochs=1, output_numpy=True)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -114,7 +116,8 @@ def test_random_horizontal_flip_with_bbox_valid_rand_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1, output_numpy=True), + dataVoc2.create_dict_iterator(num_epochs=1, output_numpy=True)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -156,7 +159,8 @@ def test_random_horizontal_flip_with_bbox_valid_edge_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1, output_numpy=True), + dataVoc2.create_dict_iterator(num_epochs=1, output_numpy=True)): unaugSamp.append(unAug) augSamp.append(Aug) diff --git a/tests/ut/python/dataset/test_random_order.py b/tests/ut/python/dataset/test_random_order.py index 74bd5958cc6..5a3571e327d 100644 --- a/tests/ut/python/dataset/test_random_order.py +++ b/tests/ut/python/dataset/test_random_order.py @@ -58,7 +58,8 @@ def test_random_order_op(plot=False): image_order = [] image_original = [] - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): image1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image2 = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image_order.append(image1) diff --git a/tests/ut/python/dataset/test_random_perspective.py b/tests/ut/python/dataset/test_random_perspective.py index 94e00783a86..116963d3a0f 100644 --- a/tests/ut/python/dataset/test_random_perspective.py +++ b/tests/ut/python/dataset/test_random_perspective.py @@ -59,7 +59,8 @@ def test_random_perspective_op(plot=False): image_perspective = [] image_original = [] - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): image1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image2 = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image_perspective.append(image1) diff --git a/tests/ut/python/dataset/test_random_posterize.py b/tests/ut/python/dataset/test_random_posterize.py index de910439c0b..f919b450f32 100644 --- a/tests/ut/python/dataset/test_random_posterize.py +++ b/tests/ut/python/dataset/test_random_posterize.py @@ -53,7 +53,8 @@ def test_random_posterize_op_c(plot=False, run_golden=False): image_posterize = [] image_original = [] - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): image1 = item1["image"] image2 = item2["image"] image_posterize.append(image1) @@ -99,7 +100,8 @@ def test_random_posterize_op_fixed_point_c(plot=False, run_golden=True): image_posterize = [] image_original = [] - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): image1 = item1["image"] image2 = item2["image"] image_posterize.append(image1) @@ -136,7 +138,8 @@ def test_random_posterize_default_c_md5(plot=False, run_golden=True): image_posterize = [] image_original = [] - for item1, item2 in zip(data1.create_dict_iterator(), data2.create_dict_iterator()): + for item1, item2 in zip(data1.create_dict_iterator(output_numpy=True), + data2.create_dict_iterator(output_numpy=True)): image1 = item1["image"] image2 = item2["image"] image_posterize.append(image1) diff --git a/tests/ut/python/dataset/test_random_resize.py b/tests/ut/python/dataset/test_random_resize.py index 2db2edc1678..25f34f67cf7 100644 --- a/tests/ut/python/dataset/test_random_resize.py +++ b/tests/ut/python/dataset/test_random_resize.py @@ -44,7 +44,8 @@ def test_random_resize_op(plot=False): image_original = [] image_resized = [] num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): image_1 = item1["image"] image_2 = item2["image"] image_original.append(image_1) diff --git a/tests/ut/python/dataset/test_random_resize_with_bbox.py b/tests/ut/python/dataset/test_random_resize_with_bbox.py index f36b6e45ce4..dff65154700 100644 --- a/tests/ut/python/dataset/test_random_resize_with_bbox.py +++ b/tests/ut/python/dataset/test_random_resize_with_bbox.py @@ -55,7 +55,8 @@ def test_random_resize_with_bbox_op_voc_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1, output_numpy=True), + dataVoc2.create_dict_iterator(num_epochs=1, output_numpy=True)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -97,7 +98,8 @@ def test_random_resize_with_bbox_op_rand_coco_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataCoco1.create_dict_iterator(num_epochs=1), dataCoco2.create_dict_iterator(num_epochs=1)): + for unAug, Aug in zip(dataCoco1.create_dict_iterator(num_epochs=1, output_numpy=True), + dataCoco2.create_dict_iterator(num_epochs=1, output_numpy=True)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -137,7 +139,8 @@ def test_random_resize_with_bbox_op_edge_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1, output_numpy=True), + dataVoc2.create_dict_iterator(num_epochs=1, output_numpy=True)): unaugSamp.append(unAug) augSamp.append(Aug) diff --git a/tests/ut/python/dataset/test_random_rotation.py b/tests/ut/python/dataset/test_random_rotation.py index e8aec50ed34..ed82e565276 100644 --- a/tests/ut/python/dataset/test_random_rotation.py +++ b/tests/ut/python/dataset/test_random_rotation.py @@ -52,7 +52,8 @@ def test_random_rotation_op_c(plot=False): data2 = data2.map(operations=decode_op, input_columns=["image"]) num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): if num_iter > 0: break rotation_de = item1["image"] @@ -88,7 +89,8 @@ def test_random_rotation_op_py(plot=False): data2 = data2.map(operations=transform2, input_columns=["image"]) num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): if num_iter > 0: break rotation_de = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) @@ -194,7 +196,8 @@ def test_rotation_diff(plot=False): num_iter = 0 image_list_c, image_list_py = [], [] - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): num_iter += 1 c_image = item1["image"] py_image = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) diff --git a/tests/ut/python/dataset/test_random_select_subpolicy.py b/tests/ut/python/dataset/test_random_select_subpolicy.py index f08bbacac5a..919f3cc81c5 100644 --- a/tests/ut/python/dataset/test_random_select_subpolicy.py +++ b/tests/ut/python/dataset/test_random_select_subpolicy.py @@ -26,7 +26,7 @@ def test_random_select_subpolicy(): data = ds.NumpySlicesDataset(arr, column_names="col", shuffle=False) data = data.map(operations=visions.RandomSelectSubpolicy(policy), input_columns=["col"]) res = [] - for i in data.create_dict_iterator(num_epochs=1): + for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): res.append(i["col"].tolist()) return res except (TypeError, ValueError) as e: diff --git a/tests/ut/python/dataset/test_random_sharpness.py b/tests/ut/python/dataset/test_random_sharpness.py index 5191d8538ee..0acf1a2e0fe 100644 --- a/tests/ut/python/dataset/test_random_sharpness.py +++ b/tests/ut/python/dataset/test_random_sharpness.py @@ -48,7 +48,7 @@ def test_random_sharpness_py(degrees=(0.7, 0.7), plot=False): ds_original = ds_original.batch(512) - for idx, (image, _) in enumerate(ds_original): + for idx, (image, _) in enumerate(ds_original.create_tuple_iterator(output_numpy=True)): if idx == 0: images_original = np.transpose(image, (0, 2, 3, 1)) else: @@ -72,7 +72,7 @@ def test_random_sharpness_py(degrees=(0.7, 0.7), plot=False): ds_random_sharpness = ds_random_sharpness.batch(512) - for idx, (image, _) in enumerate(ds_random_sharpness): + for idx, (image, _) in enumerate(ds_random_sharpness.create_tuple_iterator(output_numpy=True)): if idx == 0: images_random_sharpness = np.transpose(image, (0, 2, 3, 1)) else: @@ -137,7 +137,7 @@ def test_random_sharpness_c(degrees=(1.6, 1.6), plot=False): ds_original = ds_original.batch(512) - for idx, (image, _) in enumerate(ds_original): + for idx, (image, _) in enumerate(ds_original.create_tuple_iterator(output_numpy=True)): if idx == 0: images_original = image else: @@ -160,7 +160,7 @@ def test_random_sharpness_c(degrees=(1.6, 1.6), plot=False): ds_random_sharpness = ds_random_sharpness.batch(512) - for idx, (image, _) in enumerate(ds_random_sharpness): + for idx, (image, _) in enumerate(ds_random_sharpness.create_tuple_iterator(output_numpy=True)): if idx == 0: images_random_sharpness = image else: @@ -227,7 +227,7 @@ def test_random_sharpness_c_py(degrees=(1.0, 1.0), plot=False): ds_random_sharpness_py = ds_random_sharpness_py.batch(512) - for idx, (image, _) in enumerate(ds_random_sharpness_py): + for idx, (image, _) in enumerate(ds_random_sharpness_py.create_tuple_iterator(output_numpy=True)): if idx == 0: images_random_sharpness_py = image @@ -243,7 +243,7 @@ def test_random_sharpness_c_py(degrees=(1.0, 1.0), plot=False): ds_images_random_sharpness_c = ds_images_random_sharpness_c.batch(512) - for idx, (image, _) in enumerate(ds_images_random_sharpness_c): + for idx, (image, _) in enumerate(ds_images_random_sharpness_c.create_tuple_iterator(output_numpy=True)): if idx == 0: images_random_sharpness_c = image @@ -282,9 +282,9 @@ def test_random_sharpness_one_channel_c(degrees=(1.4, 1.4), plot=False): for _, (data_orig, data_trans) in enumerate(zip(data, ds_random_sharpness_c)): image_orig, label_orig = data_orig image_trans, _ = data_trans - images.append(image_orig) - labels.append(label_orig) - images_trans.append(image_trans) + images.append(image_orig.asnumpy()) + labels.append(label_orig.asnumpy()) + images_trans.append(image_trans.asnumpy()) if plot: visualize_one_channel_dataset(images, images_trans, labels) diff --git a/tests/ut/python/dataset/test_random_solarize_op.py b/tests/ut/python/dataset/test_random_solarize_op.py index 18ea4a13c73..10adcc8108f 100644 --- a/tests/ut/python/dataset/test_random_solarize_op.py +++ b/tests/ut/python/dataset/test_random_solarize_op.py @@ -62,7 +62,8 @@ def test_random_solarize_op(threshold=(10, 150), plot=False, run_golden=True): image_solarized = [] image = [] - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): image_solarized.append(item1["image"].copy()) image.append(item2["image"].copy()) if plot: @@ -88,9 +89,9 @@ def test_random_solarize_mnist(plot=False, run_golden=True): for _, (data_orig, data_trans) in enumerate(zip(mnist_1, mnist_2)): image_orig, label_orig = data_orig image_trans, _ = data_trans - images.append(image_orig) - labels.append(label_orig) - images_trans.append(image_trans) + images.append(image_orig.asnumpy()) + labels.append(label_orig.asnumpy()) + images_trans.append(image_trans.asnumpy()) if plot: visualize_one_channel_dataset(images, images_trans, labels) diff --git a/tests/ut/python/dataset/test_random_vertical_flip.py b/tests/ut/python/dataset/test_random_vertical_flip.py index 27db8888a31..d52c13d1a33 100644 --- a/tests/ut/python/dataset/test_random_vertical_flip.py +++ b/tests/ut/python/dataset/test_random_vertical_flip.py @@ -59,7 +59,8 @@ def test_random_vertical_op(plot=False): data2 = data2.map(operations=decode_op, input_columns=["image"]) num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): # with the seed value, we can only guarantee the first number generated if num_iter > 0: @@ -194,7 +195,8 @@ def test_random_vertical_comp(plot=False): images_list_c = [] images_list_py = [] - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): image_c = item1["image"] image_py = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) images_list_c.append(image_c) diff --git a/tests/ut/python/dataset/test_random_vertical_flip_with_bbox.py b/tests/ut/python/dataset/test_random_vertical_flip_with_bbox.py index f226a8dabe7..5f4f97e7645 100644 --- a/tests/ut/python/dataset/test_random_vertical_flip_with_bbox.py +++ b/tests/ut/python/dataset/test_random_vertical_flip_with_bbox.py @@ -50,7 +50,8 @@ def test_random_vertical_flip_with_bbox_op_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1, output_numpy=True), + dataVoc2.create_dict_iterator(num_epochs=1, output_numpy=True)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -81,7 +82,8 @@ def test_random_vertical_flip_with_bbox_op_coco_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataCoco1.create_dict_iterator(num_epochs=1), dataCoco2.create_dict_iterator(num_epochs=1)): + for unAug, Aug in zip(dataCoco1.create_dict_iterator(num_epochs=1, output_numpy=True), + dataCoco2.create_dict_iterator(num_epochs=1, output_numpy=True)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -115,7 +117,8 @@ def test_random_vertical_flip_with_bbox_op_rand_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1, output_numpy=True), + dataVoc2.create_dict_iterator(num_epochs=1, output_numpy=True)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -155,7 +158,8 @@ def test_random_vertical_flip_with_bbox_op_edge_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1, output_numpy=True), + dataVoc2.create_dict_iterator(num_epochs=1, output_numpy=True)): unaugSamp.append(unAug) augSamp.append(Aug) diff --git a/tests/ut/python/dataset/test_rename.py b/tests/ut/python/dataset/test_rename.py index 0e267daf3fa..1c60037c176 100644 --- a/tests/ut/python/dataset/test_rename.py +++ b/tests/ut/python/dataset/test_rename.py @@ -34,7 +34,7 @@ def test_rename(): num_iter = 0 - for _, item in enumerate(data.create_dict_iterator(num_epochs=1)): + for _, item in enumerate(data.create_dict_iterator(num_epochs=1, output_numpy=True)): logger.info("item[mask] is {}".format(item["masks"])) np.testing.assert_equal(item["masks"], item["input_ids"]) logger.info("item[seg_ids] is {}".format(item["seg_ids"])) diff --git a/tests/ut/python/dataset/test_repeat.py b/tests/ut/python/dataset/test_repeat.py index ee21fb831c6..196a9644fb6 100644 --- a/tests/ut/python/dataset/test_repeat.py +++ b/tests/ut/python/dataset/test_repeat.py @@ -117,7 +117,7 @@ def test_nested_repeat1(): data = data.repeat(2) data = data.repeat(3) - for i, d in enumerate(data): + for i, d in enumerate(data.create_tuple_iterator(output_numpy=True)): assert i % 3 == d[0][0] assert sum([1 for _ in data]) == 2 * 3 * 3 @@ -129,7 +129,7 @@ def test_nested_repeat2(): data = data.repeat(1) data = data.repeat(1) - for i, d in enumerate(data): + for i, d in enumerate(data.create_tuple_iterator(output_numpy=True)): assert i % 3 == d[0][0] assert sum([1 for _ in data]) == 3 @@ -141,7 +141,7 @@ def test_nested_repeat3(): data = data.repeat(1) data = data.repeat(2) - for i, d in enumerate(data): + for i, d in enumerate(data.create_tuple_iterator(output_numpy=True)): assert i % 3 == d[0][0] assert sum([1 for _ in data]) == 2 * 3 @@ -153,7 +153,7 @@ def test_nested_repeat4(): data = data.repeat(2) data = data.repeat(1) - for i, d in enumerate(data): + for i, d in enumerate(data.create_tuple_iterator(output_numpy=True)): assert i % 3 == d[0][0] assert sum([1 for _ in data]) == 2 * 3 @@ -167,7 +167,7 @@ def test_nested_repeat5(): data = data.repeat(3) for _, d in enumerate(data): - np.testing.assert_array_equal(d[0], np.asarray([[0], [1], [2]])) + np.testing.assert_array_equal(d[0].asnumpy(), np.asarray([[0], [1], [2]])) assert sum([1 for _ in data]) == 6 @@ -180,7 +180,7 @@ def test_nested_repeat6(): data = data.repeat(3) for _, d in enumerate(data): - np.testing.assert_array_equal(d[0], np.asarray([[0], [1], [2]])) + np.testing.assert_array_equal(d[0].asnumpy(), np.asarray([[0], [1], [2]])) assert sum([1 for _ in data]) == 6 @@ -193,7 +193,7 @@ def test_nested_repeat7(): data = data.batch(3) for _, d in enumerate(data): - np.testing.assert_array_equal(d[0], np.asarray([[0], [1], [2]])) + np.testing.assert_array_equal(d[0].asnumpy(), np.asarray([[0], [1], [2]])) assert sum([1 for _ in data]) == 6 @@ -207,9 +207,9 @@ def test_nested_repeat8(): for i, d in enumerate(data): if i % 2 == 0: - np.testing.assert_array_equal(d[0], np.asarray([[0], [1]])) + np.testing.assert_array_equal(d[0].asnumpy(), np.asarray([[0], [1]])) else: - np.testing.assert_array_equal(d[0], np.asarray([[2]])) + np.testing.assert_array_equal(d[0].asnumpy(), np.asarray([[2]])) assert sum([1 for _ in data]) == 6 * 2 @@ -221,7 +221,7 @@ def test_nested_repeat9(): data = data.repeat(3) for i, d in enumerate(data): - assert i % 3 == d[0][0] + assert i % 3 == d[0].asnumpy()[0] if i == 10: break @@ -233,7 +233,7 @@ def test_nested_repeat10(): data = data.repeat() for i, d in enumerate(data): - assert i % 3 == d[0][0] + assert i % 3 == d[0].asnumpy()[0] if i == 10: break @@ -247,7 +247,7 @@ def test_nested_repeat11(): data = data.repeat(5) for i, d in enumerate(data): - assert i % 3 == d[0][0] + assert i % 3 == d[0].asnumpy()[0] assert sum([1 for _ in data]) == 2 * 3 * 4 * 5 * 3 diff --git a/tests/ut/python/dataset/test_rescale_op.py b/tests/ut/python/dataset/test_rescale_op.py index 64633ad214d..b595b7886c8 100644 --- a/tests/ut/python/dataset/test_rescale_op.py +++ b/tests/ut/python/dataset/test_rescale_op.py @@ -44,7 +44,7 @@ def get_rescaled(image_id): data1 = data1.map(operations=decode_op, input_columns=["image"]) num_iter = 0 for item in data1.create_dict_iterator(num_epochs=1): - image = item["image"] + image = item["image"].asnumpy() if num_iter == image_id: return rescale_np(image) num_iter += 1 @@ -69,7 +69,8 @@ def test_rescale_op(plot=False): data2 = data1.map(operations=rescale_op, input_columns=["image"]) num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): image_original = item1["image"] image_de_rescaled = item2["image"] image_np_rescaled = get_rescaled(num_iter) diff --git a/tests/ut/python/dataset/test_resize.py b/tests/ut/python/dataset/test_resize.py index 03ec60731e9..968336b2c7e 100644 --- a/tests/ut/python/dataset/test_resize.py +++ b/tests/ut/python/dataset/test_resize.py @@ -47,7 +47,8 @@ def test_resize_op(plot=False): data2 = data1.map(operations=resize_op, input_columns=["image"]) image_original = [] image_resized = [] - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): image_1 = item1["image"] image_2 = item2["image"] image_original.append(image_1) @@ -79,7 +80,8 @@ def test_resize_md5(plot=False): # Compare with expected md5 from images save_and_check_md5(data1, filename, generate_golden=GENERATE_GOLDEN) - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): image_1 = item1["image"] image_2 = item2["image"] image_original.append(image_1) diff --git a/tests/ut/python/dataset/test_resize_with_bbox.py b/tests/ut/python/dataset/test_resize_with_bbox.py index 74d1340e06f..3f7896c4de3 100644 --- a/tests/ut/python/dataset/test_resize_with_bbox.py +++ b/tests/ut/python/dataset/test_resize_with_bbox.py @@ -55,7 +55,8 @@ def test_resize_with_bbox_op_voc_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1, output_numpy=True), + dataVoc2.create_dict_iterator(num_epochs=1, output_numpy=True)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -91,7 +92,8 @@ def test_resize_with_bbox_op_coco_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataCOCO1.create_dict_iterator(num_epochs=1), dataCOCO2.create_dict_iterator(num_epochs=1)): + for unAug, Aug in zip(dataCOCO1.create_dict_iterator(num_epochs=1, output_numpy=True), + dataCOCO2.create_dict_iterator(num_epochs=1, output_numpy=True)): unaugSamp.append(unAug) augSamp.append(Aug) @@ -127,7 +129,8 @@ def test_resize_with_bbox_op_edge_c(plot_vis=False): unaugSamp, augSamp = [], [] - for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1), dataVoc2.create_dict_iterator(num_epochs=1)): + for unAug, Aug in zip(dataVoc1.create_dict_iterator(num_epochs=1, output_numpy=True), + dataVoc2.create_dict_iterator(num_epochs=1, output_numpy=True)): unaugSamp.append(unAug) augSamp.append(Aug) diff --git a/tests/ut/python/dataset/test_rgb_hsv.py b/tests/ut/python/dataset/test_rgb_hsv.py index 4f5672a6527..162cb05e18c 100644 --- a/tests/ut/python/dataset/test_rgb_hsv.py +++ b/tests/ut/python/dataset/test_rgb_hsv.py @@ -157,8 +157,8 @@ def test_rgb_hsv_pipeline(): num_iter = 0 for data1, data2 in zip(ds1.create_dict_iterator(num_epochs=1), ds2.create_dict_iterator(num_epochs=1)): num_iter += 1 - ori_img = data1["image"] - cvt_img = data2["image"] + ori_img = data1["image"].asnumpy() + cvt_img = data2["image"].asnumpy() assert_allclose(ori_img.flatten(), cvt_img.flatten(), rtol=1e-5, atol=0) assert ori_img.shape == cvt_img.shape diff --git a/tests/ut/python/dataset/test_sampler.py b/tests/ut/python/dataset/test_sampler.py index dae48c2fa8e..2216fdacdba 100644 --- a/tests/ut/python/dataset/test_sampler.py +++ b/tests/ut/python/dataset/test_sampler.py @@ -33,7 +33,7 @@ def test_sequential_sampler(print_res=False): if num_repeats is not None: data1 = data1.repeat(num_repeats) res = [] - for item in data1.create_dict_iterator(num_epochs=1): + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info("item[image].shape[0]: {}, item[label].item(): {}" .format(item["image"].shape[0], item["label"].item())) res.append(map_[(item["image"].shape[0], item["label"].item())]) @@ -55,7 +55,7 @@ def test_random_sampler(print_res=False): data1 = ds.ManifestDataset(manifest_file, sampler=sampler) data1 = data1.repeat(num_repeats) res = [] - for item in data1.create_dict_iterator(num_epochs=1): + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): res.append(map_[(item["image"].shape[0], item["label"].item())]) if print_res: logger.info("image.shapes and labels: {}".format(res)) @@ -78,7 +78,7 @@ def test_random_sampler_multi_iter(print_res=False): data1 = ds.ManifestDataset(manifest_file, sampler=sampler) while num_repeats > 0: res = [] - for item in data1.create_dict_iterator(num_epochs=1): + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): res.append(map_[(item["image"].shape[0], item["label"].item())]) if print_res: logger.info("image.shapes and labels: {}".format(res)) @@ -135,7 +135,7 @@ def test_python_sampler(): if num_repeats is not None: data1 = data1.repeat(num_repeats) res = [] - for item in data1.create_dict_iterator(num_epochs=1): + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info("item[image].shape[0]: {}, item[label].item(): {}" .format(item["image"].shape[0], item["label"].item())) res.append(map_[(item["image"].shape[0], item["label"].item())]) @@ -151,7 +151,7 @@ def test_python_sampler(): data1 = ds.GeneratorDataset([(np.array(i),) for i in range(100)], ["data"], sampler=MySampler()) i = 99 for data in data1: - assert data[0] == (np.array(i),) + assert data[0].asnumpy() == (np.array(i),) i = i - 1 assert test_config(2, Sp1(5)) == [0, 1, 2, 3, 4, 0, 1, 2, 3, 4] @@ -174,7 +174,7 @@ def test_subset_sampler(): d = ds.ManifestDataset(manifest_file, sampler=sampler) res = [] - for item in d.create_dict_iterator(num_epochs=1): + for item in d.create_dict_iterator(num_epochs=1, output_numpy=True): res.append(map_[(item["image"].shape[0], item["label"].item())]) return res @@ -202,7 +202,7 @@ def test_sampler_chain(): data1 = ds.ManifestDataset(manifest_file, sampler=sampler) res = [] - for item in data1.create_dict_iterator(num_epochs=1): + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info("item[image].shape[0]: {}, item[label].item(): {}" .format(item["image"].shape[0], item["label"].item())) res.append(map_[(item["image"].shape[0], item["label"].item())]) diff --git a/tests/ut/python/dataset/test_save_op.py b/tests/ut/python/dataset/test_save_op.py index 967b9ab86b6..0679fab56fb 100644 --- a/tests/ut/python/dataset/test_save_op.py +++ b/tests/ut/python/dataset/test_save_op.py @@ -109,7 +109,7 @@ def test_case_00(add_and_remove_cv_file): # only bin data shuffle=False) assert d2.get_dataset_size() == 5 num_iter = 0 - for item in d2.create_dict_iterator(num_epochs=1): + for item in d2.create_dict_iterator(num_epochs=1, output_numpy=True): assert len(item) == 5 for field in item: if isinstance(item[field], np.ndarray): @@ -152,7 +152,7 @@ def test_case_01(add_and_remove_cv_file): # only raw data shuffle=False) assert d2.get_dataset_size() == 6 num_iter = 0 - for item in d2.create_dict_iterator(num_epochs=1): + for item in d2.create_dict_iterator(num_epochs=1, output_numpy=True): logger.info(item) assert len(item) == 2 for field in item: @@ -289,7 +289,7 @@ def test_case_02(add_and_remove_cv_file): # muti-bytes shuffle=False) assert d2.get_dataset_size() == 6 num_iter = 0 - for item in d2.create_dict_iterator(num_epochs=1): + for item in d2.create_dict_iterator(num_epochs=1, output_numpy=True): assert len(item) == 13 for field in item: if isinstance(item[field], np.ndarray): @@ -322,7 +322,7 @@ def test_case_03(add_and_remove_cv_file): shuffle=False) i = 0 - for item in d2.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in d2.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary golden = np.array([i]) np.testing.assert_array_equal(item["data"], golden) i = i + 1 @@ -351,7 +351,7 @@ def type_tester(t): i = 0 num_repeat = 0 - for item in d2.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in d2.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary golden = np.array([[i], [i + 1], [i + 2], [i + 3]], dtype=t) logger.info(item) np.testing.assert_array_equal(item["data"], golden) @@ -409,14 +409,14 @@ def test_case_07(): os.remove("{}.db".format(CV_FILE_NAME2)) d1 = ds.TFRecordDataset(TFRECORD_FILES, shuffle=False) tf_data = [] - for x in d1.create_dict_iterator(num_epochs=1): + for x in d1.create_dict_iterator(num_epochs=1, output_numpy=True): tf_data.append(x) d1.save(CV_FILE_NAME2, FILES_NUM) d2 = ds.MindDataset(dataset_file=CV_FILE_NAME2, num_parallel_workers=num_readers, shuffle=False) mr_data = [] - for x in d2.create_dict_iterator(num_epochs=1): + for x in d2.create_dict_iterator(num_epochs=1, output_numpy=True): mr_data.append(x) count = 0 for x in tf_data: diff --git a/tests/ut/python/dataset/test_sentencepiece_tokenizer.py b/tests/ut/python/dataset/test_sentencepiece_tokenizer.py index 731b19677c5..3906dc87495 100644 --- a/tests/ut/python/dataset/test_sentencepiece_tokenizer.py +++ b/tests/ut/python/dataset/test_sentencepiece_tokenizer.py @@ -27,7 +27,7 @@ def test_from_vocab_to_str_UNIGRAM(): dataset = ds.TextFileDataset(DATA_FILE, shuffle=False) dataset = dataset.map(operations=tokenizer) expect = ['▁I', '▁sa', 'w', '▁a', '▁girl', '▁with', '▁a', '▁te', 'les', 'co', 'pe', '.'] - for i in dataset.create_dict_iterator(num_epochs=1): + for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): ret = to_str(i["text"]) for key, value in enumerate(ret): assert value == expect[key] @@ -39,7 +39,7 @@ def test_from_vocab_to_str_BPE(): dataset = ds.TextFileDataset(DATA_FILE, shuffle=False) dataset = dataset.map(operations=tokenizer) expect = ['▁I', '▁saw', '▁a', '▁girl', '▁with', '▁a', '▁te', 'les', 'c', 'ope', '.'] - for i in dataset.create_dict_iterator(num_epochs=1): + for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): ret = to_str(i["text"]) for key, value in enumerate(ret): assert value == expect[key] @@ -52,7 +52,7 @@ def test_from_vocab_to_str_CHAR(): dataset = dataset.map(operations=tokenizer) expect = ['▁', 'I', '▁', 's', 'a', 'w', '▁', 'a', '▁', 'g', 'i', 'r', 'l', '▁', 'w', 'i', 't', 'h',\ '▁', 'a', '▁', 't', 'e', 'l', 'e', 's', 'c', 'o', 'p', 'e', '.'] - for i in dataset.create_dict_iterator(num_epochs=1): + for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): ret = to_str(i["text"]) for key, value in enumerate(ret): assert value == expect[key] @@ -64,7 +64,7 @@ def test_from_vocab_to_str_WORD(): dataset = ds.TextFileDataset(DATA_FILE, shuffle=False) dataset = dataset.map(operations=tokenizer) expect = ['▁I', '▁saw', '▁a', '▁girl', '▁with', '▁a', '▁telescope.'] - for i in dataset.create_dict_iterator(num_epochs=1): + for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): ret = to_str(i["text"]) for key, value in enumerate(ret): assert value == expect[key] @@ -76,7 +76,7 @@ def test_from_vocab_to_int(): dataset = ds.TextFileDataset(DATA_FILE, shuffle=False) dataset = dataset.map(operations=tokenizer) expect = [6, 329, 183, 8, 945, 23, 8, 3783, 4382, 4641, 1405, 4] - for i in dataset.create_dict_iterator(num_epochs=1): + for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): ret = i["text"] for key, value in enumerate(ret): assert value == expect[key] @@ -89,7 +89,7 @@ def test_from_file_to_str(): dataset = ds.TextFileDataset(DATA_FILE, shuffle=False) dataset = dataset.map(operations=tokenizer) expect = ['▁I', '▁sa', 'w', '▁a', '▁girl', '▁with', '▁a', '▁te', 'les', 'co', 'pe', '.'] - for i in dataset.create_dict_iterator(num_epochs=1): + for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): ret = to_str(i["text"]) for key, value in enumerate(ret): assert value == expect[key] @@ -102,7 +102,7 @@ def test_from_file_to_int(): dataset = ds.TextFileDataset(DATA_FILE, shuffle=False) dataset = dataset.map(operations=tokenizer) expect = [6, 329, 183, 8, 945, 23, 8, 3783, 4382, 4641, 1405, 4] - for i in dataset.create_dict_iterator(num_epochs=1): + for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): ret = i["text"] for key, value in enumerate(ret): assert value == expect[key] @@ -115,7 +115,7 @@ def test_build_from_dataset(): dataset = ds.TextFileDataset(DATA_FILE, shuffle=False) dataset = dataset.map(operations=tokenizer) expect = ['▁I', '▁sa', 'w', '▁a', '▁girl', '▁with', '▁a', '▁te', 'les', 'co', 'pe', '.'] - for i in dataset.create_dict_iterator(num_epochs=1): + for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): ret = to_str(i["text"]) for key, value in enumerate(ret): assert value == expect[key] @@ -134,7 +134,7 @@ def zip_test(dataset): dataset_1 = dataset_1.apply(apply_func) dataset_zip = ds.zip((dataset_1, dataset_2)) expect = ['▁I', '▁sa', 'w', '▁a', '▁girl', '▁with', '▁a', '▁te', 'les', 'co', 'pe', '.'] - for i in dataset_zip.create_dict_iterator(num_epochs=1): + for i in dataset_zip.create_dict_iterator(num_epochs=1, output_numpy=True): ret = to_str(i["text"]) for key, value in enumerate(ret): assert value == expect[key] @@ -144,7 +144,7 @@ def concat_test(dataset): dataset_1 = copy.deepcopy(dataset) dataset = dataset.concat(dataset_1) expect = ['▁I', '▁sa', 'w', '▁a', '▁girl', '▁with', '▁a', '▁te', 'les', 'co', 'pe', '.'] - for i in dataset.create_dict_iterator(num_epochs=1): + for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): ret = to_str(i["text"]) for key, value in enumerate(ret): assert value == expect[key] diff --git a/tests/ut/python/dataset/test_serdes_dataset.py b/tests/ut/python/dataset/test_serdes_dataset.py index 718635e666c..693005ef6ba 100644 --- a/tests/ut/python/dataset/test_serdes_dataset.py +++ b/tests/ut/python/dataset/test_serdes_dataset.py @@ -78,10 +78,10 @@ def test_imagefolder(remove_json_files=True): data4 = ds.deserialize(input_dict=ds1_dict) num_samples = 0 # Iterate and compare the data in the original pipeline (data1) against the deserialized pipeline (data2) - for item1, item2, item3, item4 in zip(data1.create_dict_iterator(num_epochs=1), - data2.create_dict_iterator(num_epochs=1), - data3.create_dict_iterator(num_epochs=1), - data4.create_dict_iterator(num_epochs=1)): + for item1, item2, item3, item4 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True), + data3.create_dict_iterator(num_epochs=1, output_numpy=True), + data4.create_dict_iterator(num_epochs=1, output_numpy=True)): np.testing.assert_array_equal(item1['image'], item2['image']) np.testing.assert_array_equal(item1['image'], item3['image']) np.testing.assert_array_equal(item1['label'], item2['label']) @@ -120,8 +120,9 @@ def test_mnist_dataset(remove_json_files=True): data3 = ds.deserialize(json_filepath="mnist_dataset_pipeline_1.json") num = 0 - for data1, data2, data3 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1), - data3.create_dict_iterator(num_epochs=1)): + for data1, data2, data3 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True), + data3.create_dict_iterator(num_epochs=1, output_numpy=True)): np.testing.assert_array_equal(data1['image'], data2['image']) np.testing.assert_array_equal(data1['image'], data3['image']) np.testing.assert_array_equal(data1['label'], data2['label']) @@ -159,7 +160,8 @@ def test_zip_dataset(remove_json_files=True): assert filecmp.cmp('zip_dataset_pipeline.json', 'zip_dataset_pipeline_1.json') rows = 0 - for d0, d3, d4 in zip(ds0, data3, data4): + for d0, d3, d4 in zip(ds0.create_tuple_iterator(output_numpy=True), data3.create_tuple_iterator(output_numpy=True), + data4.create_tuple_iterator(output_numpy=True)): num_cols = len(d0) offset = 0 for t1 in d0: @@ -200,9 +202,9 @@ def test_random_crop(): data2 = ds.TFRecordDataset(DATA_DIR, SCHEMA_DIR, columns_list=["image"]) data2 = data2.map(operations=decode_op, input_columns="image") - for item1, item1_1, item2 in zip(data1.create_dict_iterator(num_epochs=1), - data1_1.create_dict_iterator(num_epochs=1), - data2.create_dict_iterator(num_epochs=1)): + for item1, item1_1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data1_1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): np.testing.assert_array_equal(item1['image'], item1_1['image']) _ = item2["image"] @@ -255,7 +257,7 @@ def test_minddataset(add_and_remove_cv_file): _ = get_data(CV_DIR_NAME) assert data_set.get_dataset_size() == 5 num_iter = 0 - for _ in data_set.create_dict_iterator(num_epochs=1): + for _ in data_set.create_dict_iterator(num_epochs=1, output_numpy=True): num_iter += 1 assert num_iter == 5 diff --git a/tests/ut/python/dataset/test_shuffle.py b/tests/ut/python/dataset/test_shuffle.py index be488f62f58..f79dd4a400d 100644 --- a/tests/ut/python/dataset/test_shuffle.py +++ b/tests/ut/python/dataset/test_shuffle.py @@ -129,7 +129,8 @@ def test_shuffle_06(): data2 = ds.TFRecordDataset(DATA_DIR, shuffle=ds.Shuffle.FILES) data2 = data2.shuffle(buffer_size=buffer_size) - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): np.testing.assert_equal(item1, item2) diff --git a/tests/ut/python/dataset/test_skip.py b/tests/ut/python/dataset/test_skip.py index 55b13160c21..242ad7ac63e 100644 --- a/tests/ut/python/dataset/test_skip.py +++ b/tests/ut/python/dataset/test_skip.py @@ -58,7 +58,7 @@ def test_generator_skip(): ds1 = ds1.skip(3) buf = [] - for data in ds1: + for data in ds1.create_tuple_iterator(output_numpy=True): buf.append(data[0][0]) assert len(buf) == 2 assert buf == [3, 4] @@ -71,7 +71,7 @@ def test_skip_1(): ds1 = ds1.skip(7) buf = [] - for data in ds1: + for data in ds1.create_tuple_iterator(output_numpy=True): buf.append(data[0][0]) assert buf == [] @@ -83,7 +83,7 @@ def test_skip_2(): ds1 = ds1.skip(0) buf = [] - for data in ds1: + for data in ds1.create_tuple_iterator(output_numpy=True): buf.append(data[0][0]) assert len(buf) == 5 assert buf == [0, 1, 2, 3, 4] @@ -99,7 +99,7 @@ def test_skip_repeat_1(): ds1 = ds1.skip(3) buf = [] - for data in ds1: + for data in ds1.create_tuple_iterator(output_numpy=True): buf.append(data[0][0]) assert len(buf) == 7 assert buf == [3, 4, 0, 1, 2, 3, 4] @@ -115,7 +115,7 @@ def test_skip_repeat_2(): ds1 = ds1.repeat(2) buf = [] - for data in ds1: + for data in ds1.create_tuple_iterator(output_numpy=True): buf.append(data[0][0]) assert len(buf) == 4 assert buf == [3, 4, 3, 4] @@ -134,7 +134,7 @@ def test_skip_repeat_3(): ds1 = ds1.repeat(3) buf = [] - for data in ds1: + for data in ds1.create_tuple_iterator(output_numpy=True): buf.append(data[0][0]) assert len(buf) == 6 assert buf == [3, 4, 3, 4, 3, 4] @@ -150,7 +150,7 @@ def test_skip_take_1(): ds1 = ds1.skip(2) buf = [] - for data in ds1: + for data in ds1.create_tuple_iterator(output_numpy=True): buf.append(data[0][0]) assert len(buf) == 2 assert buf == [2, 3] @@ -166,7 +166,7 @@ def test_skip_take_2(): ds1 = ds1.take(2) buf = [] - for data in ds1: + for data in ds1.create_tuple_iterator(output_numpy=True): buf.append(data[0][0]) assert len(buf) == 2 assert buf == [2, 3] @@ -183,7 +183,7 @@ def test_skip_filter_1(): dataset = dataset.filter(predicate=lambda data: data < 11, num_parallel_workers=4) buf = [] - for item in dataset: + for item in dataset.create_tuple_iterator(output_numpy=True): buf.append(item[0][0]) assert buf == [5, 6, 7, 8, 9, 10] @@ -194,7 +194,7 @@ def test_skip_filter_2(): dataset = dataset.skip(5) buf = [] - for item in dataset: + for item in dataset.create_tuple_iterator(output_numpy=True): buf.append(item[0][0]) assert buf == [5, 6, 7, 8, 9, 10] @@ -205,7 +205,7 @@ def test_skip_exception_1(): try: data1 = data1.skip(count=-1) num_iter = 0 - for _ in data1.create_dict_iterator(num_epochs=1): + for _ in data1.create_dict_iterator(num_epochs=1, output_numpy=True): num_iter += 1 except RuntimeError as e: diff --git a/tests/ut/python/dataset/test_slice_op.py b/tests/ut/python/dataset/test_slice_op.py index 72417bff710..443e850c64c 100644 --- a/tests/ut/python/dataset/test_slice_op.py +++ b/tests/ut/python/dataset/test_slice_op.py @@ -26,7 +26,7 @@ def slice_compare(array, indexing): data = ds.NumpySlicesDataset([array]) array = np.array(array) data = data.map(operations=ops.Slice(indexing)) - for d in data: + for d in data.create_tuple_iterator(output_numpy=True): if indexing is None: array = array[:] else: @@ -93,7 +93,7 @@ def test_slice_multiple_rows(): for i, d in enumerate(data): array = np.array(dataset[i]) array = array[indexing] - np.testing.assert_array_equal(array, d[0]) + np.testing.assert_array_equal(array, d[0].asnumpy()) def test_slice_slice_obj_3s_double(): diff --git a/tests/ut/python/dataset/test_sliding_window.py b/tests/ut/python/dataset/test_sliding_window.py index 8588f6dec6f..b0fa49c2360 100644 --- a/tests/ut/python/dataset/test_sliding_window.py +++ b/tests/ut/python/dataset/test_sliding_window.py @@ -29,7 +29,7 @@ def test_sliding_window_string(): dataset = dataset.map(operations=text.SlidingWindow(2, 0), input_columns=["text"]) result = [] - for data in dataset.create_dict_iterator(num_epochs=1): + for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): for i in range(data['text'].shape[0]): result.append([]) for j in range(data['text'].shape[1]): @@ -48,7 +48,7 @@ def test_sliding_window_number(): dataset = ds.GeneratorDataset(gen(inputs), column_names=["number"]) dataset = dataset.map(operations=text.SlidingWindow(1, -1), input_columns=["number"]) - for data in dataset.create_dict_iterator(num_epochs=1): + for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): np.testing.assert_array_equal(data['number'], expect) @@ -59,7 +59,7 @@ def test_sliding_window_big_width(): dataset = ds.NumpySlicesDataset(inputs, column_names=["number"], shuffle=False) dataset = dataset.map(operations=text.SlidingWindow(30, 0), input_columns=["number"]) - for data in dataset.create_dict_iterator(num_epochs=1): + for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): np.testing.assert_array_equal(data['number'], expect) @@ -86,7 +86,7 @@ def test_sliding_window_exception(): inputs = [[1, 2, 3, 4, 5]] dataset = ds.NumpySlicesDataset(inputs, column_names=["text"], shuffle=False) dataset = dataset.map(operations=text.SlidingWindow(3, -100), input_columns=["text"]) - for _ in dataset.create_dict_iterator(num_epochs=1): + for _ in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): pass assert False except RuntimeError as e: @@ -96,7 +96,7 @@ def test_sliding_window_exception(): inputs = ["aa", "bb", "cc"] dataset = ds.NumpySlicesDataset(inputs, column_names=["text"], shuffle=False) dataset = dataset.map(operations=text.SlidingWindow(2, 0), input_columns=["text"]) - for _ in dataset.create_dict_iterator(num_epochs=1): + for _ in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): pass assert False except RuntimeError as e: diff --git a/tests/ut/python/dataset/test_soft_dvpp.py b/tests/ut/python/dataset/test_soft_dvpp.py index f84da207b10..2a44ee0b4f7 100644 --- a/tests/ut/python/dataset/test_soft_dvpp.py +++ b/tests/ut/python/dataset/test_soft_dvpp.py @@ -42,7 +42,8 @@ def test_soft_dvpp_decode_resize_jpeg(plot=False): data2 = data2.map(operations=soft_dvpp_decode_resize_op, input_columns=["image"]) num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): if num_iter > 0: break image1 = item1["image"] @@ -72,7 +73,8 @@ def test_soft_dvpp_decode_random_crop_resize_jpeg(plot=False): data2 = data2.map(operations=soft_dvpp_random_crop_decode_resize_op, input_columns=["image"]) num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): if num_iter > 0: break image1 = item1["image"] @@ -103,7 +105,8 @@ def test_soft_dvpp_decode_resize_jpeg_supplement(plot=False): data2 = data2.map(operations=soft_dvpp_decode_resize_op, input_columns=["image"]) num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): if num_iter > 0: break image1 = item1["image"] diff --git a/tests/ut/python/dataset/test_split.py b/tests/ut/python/dataset/test_split.py index bfa12e91889..3d09685b31b 100644 --- a/tests/ut/python/dataset/test_split.py +++ b/tests/ut/python/dataset/test_split.py @@ -90,11 +90,11 @@ def test_unmappable_split(): s1, s2 = d.split([4, 1], randomize=False) s1_output = [] - for item in s1.create_dict_iterator(num_epochs=1): + for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True): s1_output.append(item["text"].item().decode("utf8")) s2_output = [] - for item in s2.create_dict_iterator(num_epochs=1): + for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True): s2_output.append(item["text"].item().decode("utf8")) assert s1_output == text_file_data[0:4] @@ -104,11 +104,11 @@ def test_unmappable_split(): s1, s2 = d.split([0.8, 0.2], randomize=False) s1_output = [] - for item in s1.create_dict_iterator(num_epochs=1): + for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True): s1_output.append(item["text"].item().decode("utf8")) s2_output = [] - for item in s2.create_dict_iterator(num_epochs=1): + for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True): s2_output.append(item["text"].item().decode("utf8")) assert s1_output == text_file_data[0:4] @@ -118,11 +118,11 @@ def test_unmappable_split(): s1, s2 = d.split([0.33, 0.67], randomize=False) s1_output = [] - for item in s1.create_dict_iterator(num_epochs=1): + for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True): s1_output.append(item["text"].item().decode("utf8")) s2_output = [] - for item in s2.create_dict_iterator(num_epochs=1): + for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True): s2_output.append(item["text"].item().decode("utf8")) assert s1_output == text_file_data[0:2] @@ -143,11 +143,11 @@ def test_unmappable_randomize_deterministic(): for _ in range(10): s1_output = [] - for item in s1.create_dict_iterator(num_epochs=1): + for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True): s1_output.append(item["text"].item().decode("utf8")) s2_output = [] - for item in s2.create_dict_iterator(num_epochs=1): + for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True): s2_output.append(item["text"].item().decode("utf8")) # note no overlap @@ -172,11 +172,11 @@ def test_unmappable_randomize_repeatable(): s2 = s2.repeat(num_epochs) s1_output = [] - for item in s1.create_dict_iterator(num_epochs=1): + for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True): s1_output.append(item["text"].item().decode("utf8")) s2_output = [] - for item in s2.create_dict_iterator(num_epochs=1): + for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True): s2_output.append(item["text"].item().decode("utf8")) # note no overlap @@ -208,7 +208,7 @@ def test_unmappable_multi_split(): s1_correct_output = [text_file_data[0], text_file_data[2], text_file_data[1], text_file_data[4]] s1_output = [] - for item in s1.create_dict_iterator(num_epochs=1): + for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True): s1_output.append(item["text"].item().decode("utf8")) assert s1_output == s1_correct_output @@ -216,15 +216,15 @@ def test_unmappable_multi_split(): s1s1, s1s2, s1s3 = s1.split([1, 2, 1], randomize=False) s1s1_output = [] - for item in s1s1.create_dict_iterator(num_epochs=1): + for item in s1s1.create_dict_iterator(num_epochs=1, output_numpy=True): s1s1_output.append(item["text"].item().decode("utf8")) s1s2_output = [] - for item in s1s2.create_dict_iterator(num_epochs=1): + for item in s1s2.create_dict_iterator(num_epochs=1, output_numpy=True): s1s2_output.append(item["text"].item().decode("utf8")) s1s3_output = [] - for item in s1s3.create_dict_iterator(num_epochs=1): + for item in s1s3.create_dict_iterator(num_epochs=1, output_numpy=True): s1s3_output.append(item["text"].item().decode("utf8")) assert s1s1_output == [s1_correct_output[0]] @@ -232,7 +232,7 @@ def test_unmappable_multi_split(): assert s1s3_output == [s1_correct_output[3]] s2_output = [] - for item in s2.create_dict_iterator(num_epochs=1): + for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True): s2_output.append(item["text"].item().decode("utf8")) assert s2_output == [text_file_data[3]] @@ -243,15 +243,15 @@ def test_unmappable_multi_split(): s1s1, s1s2, s1s3 = s1.split([1, 2, 1]) s1s1_output = [] - for item in s1s1.create_dict_iterator(num_epochs=1): + for item in s1s1.create_dict_iterator(num_epochs=1, output_numpy=True): s1s1_output.append(item["text"].item().decode("utf8")) s1s2_output = [] - for item in s1s2.create_dict_iterator(num_epochs=1): + for item in s1s2.create_dict_iterator(num_epochs=1, output_numpy=True): s1s2_output.append(item["text"].item().decode("utf8")) s1s3_output = [] - for item in s1s3.create_dict_iterator(num_epochs=1): + for item in s1s3.create_dict_iterator(num_epochs=1, output_numpy=True): s1s3_output.append(item["text"].item().decode("utf8")) assert s1s1_output == [s1_correct_output[shuffled_ids[0]]] @@ -259,7 +259,7 @@ def test_unmappable_multi_split(): assert s1s3_output == [s1_correct_output[shuffled_ids[3]]] s2_output = [] - for item in s2.create_dict_iterator(num_epochs=1): + for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True): s2_output.append(item["text"].item().decode("utf8")) assert s2_output == [text_file_data[3]] @@ -285,11 +285,11 @@ def test_mappable_split_general(): s1, s2 = d.split([4, 1], randomize=False) s1_output = [] - for item in s1.create_dict_iterator(num_epochs=1): + for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True): s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) s2_output = [] - for item in s2.create_dict_iterator(num_epochs=1): + for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True): s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) assert s1_output == [0, 1, 2, 3] @@ -299,11 +299,11 @@ def test_mappable_split_general(): s1, s2 = d.split([0.8, 0.2], randomize=False) s1_output = [] - for item in s1.create_dict_iterator(num_epochs=1): + for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True): s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) s2_output = [] - for item in s2.create_dict_iterator(num_epochs=1): + for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True): s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) assert s1_output == [0, 1, 2, 3] @@ -313,11 +313,11 @@ def test_mappable_split_general(): s1, s2 = d.split([0.33, 0.67], randomize=False) s1_output = [] - for item in s1.create_dict_iterator(num_epochs=1): + for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True): s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) s2_output = [] - for item in s2.create_dict_iterator(num_epochs=1): + for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True): s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) assert s1_output == [0, 1] @@ -331,11 +331,11 @@ def test_mappable_split_optimized(): s1, s2 = d.split([4, 1], randomize=False) s1_output = [] - for item in s1.create_dict_iterator(num_epochs=1): + for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True): s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) s2_output = [] - for item in s2.create_dict_iterator(num_epochs=1): + for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True): s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) assert s1_output == [0, 1, 2, 3] @@ -345,11 +345,11 @@ def test_mappable_split_optimized(): s1, s2 = d.split([0.8, 0.2], randomize=False) s1_output = [] - for item in s1.create_dict_iterator(num_epochs=1): + for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True): s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) s2_output = [] - for item in s2.create_dict_iterator(num_epochs=1): + for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True): s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) assert s1_output == [0, 1, 2, 3] @@ -359,11 +359,11 @@ def test_mappable_split_optimized(): s1, s2 = d.split([0.33, 0.67], randomize=False) s1_output = [] - for item in s1.create_dict_iterator(num_epochs=1): + for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True): s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) s2_output = [] - for item in s2.create_dict_iterator(num_epochs=1): + for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True): s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) assert s1_output == [0, 1] @@ -379,11 +379,11 @@ def test_mappable_randomize_deterministic(): for _ in range(10): s1_output = [] - for item in s1.create_dict_iterator(num_epochs=1): + for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True): s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) s2_output = [] - for item in s2.create_dict_iterator(num_epochs=1): + for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True): s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) # note no overlap @@ -403,11 +403,11 @@ def test_mappable_randomize_repeatable(): s2 = s2.repeat(num_epochs) s1_output = [] - for item in s1.create_dict_iterator(num_epochs=1): + for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True): s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) s2_output = [] - for item in s2.create_dict_iterator(num_epochs=1): + for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True): s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) # note no overlap @@ -442,12 +442,12 @@ def test_mappable_sharding(): # shard 0 s1_output = [] - for item in s1.create_dict_iterator(num_epochs=1): + for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True): s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) # shard 1 d2s1_output = [] - for item in d2s1.create_dict_iterator(num_epochs=1): + for item in d2s1.create_dict_iterator(num_epochs=1, output_numpy=True): d2s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) rows_per_shard_per_epoch = 2 @@ -468,11 +468,11 @@ def test_mappable_sharding(): # test other split s2_output = [] - for item in s2.create_dict_iterator(num_epochs=1): + for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True): s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) d2s2_output = [] - for item in d2s2.create_dict_iterator(num_epochs=1): + for item in d2s2.create_dict_iterator(num_epochs=1, output_numpy=True): d2s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) assert s2_output == [2] @@ -498,7 +498,7 @@ def test_mappable_multi_split(): s1_correct_output = [0, 1, 3, 4] s1_output = [] - for item in s1.create_dict_iterator(num_epochs=1): + for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True): s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) assert s1_output == s1_correct_output @@ -506,15 +506,15 @@ def test_mappable_multi_split(): s1s1, s1s2, s1s3 = s1.split([1, 2, 1], randomize=False) s1s1_output = [] - for item in s1s1.create_dict_iterator(num_epochs=1): + for item in s1s1.create_dict_iterator(num_epochs=1, output_numpy=True): s1s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) s1s2_output = [] - for item in s1s2.create_dict_iterator(num_epochs=1): + for item in s1s2.create_dict_iterator(num_epochs=1, output_numpy=True): s1s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) s1s3_output = [] - for item in s1s3.create_dict_iterator(num_epochs=1): + for item in s1s3.create_dict_iterator(num_epochs=1, output_numpy=True): s1s3_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) assert s1s1_output == [s1_correct_output[0]] @@ -522,7 +522,7 @@ def test_mappable_multi_split(): assert s1s3_output == [s1_correct_output[3]] s2_output = [] - for item in s2.create_dict_iterator(num_epochs=1): + for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True): s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) assert s2_output == [2] @@ -533,15 +533,15 @@ def test_mappable_multi_split(): s1s1, s1s2, s1s3 = s1.split([1, 2, 1]) s1s1_output = [] - for item in s1s1.create_dict_iterator(num_epochs=1): + for item in s1s1.create_dict_iterator(num_epochs=1, output_numpy=True): s1s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) s1s2_output = [] - for item in s1s2.create_dict_iterator(num_epochs=1): + for item in s1s2.create_dict_iterator(num_epochs=1, output_numpy=True): s1s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) s1s3_output = [] - for item in s1s3.create_dict_iterator(num_epochs=1): + for item in s1s3.create_dict_iterator(num_epochs=1, output_numpy=True): s1s3_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) assert s1s1_output == [s1_correct_output[random_sampler_ids[0]]] @@ -549,7 +549,7 @@ def test_mappable_multi_split(): assert s1s3_output == [s1_correct_output[random_sampler_ids[3]]] s2_output = [] - for item in s2.create_dict_iterator(num_epochs=1): + for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True): s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) assert s2_output == [2] @@ -561,11 +561,11 @@ def test_rounding(): s1, s2 = d.split([0.5, 0.5], randomize=False) s1_output = [] - for item in s1.create_dict_iterator(num_epochs=1): + for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True): s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) s2_output = [] - for item in s2.create_dict_iterator(num_epochs=1): + for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True): s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) assert s1_output == [0, 1, 2] @@ -575,15 +575,15 @@ def test_rounding(): s1, s2, s3 = d.split([0.15, 0.55, 0.3], randomize=False) s1_output = [] - for item in s1.create_dict_iterator(num_epochs=1): + for item in s1.create_dict_iterator(num_epochs=1, output_numpy=True): s1_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) s2_output = [] - for item in s2.create_dict_iterator(num_epochs=1): + for item in s2.create_dict_iterator(num_epochs=1, output_numpy=True): s2_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) s3_output = [] - for item in s3.create_dict_iterator(num_epochs=1): + for item in s3.create_dict_iterator(num_epochs=1, output_numpy=True): s3_output.append(manifest_map[(item["image"].shape[0], item["label"].item())]) assert s1_output == [0] diff --git a/tests/ut/python/dataset/test_sync_wait.py b/tests/ut/python/dataset/test_sync_wait.py index f57355c905d..616051abdf6 100644 --- a/tests/ut/python/dataset/test_sync_wait.py +++ b/tests/ut/python/dataset/test_sync_wait.py @@ -48,7 +48,7 @@ def test_simple_sync_wait(): dataset = dataset.map(operations=[aug.preprocess], input_columns=["input"]) dataset = dataset.batch(batch_size) count = 0 - for data in dataset.create_dict_iterator(num_epochs=1): + for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): assert data["input"][0] == count count += batch_size data = {"loss": count} @@ -72,7 +72,7 @@ def test_simple_shuffle_sync(): dataset = dataset.batch(batch_size) count = 0 - for data in dataset.create_dict_iterator(num_epochs=1): + for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): count += 1 data = {"loss": count} dataset.sync_update(condition_name="policy", data=data) @@ -98,7 +98,7 @@ def test_two_sync(): dataset = dataset.batch(batch_size) count = 0 - for data in dataset.create_dict_iterator(num_epochs=1): + for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): count += 1 data = {"loss": count} dataset.sync_update(condition_name="every batch", data=data) @@ -122,7 +122,7 @@ def test_sync_epoch(): for _ in range(3): aug.update({"loss": 0}) count = 0 - for data in dataset.create_dict_iterator(num_epochs=1): + for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): assert data["input"][0] == count count += batch_size data = {"loss": count} @@ -149,7 +149,8 @@ def test_multiple_iterators(): dataset2 = dataset2.map(operations=[aug.preprocess], input_columns=["input"]) dataset2 = dataset2.batch(batch_size, drop_remainder=True) - for item1, item2 in zip(dataset.create_dict_iterator(num_epochs=1), dataset2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(dataset.create_dict_iterator(num_epochs=1, output_numpy=True), + dataset2.create_dict_iterator(num_epochs=1, output_numpy=True)): assert item1["input"][0] == item2["input"][0] data1 = {"loss": item1["input"][0]} data2 = {"loss": item2["input"][0]} @@ -222,7 +223,7 @@ def test_sync_exception_04(): dataset = dataset.map(operations=[aug.preprocess], input_columns=["input"]) count = 0 with pytest.raises(RuntimeError) as e: - for _ in dataset.create_dict_iterator(num_epochs=1): + for _ in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): count += 1 data = {"loss": count} dataset.sync_update(condition_name="every batch", num_batch=-1, data=data) @@ -242,7 +243,7 @@ def test_sync_exception_05(): dataset = dataset.sync_wait(condition_name="every batch", callback=aug.update) dataset = dataset.map(operations=[aug.preprocess], input_columns=["input"]) with pytest.raises(RuntimeError) as e: - for _ in dataset.create_dict_iterator(num_epochs=1): + for _ in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): dataset.disable_sync() count += 1 data = {"loss": count} diff --git a/tests/ut/python/dataset/test_take.py b/tests/ut/python/dataset/test_take.py index f0c6642b124..98271fea28f 100644 --- a/tests/ut/python/dataset/test_take.py +++ b/tests/ut/python/dataset/test_take.py @@ -48,7 +48,7 @@ def test_take_01(): # Here i refers to index, d refers to data element for _, d in enumerate(data1): - assert d[0][0] == 0 + assert d[0].asnumpy()[0] == 0 assert sum([1 for _ in data1]) == 2 @@ -65,7 +65,7 @@ def test_take_02(): # Here i refers to index, d refers to data element for i, d in enumerate(data1): - assert i % 2 == d[0][0] + assert i % 2 == d[0].asnumpy()[0] assert sum([1 for _ in data1]) == 4 @@ -82,7 +82,7 @@ def test_take_03(): # Here i refers to index, d refers to data elements for i, d in enumerate(data1): - assert i % 3 == d[0][0] + assert i % 3 == d[0].asnumpy()[0] assert sum([1 for _ in data1]) == 6 @@ -99,7 +99,7 @@ def test_take_04(): # Here i refers to index, d refers to data element for i, d in enumerate(data1): - assert i % 3 == d[0][0] + assert i % 3 == d[0].asnumpy()[0] assert sum([1 for _ in data1]) == 6 @@ -115,7 +115,7 @@ def test_take_05(): # Here i refers to index, d refers to data element for i, d in enumerate(data1): - assert i == d[0][0] + assert i == d[0].asnumpy()[0] assert sum([1 for _ in data1]) == 2 @@ -132,7 +132,7 @@ def test_take_06(): # Here i refers to index, d refers to data element for i, d in enumerate(data1): - assert i % 3 == d[0][0] + assert i % 3 == d[0].asnumpy()[0] assert sum([1 for _ in data1]) == 4 @@ -173,7 +173,7 @@ def test_take_09(): # Here i refers to index, d refers to data element for i, d in enumerate(data1): - assert i % 3 == d[0][0] + assert i % 3 == d[0].asnumpy()[0] assert sum([1 for _ in data1]) == 6 @@ -190,7 +190,7 @@ def test_take_10(): # Here i refers to index, d refers to data element for i, d in enumerate(data1): - assert i % 3 == d[0][0] + assert i % 3 == d[0].asnumpy()[0] assert sum([1 for _ in data1]) == 6 @@ -208,7 +208,7 @@ def test_take_11(): # Here i refers to index, d refers to data element for i, d in enumerate(data1): - assert 2 * (i % 2) == d[0][0] + assert 2 * (i % 2) == d[0].asnumpy()[0] assert sum([1 for _ in data1]) == 4 @@ -226,7 +226,7 @@ def test_take_12(): # Here i refers to index, d refers to data element for _, d in enumerate(data1): - assert d[0][0] == 0 + assert d[0].asnumpy()[0] == 0 assert sum([1 for _ in data1]) == 2 @@ -245,7 +245,7 @@ def test_take_13(): # Here i refers to index, d refers to data element for _, d in enumerate(data1): - assert d[0][0] == 2 + assert d[0].asnumpy()[0] == 2 assert sum([1 for _ in data1]) == 2 @@ -264,7 +264,7 @@ def test_take_14(): # Here i refers to index, d refers to data element for _, d in enumerate(data1): - assert d[0][0] == 2 + assert d[0].asnumpy()[0] == 2 assert sum([1 for _ in data1]) == 2 @@ -281,7 +281,7 @@ def test_take_15(): # Here i refers to index, d refers to data element for i, d in enumerate(data1): - assert (i + 2) == d[0][0] + assert (i + 2) == d[0].asnumpy()[0] assert sum([1 for _ in data1]) == 4 @@ -298,7 +298,7 @@ def test_take_16(): # Here i refers to index, d refers to data element for i, d in enumerate(data1): - assert (i + 3) == d[0][0] + assert (i + 3) == d[0].asnumpy()[0] assert sum([1 for _ in data1]) == 5 @@ -315,7 +315,7 @@ def test_take_17(): # Here i refers to index, d refers to data element for i, d in enumerate(data1): - assert i == d[0][0] + assert i == d[0].asnumpy()[0] assert sum([1 for _ in data1]) == 4 @@ -336,7 +336,7 @@ def test_take_18(): # Here i refers to index, d refers to data element for _, d in enumerate(data1): - assert d[0][0] == 2 + assert d[0].asnumpy()[0] == 2 assert sum([1 for _ in data1]) == 2 diff --git a/tests/ut/python/dataset/test_ten_crop.py b/tests/ut/python/dataset/test_ten_crop.py index 3dd16734a1a..0b92b78d9c0 100644 --- a/tests/ut/python/dataset/test_ten_crop.py +++ b/tests/ut/python/dataset/test_ten_crop.py @@ -51,7 +51,8 @@ def util_test_ten_crop(crop_size, vertical_flip=False, plot=False): transform_2 = mindspore.dataset.transforms.py_transforms.Compose(transforms_2) data2 = data2.map(operations=transform_2, input_columns=["image"]) num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): num_iter += 1 image_1 = (item1["image"].transpose(1, 2, 0) * 255).astype(np.uint8) image_2 = item2["image"] diff --git a/tests/ut/python/dataset/test_tensor_empty.py b/tests/ut/python/dataset/test_tensor_empty.py index 7d156305c6a..7075df9a95d 100644 --- a/tests/ut/python/dataset/test_tensor_empty.py +++ b/tests/ut/python/dataset/test_tensor_empty.py @@ -25,7 +25,7 @@ def test_tensor_empty(): data = ds.GeneratorDataset(gen, column_names=["col1", "col2", "col3"]) - for d in data: + for d in data.create_tuple_iterator(output_numpy=True): np.testing.assert_array_equal(np.array([], dtype=np.int64), d[0]) np.testing.assert_array_equal(np.array([], dtype='S').reshape([0, 4]), d[1]) np.testing.assert_array_equal(np.array([1], dtype=np.float64), d[2]) @@ -46,7 +46,7 @@ def test_tensor_empty_map(): data = data.map(operations=func, input_columns=["col1", "col2", "col3"]) - for d in data: + for d in data.create_tuple_iterator(output_numpy=True): np.testing.assert_array_equal(np.array([1], dtype=np.int64), d[0]) np.testing.assert_array_equal(np.array(["Hi"], dtype='S'), d[1]) np.testing.assert_array_equal(np.array([], dtype=np.float64), d[2]) @@ -60,7 +60,7 @@ def test_tensor_empty_batch(): data = ds.GeneratorDataset(gen, column_names=["col1", "col2", "col3"]).batch(2) - for d in data: + for d in data.create_tuple_iterator(output_numpy=True): np.testing.assert_array_equal(np.array([], dtype=np.int64).reshape([2, 0]), d[0]) np.testing.assert_array_equal(np.array([], dtype='S').reshape([2, 0, 4]), d[1]) np.testing.assert_array_equal(np.array([[1], [1]], dtype=np.float64), d[2]) diff --git a/tests/ut/python/dataset/test_tensor_string.py b/tests/ut/python/dataset/test_tensor_string.py index 94ed6a86bcb..ba48097c4cb 100644 --- a/tests/ut/python/dataset/test_tensor_string.py +++ b/tests/ut/python/dataset/test_tensor_string.py @@ -35,7 +35,7 @@ def compare(strings, dtype='S'): data = ds.GeneratorDataset(gen, column_names=["col"]) - for d in data: + for d in data.create_tuple_iterator(output_numpy=True): np.testing.assert_array_equal(d[0], arr.astype('S')) @@ -79,7 +79,7 @@ def test_batching_strings(): data = ds.GeneratorDataset(gen, column_names=["col"]) data = data.batch(2, drop_remainder=True) - for d in data: + for d in data.create_tuple_iterator(output_numpy=True): np.testing.assert_array_equal(d[0], to_bytes(chinese[0:2])) @@ -96,7 +96,7 @@ def test_map(): data = data.map(operations=split, input_columns=["col"]) expected = np.array(["ab", "cde", "121"], dtype='S') - for d in data: + for d in data.create_tuple_iterator(output_numpy=True): np.testing.assert_array_equal(d[0], expected) @@ -112,7 +112,7 @@ def test_map2(): data = data.map(operations=upper, input_columns=["col"]) expected = np.array(["AB CDE 121"], dtype='S') - for d in data: + for d in data.create_tuple_iterator(output_numpy=True): np.testing.assert_array_equal(d[0], expected) @@ -124,7 +124,7 @@ def test_tfrecord1(): data = ds.TFRecordDataset("../data/dataset/testTextTFRecord/text.tfrecord", shuffle=False, schema=s) - for i, d in enumerate(data.create_dict_iterator(num_epochs=1)): + for i, d in enumerate(data.create_dict_iterator(num_epochs=1, output_numpy=True)): assert d["line"].shape == line[i].shape assert d["words"].shape == words[i].shape assert d["chinese"].shape == chinese[i].shape @@ -136,7 +136,7 @@ def test_tfrecord1(): def test_tfrecord2(): data = ds.TFRecordDataset("../data/dataset/testTextTFRecord/text.tfrecord", shuffle=False, schema='../data/dataset/testTextTFRecord/datasetSchema.json') - for i, d in enumerate(data.create_dict_iterator(num_epochs=1)): + for i, d in enumerate(data.create_dict_iterator(num_epochs=1, output_numpy=True)): assert d["line"].shape == line[i].shape assert d["words"].shape == words[i].shape assert d["chinese"].shape == chinese[i].shape @@ -153,7 +153,7 @@ def test_tfrecord3(): data = ds.TFRecordDataset("../data/dataset/testTextTFRecord/text.tfrecord", shuffle=False, schema=s) - for i, d in enumerate(data.create_dict_iterator(num_epochs=1)): + for i, d in enumerate(data.create_dict_iterator(num_epochs=1, output_numpy=True)): assert d["line"].shape == line[i].shape assert d["words"].shape == words[i].reshape([2, 2]).shape assert d["chinese"].shape == chinese[i].shape @@ -186,7 +186,7 @@ def create_text_mindrecord(): def test_mindrecord(): data = ds.MindDataset("../data/dataset/testTextMindRecord/test.mindrecord", shuffle=False) - for i, d in enumerate(data.create_dict_iterator(num_epochs=1)): + for i, d in enumerate(data.create_dict_iterator(num_epochs=1, output_numpy=True)): assert d["english"].shape == line[i].shape assert d["chinese"].shape == chinese[i].shape np.testing.assert_array_equal(line[i], to_str(d["english"])) @@ -231,7 +231,7 @@ def test_batch_padding_01(): data1 = ds.GeneratorDataset((lambda: gen_2cols(2)), ["col1d", "col2d"]) data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={"col2d": ([2, 2], b"-2"), "col1d": ([2], b"-1")}) data1 = data1.repeat(2) - for data in data1.create_dict_iterator(num_epochs=1): + for data in data1.create_dict_iterator(num_epochs=1, output_numpy=True): np.testing.assert_array_equal([[b"0", b"-1"], [b"1", b"-1"]], data["col1d"]) np.testing.assert_array_equal([[[b"100", b"-2"], [b"200", b"-2"]], [[b"101", b"-2"], [b"201", b"-2"]]], data["col2d"]) @@ -241,7 +241,7 @@ def test_batch_padding_02(): data1 = ds.GeneratorDataset((lambda: gen_2cols(2)), ["col1d", "col2d"]) data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={"col2d": ([1, 2], "")}) data1 = data1.repeat(2) - for data in data1.create_dict_iterator(num_epochs=1): + for data in data1.create_dict_iterator(num_epochs=1, output_numpy=True): np.testing.assert_array_equal([[b"0"], [b"1"]], data["col1d"]) np.testing.assert_array_equal([[[b"100", b""]], [[b"101", b""]]], data["col2d"]) @@ -251,7 +251,7 @@ def test_batch_padding_03(): data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={"col": (None, "PAD_VALUE")}) # pad automatically data1 = data1.repeat(2) res = dict() - for ind, data in enumerate(data1.create_dict_iterator(num_epochs=1)): + for ind, data in enumerate(data1.create_dict_iterator(num_epochs=1, output_numpy=True)): res[ind] = data["col"].copy() np.testing.assert_array_equal(res[0], [[b"0", b"PAD_VALUE"], [0, 1]]) np.testing.assert_array_equal(res[1], [[b"0", b"1", b"2", b"PAD_VALUE"], [b"0", b"1", b"2", b"3"]]) @@ -263,7 +263,7 @@ def test_batch_padding_04(): data1 = ds.GeneratorDataset((lambda: gen_var_cols(2)), ["col1", "col2"]) data1 = data1.batch(batch_size=2, drop_remainder=False, pad_info={}) # pad automatically data1 = data1.repeat(2) - for data in data1.create_dict_iterator(num_epochs=1): + for data in data1.create_dict_iterator(num_epochs=1, output_numpy=True): np.testing.assert_array_equal(data["col1"], [[b"0", b""], [b"0", b"1"]]) np.testing.assert_array_equal(data["col2"], [[b"100", b""], [b"100", b"101"]]) @@ -272,7 +272,7 @@ def test_batch_padding_05(): data1 = ds.GeneratorDataset((lambda: gen_var_cols_2d(3)), ["col1", "col2"]) data1 = data1.batch(batch_size=3, drop_remainder=False, pad_info={"col2": ([2, None], "-2"), "col1": (None, "-1")}) # pad automatically - for data in data1.create_dict_iterator(num_epochs=1): + for data in data1.create_dict_iterator(num_epochs=1, output_numpy=True): np.testing.assert_array_equal(data["col1"], [[[b"0", b"-1", b"-1"]], [[b"0", b"1", b"-1"]], [[b"0", b"1", b"2"]]]) np.testing.assert_array_equal(data["col2"], diff --git a/tests/ut/python/dataset/test_text_basic_tokenizer.py b/tests/ut/python/dataset/test_text_basic_tokenizer.py index 5d12f2940d1..c7d9361c8cc 100644 --- a/tests/ut/python/dataset/test_text_basic_tokenizer.py +++ b/tests/ut/python/dataset/test_text_basic_tokenizer.py @@ -82,7 +82,7 @@ def check_basic_tokenizer_default(first, last, expected_tokens, expected_offsets dataset = dataset.map(operations=basic_tokenizer) count = 0 - for i in dataset.create_dict_iterator(num_epochs=1): + for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): token = text.to_str(i['text']) logger.info("Out:", token) logger.info("Exp:", expected_tokens[count]) @@ -109,7 +109,7 @@ def check_basic_tokenizer_with_offsets(first, last, expected_tokens, expected_of output_columns=['token', 'offsets_start', 'offsets_limit'], column_order=['token', 'offsets_start', 'offsets_limit']) count = 0 - for i in dataset.create_dict_iterator(num_epochs=1): + for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): token = text.to_str(i['token']) logger.info("Out:", token) logger.info("Exp:", expected_tokens[count]) diff --git a/tests/ut/python/dataset/test_text_bert_tokenizer.py b/tests/ut/python/dataset/test_text_bert_tokenizer.py index a2b8b9c6378..51c4ab6ebbc 100644 --- a/tests/ut/python/dataset/test_text_bert_tokenizer.py +++ b/tests/ut/python/dataset/test_text_bert_tokenizer.py @@ -187,7 +187,7 @@ def check_bert_tokenizer_default(first, last, expect_str, preserve_unused_token=preserve_unused_token) dataset = dataset.map(operations=tokenizer_op) count = 0 - for i in dataset.create_dict_iterator(num_epochs=1): + for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): token = text.to_str(i['text']) logger.info("Out:", token) logger.info("Exp:", expect_str[count]) @@ -216,7 +216,7 @@ def check_bert_tokenizer_with_offsets(first, last, expect_str, output_columns=['token', 'offsets_start', 'offsets_limit'], column_order=['token', 'offsets_start', 'offsets_limit']) count = 0 - for i in dataset.create_dict_iterator(num_epochs=1): + for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): token = text.to_str(i['token']) logger.info("Out:", token) logger.info("Exp:", expect_str[count]) diff --git a/tests/ut/python/dataset/test_text_jieba_tokenizer.py b/tests/ut/python/dataset/test_text_jieba_tokenizer.py index 51991518267..21a9c611be4 100644 --- a/tests/ut/python/dataset/test_text_jieba_tokenizer.py +++ b/tests/ut/python/dataset/test_text_jieba_tokenizer.py @@ -32,7 +32,7 @@ def test_jieba_1(): num_parallel_workers=1) expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧'] ret = [] - for i in data.create_dict_iterator(num_epochs=1): + for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): ret = to_str(i["text"]) for index, item in enumerate(ret): assert item == expect[index] @@ -45,7 +45,7 @@ def test_jieba_1_1(): data = data.map(operations=jieba_op, input_columns=["text"], num_parallel_workers=1) expect = ['今天', '天气', '太', '好', '了', '我们', '一起', '去', '外面', '玩', '吧'] - for i in data.create_dict_iterator(num_epochs=1): + for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): ret = to_str(i["text"]) for index, item in enumerate(ret): assert item == expect[index] @@ -58,7 +58,7 @@ def test_jieba_1_2(): data = data.map(operations=jieba_op, input_columns=["text"], num_parallel_workers=1) expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧'] - for i in data.create_dict_iterator(num_epochs=1): + for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): ret = to_str(i["text"]) for index, item in enumerate(ret): assert item == expect[index] @@ -73,7 +73,7 @@ def test_jieba_2(): expect = ['男默女泪', '市', '长江大桥'] data = data.map(operations=jieba_op, input_columns=["text"], num_parallel_workers=2) - for i in data.create_dict_iterator(num_epochs=1): + for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): ret = to_str(i["text"]) for index, item in enumerate(ret): assert item == expect[index] @@ -88,7 +88,7 @@ def test_jieba_2_1(): data = data.map(operations=jieba_op, input_columns=["text"], num_parallel_workers=2) expect = ['男默女泪', '市', '长江大桥'] - for i in data.create_dict_iterator(num_epochs=1): + for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): ret = to_str(i["text"]) for index, item in enumerate(ret): assert item == expect[index] @@ -112,7 +112,7 @@ def test_jieba_2_3(): data = data.map(operations=jieba_op, input_columns=["text"], num_parallel_workers=2) expect = ['江州', '市长', '江大桥', '参加', '了', '长江大桥', '的', '通车', '仪式'] - for i in data.create_dict_iterator(num_epochs=1): + for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): ret = to_str(i["text"]) for index, item in enumerate(ret): assert item == expect[index] @@ -130,7 +130,7 @@ def test_jieba_3(): data = data.map(operations=jieba_op, input_columns=["text"], num_parallel_workers=1) expect = ['男默女泪', '市', '长江大桥'] - for i in data.create_dict_iterator(num_epochs=1): + for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): ret = to_str(i["text"]) for index, item in enumerate(ret): assert item == expect[index] @@ -149,7 +149,7 @@ def test_jieba_3_1(): data = data.map(operations=jieba_op, input_columns=["text"], num_parallel_workers=1) expect = ['男默女泪', '市长', '江大桥'] - for i in data.create_dict_iterator(num_epochs=1): + for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): ret = to_str(i["text"]) for index, item in enumerate(ret): assert item == expect[index] @@ -165,7 +165,7 @@ def test_jieba_4(): data = data.map(operations=jieba_op, input_columns=["text"], num_parallel_workers=1) expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧'] - for i in data.create_dict_iterator(num_epochs=1): + for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): ret = to_str(i["text"]) for index, item in enumerate(ret): assert item == expect[index] @@ -191,7 +191,7 @@ def test_jieba_5(): data = data.map(operations=jieba_op, input_columns=["text"], num_parallel_workers=1) expect = ['江州', '市长', '江大桥', '参加', '了', '长江大桥', '的', '通车', '仪式'] - for i in data.create_dict_iterator(num_epochs=1): + for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): ret = to_str(i["text"]) for index, item in enumerate(ret): assert item == expect[index] @@ -209,7 +209,7 @@ def test_jieba_with_offsets_1(): expected_offsets_start = [0, 12, 21, 27, 33, 36, 42] expected_offsets_limit = [12, 21, 27, 33, 36, 42, 48] ret = [] - for i in data.create_dict_iterator(num_epochs=1): + for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): ret = to_str(i["token"]) for index, item in enumerate(ret): assert item == expect[index] @@ -230,7 +230,7 @@ def test_jieba_with_offsets_1_1(): expect = ['今天', '天气', '太', '好', '了', '我们', '一起', '去', '外面', '玩', '吧'] expected_offsets_start = [0, 6, 12, 15, 18, 21, 27, 33, 36, 42, 45] expected_offsets_limit = [6, 12, 15, 18, 21, 27, 33, 36, 42, 45, 48] - for i in data.create_dict_iterator(num_epochs=1): + for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): ret = to_str(i["token"]) for index, item in enumerate(ret): assert item == expect[index] @@ -251,7 +251,7 @@ def test_jieba_with_offsets_1_2(): expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧'] expected_offsets_start = [0, 12, 21, 27, 33, 36, 42] expected_offsets_limit = [12, 21, 27, 33, 36, 42, 48] - for i in data.create_dict_iterator(num_epochs=1): + for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): ret = to_str(i["token"]) for index, item in enumerate(ret): assert item == expect[index] @@ -274,7 +274,7 @@ def test_jieba_with_offsets_2(): num_parallel_workers=2) expected_offsets_start = [0, 12, 15] expected_offsets_limit = [12, 15, 27] - for i in data.create_dict_iterator(num_epochs=1): + for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): ret = to_str(i["token"]) for index, item in enumerate(ret): assert item == expect[index] @@ -297,7 +297,7 @@ def test_jieba_with_offsets_2_1(): expect = ['男默女泪', '市', '长江大桥'] expected_offsets_start = [0, 12, 15] expected_offsets_limit = [12, 15, 27] - for i in data.create_dict_iterator(num_epochs=1): + for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): ret = to_str(i["token"]) for index, item in enumerate(ret): assert item == expect[index] @@ -320,7 +320,7 @@ def test_jieba_with_offsets_2_2(): expect = ['江州', '市长', '江大桥', '参加', '了', '长江大桥', '的', '通车', '仪式'] expected_offsets_start = [0, 6, 12, 21, 27, 30, 42, 45, 51] expected_offsets_limit = [6, 12, 21, 27, 30, 42, 45, 51, 57] - for i in data.create_dict_iterator(num_epochs=1): + for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): ret = to_str(i["token"]) for index, item in enumerate(ret): assert item == expect[index] @@ -346,7 +346,7 @@ def test_jieba_with_offsets_3(): expect = ['男默女泪', '市', '长江大桥'] expected_offsets_start = [0, 12, 15] expected_offsets_limit = [12, 15, 27] - for i in data.create_dict_iterator(num_epochs=1): + for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): ret = to_str(i["token"]) for index, item in enumerate(ret): assert item == expect[index] @@ -373,7 +373,7 @@ def test_jieba_with_offsets_3_1(): expect = ['男默女泪', '市长', '江大桥'] expected_offsets_start = [0, 12, 18] expected_offsets_limit = [12, 18, 27] - for i in data.create_dict_iterator(num_epochs=1): + for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): ret = to_str(i["token"]) for index, item in enumerate(ret): assert item == expect[index] @@ -397,7 +397,7 @@ def test_jieba_with_offsets_4(): expect = ['今天天气', '太好了', '我们', '一起', '去', '外面', '玩吧'] expected_offsets_start = [0, 12, 21, 27, 33, 36, 42] expected_offsets_limit = [12, 21, 27, 33, 36, 42, 48] - for i in data.create_dict_iterator(num_epochs=1): + for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): ret = to_str(i["token"]) for index, item in enumerate(ret): assert item == expect[index] @@ -421,7 +421,7 @@ def test_jieba_with_offsets_5(): expect = ['江州', '市长', '江大桥', '参加', '了', '长江大桥', '的', '通车', '仪式'] expected_offsets_start = [0, 6, 12, 21, 27, 30, 42, 45, 51] expected_offsets_limit = [6, 12, 21, 27, 30, 42, 45, 51, 57] - for i in data.create_dict_iterator(num_epochs=1): + for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): ret = to_str(i["token"]) for index, item in enumerate(ret): assert item == expect[index] @@ -450,7 +450,7 @@ def test_jieba_6(): data = data.map(operations=pytoken_op, input_columns=["text"], num_parallel_workers=1) expect = ['今天天气太', '好了我们一', '起去外面玩吧'] - for i in data.create_dict_iterator(num_epochs=1): + for i in data.create_dict_iterator(num_epochs=1, output_numpy=True): ret = to_str(i["text"]) for index, item in enumerate(ret): assert item == expect[index] diff --git a/tests/ut/python/dataset/test_text_tokenizer.py b/tests/ut/python/dataset/test_text_tokenizer.py index a22ee4f2f29..c30da7d6729 100644 --- a/tests/ut/python/dataset/test_text_tokenizer.py +++ b/tests/ut/python/dataset/test_text_tokenizer.py @@ -45,7 +45,7 @@ def test_unicode_char_tokenizer_default(): tokenizer = text.UnicodeCharTokenizer() dataset = dataset.map(operations=tokenizer) tokens = [] - for i in dataset.create_dict_iterator(num_epochs=1): + for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): token = text.to_str(i['text']).tolist() tokens.append(token) logger.info("The out tokens is : {}".format(tokens)) @@ -68,7 +68,7 @@ def test_unicode_char_tokenizer_with_offsets(): expected_offsets_limit = [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19], [3, 6, 9, 12, 15, 18], [3, 6, 9, 10, 11, 12, 13, 14, 15, 16, 17], [1, 2]] count = 0 - for i in dataset.create_dict_iterator(num_epochs=1): + for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): token = text.to_str(i['token']).tolist() tokens.append(token) np.testing.assert_array_equal(i['offsets_start'], expected_offsets_start[count]) @@ -90,7 +90,7 @@ def test_whitespace_tokenizer_default(): tokenizer = text.WhitespaceTokenizer() dataset = dataset.map(operations=tokenizer) tokens = [] - for i in dataset.create_dict_iterator(num_epochs=1): + for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): token = text.to_str(i['text']).tolist() tokens.append(token) logger.info("The out tokens is : {}".format(tokens)) @@ -114,7 +114,7 @@ def test_whitespace_tokenizer_with_offsets(): expected_offsets_start = [[0, 8, 11], [0], [0], [0]] expected_offsets_limit = [[7, 10, 19], [18], [17], [0]] count = 0 - for i in dataset.create_dict_iterator(num_epochs=1): + for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): token = text.to_str(i['token']).tolist() tokens.append(token) np.testing.assert_array_equal(i['offsets_start'], expected_offsets_start[count]) @@ -138,7 +138,7 @@ def test_unicode_script_tokenizer_default(): dataset = dataset.map(operations=tokenizer) tokens = [] - for i in dataset.create_dict_iterator(num_epochs=1): + for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): token = text.to_str(i['text']).tolist() tokens.append(token) logger.info("The out tokens is : {}".format(tokens)) @@ -157,7 +157,7 @@ def test_unicode_script_tokenizer_default2(): tokenizer = text.UnicodeScriptTokenizer(keep_whitespace=True) dataset = dataset.map(operations=tokenizer) tokens = [] - for i in dataset.create_dict_iterator(num_epochs=1): + for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): token = text.to_str(i['text']).tolist() tokens.append(token) logger.info("The out tokens is :", tokens) @@ -181,7 +181,7 @@ def test_unicode_script_tokenizer_with_offsets(): expected_offsets_start = [[0, 8, 11, 18], [0, 15], [0, 9, 16], [0]] expected_offsets_limit = [[7, 10, 18, 19], [15, 18], [9, 16, 17], [0]] count = 0 - for i in dataset.create_dict_iterator(num_epochs=1): + for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): token = text.to_str(i['token']).tolist() tokens.append(token) np.testing.assert_array_equal(i['offsets_start'], expected_offsets_start[count]) @@ -208,7 +208,7 @@ def test_unicode_script_tokenizer_with_offsets2(): expected_offsets_start = [[0, 7, 8, 10, 11, 18], [0, 15], [0, 9, 16], [0]] expected_offsets_limit = [[7, 8, 10, 11, 18, 19], [15, 18], [9, 16, 17], [2]] count = 0 - for i in dataset.create_dict_iterator(num_epochs=1): + for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): token = text.to_str(i['token']).tolist() tokens.append(token) np.testing.assert_array_equal(i['offsets_start'], expected_offsets_start[count]) @@ -228,7 +228,7 @@ def test_case_fold(): dataset = dataset.map(operations=op) lower_strs = [] - for i in dataset.create_dict_iterator(num_epochs=1): + for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): token = text.to_str(i['text']).tolist() lower_strs.append(token) assert lower_strs == expect_strs @@ -245,7 +245,7 @@ def test_normalize_utf8(): dataset = dataset.map(operations=normalize) out_bytes = [] out_texts = [] - for i in dataset.create_dict_iterator(num_epochs=1): + for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): out_bytes.append(i['text']) out_texts.append(text.to_str(i['text']).tolist()) logger.info("The out bytes is : ", out_bytes) @@ -286,7 +286,7 @@ def test_regex_replace(): replace_op = text.RegexReplace(pattern, replace) dataset = dataset.map(operations=replace_op) out_text = [] - for i in dataset.create_dict_iterator(num_epochs=1): + for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): token = text.to_str(i['text']).tolist() out_text.append(token) logger.info("Out:", out_text) @@ -314,7 +314,7 @@ def test_regex_tokenizer_default(): dataset = dataset.map(operations=tokenizer_op) out_text = [] count = 0 - for i in dataset.create_dict_iterator(num_epochs=1): + for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): token = text.to_str(i['text']).tolist() np.testing.assert_array_equal(token, expect_str[count]) count += 1 @@ -348,7 +348,7 @@ def test_regex_tokenizer_with_offsets(): column_order=['token', 'offsets_start', 'offsets_limit']) out_text = [] count = 0 - for i in dataset.create_dict_iterator(num_epochs=1): + for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): token = text.to_str(i['token']).tolist() np.testing.assert_array_equal(token, expect_str[count]) np.testing.assert_array_equal(i['offsets_start'], expected_offsets_start[count]) diff --git a/tests/ut/python/dataset/test_text_wordpiece_tokenizer.py b/tests/ut/python/dataset/test_text_wordpiece_tokenizer.py index b879191170a..6d9202bea3a 100644 --- a/tests/ut/python/dataset/test_text_wordpiece_tokenizer.py +++ b/tests/ut/python/dataset/test_text_wordpiece_tokenizer.py @@ -108,7 +108,7 @@ def check_wordpiece_tokenizer_default(first, last, expect_str, expected_offsets_ max_bytes_per_token=max_bytes_per_token) dataset = dataset.map(operations=tokenizer_op) count = 0 - for i in dataset.create_dict_iterator(num_epochs=1): + for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): token = text.to_str(i['text']) logger.info("Out:", token) logger.info("Exp:", expect_str[count]) @@ -130,7 +130,7 @@ def check_wordpiece_tokenizer_with_offsets(first, last, expect_str, expected_off output_columns=['token', 'offsets_start', 'offsets_limit'], column_order=['token', 'offsets_start', 'offsets_limit']) count = 0 - for i in dataset.create_dict_iterator(num_epochs=1): + for i in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): token = text.to_str(i['token']) logger.info("Out:", token) logger.info("Exp:", expect_str[count]) diff --git a/tests/ut/python/dataset/test_to_number_op.py b/tests/ut/python/dataset/test_to_number_op.py index 8e65c288e32..c51367c5997 100644 --- a/tests/ut/python/dataset/test_to_number_op.py +++ b/tests/ut/python/dataset/test_to_number_op.py @@ -43,7 +43,7 @@ def test_to_number_typical_case_integral(): expected_output = [int(string) for string in inputs] output = [] - for data in dataset.create_dict_iterator(num_epochs=1): + for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): output.append(data["strings"]) assert output == expected_output @@ -59,7 +59,7 @@ def test_to_number_typical_case_non_integral(): expected_output = [float(string) for string in inputs] output = [] - for data in dataset.create_dict_iterator(num_epochs=1): + for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): output.append(data["strings"]) for expected, actual, epsilon in zip(expected_output, output, epsilons): @@ -73,7 +73,7 @@ def out_of_bounds_error_message_check(dataset, np_type, value_to_cast): type_name = str(np.dtype(np_type)) with pytest.raises(RuntimeError) as info: - for _ in dataset.create_dict_iterator(num_epochs=1): + for _ in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): pass assert "String input " + value_to_cast + " will be out of bounds if casted to " + type_name in str(info.value) assert "valid range is: [" + type_min + ", " + type_max + "]" in str(info.value) @@ -101,7 +101,7 @@ def test_to_number_out_of_bounds_non_integral(): dataset = dataset.map(operations=text.ToNumber(ms_non_integral_types[0]), input_columns=["strings"]) with pytest.raises(RuntimeError) as info: - for _ in dataset.create_dict_iterator(num_epochs=1): + for _ in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): pass assert "outside of valid float16 range" in str(info.value) @@ -110,7 +110,7 @@ def test_to_number_out_of_bounds_non_integral(): dataset = dataset.map(operations=text.ToNumber(ms_non_integral_types[1]), input_columns=["strings"]) with pytest.raises(RuntimeError) as info: - for _ in dataset.create_dict_iterator(num_epochs=1): + for _ in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): pass assert "String input " + input_strings[0] + " will be out of bounds if casted to float32" in str(info.value) @@ -119,7 +119,7 @@ def test_to_number_out_of_bounds_non_integral(): dataset = dataset.map(operations=text.ToNumber(ms_non_integral_types[2]), input_columns=["strings"]) with pytest.raises(RuntimeError) as info: - for _ in dataset.create_dict_iterator(num_epochs=1): + for _ in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): pass assert "String input " + input_strings[0] + " will be out of bounds if casted to float64" in str(info.value) @@ -130,7 +130,7 @@ def test_to_number_out_of_bounds_non_integral(): dataset = dataset.map(operations=text.ToNumber(ms_non_integral_types[0]), input_columns=["strings"]) with pytest.raises(RuntimeError) as info: - for _ in dataset.create_dict_iterator(num_epochs=1): + for _ in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): pass assert "outside of valid float16 range" in str(info.value) @@ -139,7 +139,7 @@ def test_to_number_out_of_bounds_non_integral(): dataset = dataset.map(operations=text.ToNumber(ms_non_integral_types[1]), input_columns=["strings"]) with pytest.raises(RuntimeError) as info: - for _ in dataset.create_dict_iterator(num_epochs=1): + for _ in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): pass assert "String input " + input_strings[0] + " will be out of bounds if casted to float32" in str(info.value) @@ -148,7 +148,7 @@ def test_to_number_out_of_bounds_non_integral(): dataset = dataset.map(operations=text.ToNumber(ms_non_integral_types[2]), input_columns=["strings"]) with pytest.raises(RuntimeError) as info: - for _ in dataset.create_dict_iterator(num_epochs=1): + for _ in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): pass assert "String input " + input_strings[0] + " will be out of bounds if casted to float64" in str(info.value) @@ -159,19 +159,19 @@ def test_to_number_boundaries_integral(): input_strings = [str(type_info.max)] dataset = ds.GeneratorDataset(string_dataset_generator(input_strings), "strings") dataset = dataset.map(operations=text.ToNumber(ms_type), input_columns=["strings"]) - for data in dataset.create_dict_iterator(num_epochs=1): + for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): assert data["strings"] == int(input_strings[0]) input_strings = [str(type_info.min)] dataset = ds.GeneratorDataset(string_dataset_generator(input_strings), "strings") dataset = dataset.map(operations=text.ToNumber(ms_type), input_columns=["strings"]) - for data in dataset.create_dict_iterator(num_epochs=1): + for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): assert data["strings"] == int(input_strings[0]) input_strings = [str(0)] dataset = ds.GeneratorDataset(string_dataset_generator(input_strings), "strings") dataset = dataset.map(operations=text.ToNumber(ms_type), input_columns=["strings"]) - for data in dataset.create_dict_iterator(num_epochs=1): + for data in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): assert data["strings"] == int(input_strings[0]) @@ -181,7 +181,7 @@ def test_to_number_invalid_input(): dataset = dataset.map(operations=text.ToNumber(mstype.int32), input_columns=["strings"]) with pytest.raises(RuntimeError) as info: - for _ in dataset.create_dict_iterator(num_epochs=1): + for _ in dataset.create_dict_iterator(num_epochs=1, output_numpy=True): pass assert "It is invalid to convert " + input_strings[0] + " to a number" in str(info.value) diff --git a/tests/ut/python/dataset/test_to_type.py b/tests/ut/python/dataset/test_to_type.py index f1fb046fff4..b05463c0cce 100644 --- a/tests/ut/python/dataset/test_to_type.py +++ b/tests/ut/python/dataset/test_to_type.py @@ -54,7 +54,8 @@ def test_to_type_op(): transform2 = mindspore.dataset.transforms.py_transforms.Compose(transforms2) data2 = data2.map(operations=transform2, input_columns=["image"]) - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): image1 = item1["image"] image2 = item2["image"] diff --git a/tests/ut/python/dataset/test_type_cast.py b/tests/ut/python/dataset/test_type_cast.py index 359b6d08b2a..58a366ed751 100644 --- a/tests/ut/python/dataset/test_type_cast.py +++ b/tests/ut/python/dataset/test_type_cast.py @@ -56,7 +56,8 @@ def test_type_cast(): data2 = data2.map(operations=transform, input_columns=["image"]) num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): num_iter += 1 c_image = item1["image"] py_image = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) @@ -96,7 +97,8 @@ def test_type_cast_string(): data2 = data2.map(operations=transform, input_columns=["image"]) num_iter = 0 - for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1), data2.create_dict_iterator(num_epochs=1)): + for item1, item2 in zip(data1.create_dict_iterator(num_epochs=1, output_numpy=True), + data2.create_dict_iterator(num_epochs=1, output_numpy=True)): num_iter += 1 c_image = item1["image"] py_image = (item2["image"].transpose(1, 2, 0) * 255).astype(np.uint8) diff --git a/tests/ut/python/dataset/test_uniform_augment.py b/tests/ut/python/dataset/test_uniform_augment.py index cf07d28ff0d..8d09cf9df70 100644 --- a/tests/ut/python/dataset/test_uniform_augment.py +++ b/tests/ut/python/dataset/test_uniform_augment.py @@ -47,10 +47,10 @@ def test_uniform_augment(plot=False, num_ops=2): for idx, (image, _) in enumerate(ds_original): if idx == 0: - images_original = np.transpose(image, (0, 2, 3, 1)) + images_original = np.transpose(image.asnumpy(), (0, 2, 3, 1)) else: images_original = np.append(images_original, - np.transpose(image, (0, 2, 3, 1)), + np.transpose(image.asnumpy(), (0, 2, 3, 1)), axis=0) # UniformAugment Images @@ -76,10 +76,10 @@ def test_uniform_augment(plot=False, num_ops=2): for idx, (image, _) in enumerate(ds_ua): if idx == 0: - images_ua = np.transpose(image, (0, 2, 3, 1)) + images_ua = np.transpose(image.asnumpy(), (0, 2, 3, 1)) else: images_ua = np.append(images_ua, - np.transpose(image, (0, 2, 3, 1)), + np.transpose(image.asnumpy(), (0, 2, 3, 1)), axis=0) num_samples = images_original.shape[0] @@ -110,10 +110,10 @@ def test_cpp_uniform_augment(plot=False, num_ops=2): for idx, (image, _) in enumerate(ds_original): if idx == 0: - images_original = np.transpose(image, (0, 2, 3, 1)) + images_original = np.transpose(image.asnumpy(), (0, 2, 3, 1)) else: images_original = np.append(images_original, - np.transpose(image, (0, 2, 3, 1)), + np.transpose(image.asnumpy(), (0, 2, 3, 1)), axis=0) # UniformAugment Images @@ -136,10 +136,10 @@ def test_cpp_uniform_augment(plot=False, num_ops=2): for idx, (image, _) in enumerate(ds_ua): if idx == 0: - images_ua = np.transpose(image, (0, 2, 3, 1)) + images_ua = np.transpose(image.asnumpy(), (0, 2, 3, 1)) else: images_ua = np.append(images_ua, - np.transpose(image, (0, 2, 3, 1)), + np.transpose(image.asnumpy(), (0, 2, 3, 1)), axis=0) if plot: visualize_list(images_original, images_ua) @@ -254,7 +254,7 @@ def test_cpp_uniform_augment_random_crop_badinput(num_ops=1): ds1 = ds1.batch(batch_size, drop_remainder=True, num_parallel_workers=1) num_batches = 0 try: - for _ in ds1.create_dict_iterator(num_epochs=1): + for _ in ds1.create_dict_iterator(num_epochs=1, output_numpy=True): num_batches += 1 except Exception as e: assert "Crop size" in str(e) diff --git a/tests/ut/python/dataset/test_var_batch_map.py b/tests/ut/python/dataset/test_var_batch_map.py index 83c663437e3..c055d1526e1 100644 --- a/tests/ut/python/dataset/test_var_batch_map.py +++ b/tests/ut/python/dataset/test_var_batch_map.py @@ -25,12 +25,12 @@ def test_batch_corner_cases(): def test_repeat_batch(gen_num, repeats, batch_size, drop, res): data1 = ds.GeneratorDataset((lambda: gen(gen_num)), ["num"]).repeat(repeats).batch(batch_size, drop) - for item in data1.create_dict_iterator(num_epochs=1): + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): res.append(item["num"]) def test_batch_repeat(gen_num, repeats, batch_size, drop, res): data1 = ds.GeneratorDataset((lambda: gen(gen_num)), ["num"]).batch(batch_size, drop).repeat(repeats) - for item in data1.create_dict_iterator(num_epochs=1): + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): res.append(item["num"]) tst1, tst2, tst3, tst4 = [], [], [], [] @@ -81,7 +81,7 @@ def test_variable_size_batch(): def test_repeat_batch(gen_num, r, drop, func, res): data1 = ds.GeneratorDataset((lambda: gen(gen_num)), ["num"]).repeat(r).batch(batch_size=func, drop_remainder=drop) - for item in data1.create_dict_iterator(num_epochs=1): + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): res.append(item["num"]) # same as test_repeat_batch except each row is passed through via a map which makes a copy of each element @@ -89,14 +89,14 @@ def test_variable_size_batch(): res = [] data1 = ds.GeneratorDataset((lambda: gen(gen_num)), ["num"]).repeat(r) \ .batch(batch_size=func, drop_remainder=drop, input_columns=["num"], per_batch_map=simple_copy) - for item in data1.create_dict_iterator(num_epochs=1): + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): res.append(item["num"]) return res def test_batch_repeat(gen_num, r, drop, func, res): data1 = ds.GeneratorDataset((lambda: gen(gen_num)), ["num"]).batch(batch_size=func, drop_remainder=drop).repeat( r) - for item in data1.create_dict_iterator(num_epochs=1): + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): res.append(item["num"]) # same as test_batch_repeat except each row is passed through via a map which makes a copy of each element @@ -104,7 +104,7 @@ def test_variable_size_batch(): res = [] data1 = ds.GeneratorDataset((lambda: gen(gen_num)), ["num"]) \ .batch(batch_size=func, drop_remainder=drop, input_columns=["num"], per_batch_map=simple_copy).repeat(r) - for item in data1.create_dict_iterator(num_epochs=1): + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): res.append(item["num"]) return res @@ -162,7 +162,7 @@ def test_basic_batch_map(): def batch_map_config(num, r, batch_size, func, res): data1 = ds.GeneratorDataset((lambda: gen(num)), ["num"]) \ .batch(batch_size=batch_size, input_columns=["num"], per_batch_map=func).repeat(r) - for item in data1.create_dict_iterator(num_epochs=1): + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): res.append(item["num"]) tst1, tst2, = [], [] @@ -201,7 +201,7 @@ def test_batch_multi_col_map(): def batch_map_config(num, r, batch_size, func, col_names, res): data1 = ds.GeneratorDataset((lambda: gen(num)), ["num", "num_square"]) \ .batch(batch_size=batch_size, input_columns=col_names, per_batch_map=func).repeat(r) - for item in data1.create_dict_iterator(num_epochs=1): + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): res.append(np.array([item["num"], item["num_square"]])) tst1, tst2, tst3, tst4 = [], [], [], [] @@ -253,7 +253,7 @@ def test_var_batch_multi_col_map(): def batch_map_config(num, r, fbatch, fmap, col_names, res): data1 = ds.GeneratorDataset((lambda: gen_3_cols(num)), ["col1", "col2", "col3"]) \ .batch(batch_size=fbatch, input_columns=col_names, per_batch_map=fmap).repeat(r) - for item in data1.create_dict_iterator(num_epochs=1): + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): res.append(np.array([item["col1"], item["col2"], item["col3"]])) tst1 = [] @@ -277,7 +277,7 @@ def test_var_batch_var_resize(): data1 = data1.batch(batch_size=add_one, drop_remainder=True, input_columns=["image"], per_batch_map=np_psedo_resize) # i-th batch has shape [i, i^2, i^2, 3] i = 1 - for item in data1.create_dict_iterator(num_epochs=1): + for item in data1.create_dict_iterator(num_epochs=1, output_numpy=True): assert item["image"].shape == (i, i ** 2, i ** 2, 3), "\ntest_var_batch_var_resize FAILED\n" i += 1 diff --git a/tests/ut/python/dataset/test_vocab.py b/tests/ut/python/dataset/test_vocab.py index afcfa225a6f..9241b759bdc 100644 --- a/tests/ut/python/dataset/test_vocab.py +++ b/tests/ut/python/dataset/test_vocab.py @@ -32,7 +32,7 @@ def test_from_list_tutorial(): data = data.map(operations=lookup, input_columns=["text"]) ind = 0 res = [2, 1, 4, 5, 6, 7] - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): assert d["text"] == res[ind], ind ind += 1 @@ -44,7 +44,7 @@ def test_from_file_tutorial(): data = data.map(operations=lookup, input_columns=["text"]) ind = 0 res = [10, 11, 12, 15, 13, 14] - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): assert d["text"] == res[ind], ind ind += 1 @@ -56,7 +56,7 @@ def test_from_dict_tutorial(): data = data.map(operations=lookup, input_columns=["text"]) res = [3, 6, 2, 4, 5, 6] ind = 0 - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): assert d["text"] == res[ind], ind ind += 1 @@ -81,7 +81,7 @@ def test_from_list(): data = ds.GeneratorDataset(gen(lookup_str), column_names=["text"]) data = data.map(operations=text.Lookup(vocab, unknown_token), input_columns=["text"]) res = [] - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): res.append(d["text"].item()) return res except (ValueError, RuntimeError, TypeError) as e: @@ -120,7 +120,7 @@ def test_from_file(): data = ds.GeneratorDataset(gen(lookup_str), column_names=["text"]) data = data.map(operations=text.Lookup(vocab, "s2"), input_columns=["text"]) res = [] - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): res.append(d["text"].item()) return res except ValueError as e: @@ -152,7 +152,7 @@ def test_lookup_cast_type(): op = text.Lookup(vocab, "") if data_type is None else text.Lookup(vocab, "", data_type) data = data.map(operations=op, input_columns=["text"]) res = [] - for d in data.create_dict_iterator(num_epochs=1): + for d in data.create_dict_iterator(num_epochs=1, output_numpy=True): res.append(d["text"]) return res[0].dtype except (ValueError, RuntimeError, TypeError) as e: diff --git a/tests/ut/python/dataset/test_zip.py b/tests/ut/python/dataset/test_zip.py index 69ea31cd340..ab6136473f7 100644 --- a/tests/ut/python/dataset/test_zip.py +++ b/tests/ut/python/dataset/test_zip.py @@ -138,7 +138,7 @@ def test_zip_exception_01(): dataz = ds.zip((data1, data1)) num_iter = 0 - for _, item in enumerate(dataz.create_dict_iterator(num_epochs=1)): + for _, item in enumerate(dataz.create_dict_iterator(num_epochs=1, output_numpy=True)): logger.info("item[input_mask] is {}".format(item["input_mask"])) num_iter += 1 logger.info("Number of data in zipped dataz: {}".format(num_iter)) @@ -159,7 +159,7 @@ def test_zip_exception_02(): dataz = ds.zip((data1, data2)) num_iter = 0 - for _, item in enumerate(dataz.create_dict_iterator(num_epochs=1)): + for _, item in enumerate(dataz.create_dict_iterator(num_epochs=1, output_numpy=True)): logger.info("item[input_mask] is {}".format(item["input_mask"])) num_iter += 1 logger.info("Number of data in zipped dataz: {}".format(num_iter)) @@ -180,7 +180,7 @@ def test_zip_exception_03(): dataz = dataz.repeat(2) num_iter = 0 - for _, item in enumerate(dataz.create_dict_iterator(num_epochs=1)): + for _, item in enumerate(dataz.create_dict_iterator(num_epochs=1, output_numpy=True)): logger.info("item[input_mask] is {}".format(item["input_mask"])) num_iter += 1 logger.info("Number of data in zipped dataz: {}".format(num_iter)) @@ -200,7 +200,7 @@ def test_zip_exception_04(): dataz = dataz.repeat(2) num_iter = 0 - for _, item in enumerate(dataz.create_dict_iterator(num_epochs=1)): + for _, item in enumerate(dataz.create_dict_iterator(num_epochs=1, output_numpy=True)): logger.info("item[input_mask] is {}".format(item["input_mask"])) num_iter += 1 logger.info("Number of data in zipped dataz: {}".format(num_iter)) @@ -221,7 +221,7 @@ def test_zip_exception_05(): dataz = ds.zip(data1, data2) num_iter = 0 - for _, item in enumerate(dataz.create_dict_iterator(num_epochs=1)): + for _, item in enumerate(dataz.create_dict_iterator(num_epochs=1, output_numpy=True)): logger.info("item[input_mask] is {}".format(item["input_mask"])) num_iter += 1 logger.info("Number of data in zipped dataz: {}".format(num_iter)) @@ -241,7 +241,7 @@ def test_zip_exception_06(): dataz = ds.zip(data1) num_iter = 0 - for _, item in enumerate(dataz.create_dict_iterator(num_epochs=1)): + for _, item in enumerate(dataz.create_dict_iterator(num_epochs=1, output_numpy=True)): logger.info("item[input_mask] is {}".format(item["input_mask"])) num_iter += 1 logger.info("Number of data in zipped dataz: {}".format(num_iter)) diff --git a/tests/ut/python/dataset/util.py b/tests/ut/python/dataset/util.py index ea061d2b9cb..3f20bfe2d5c 100644 --- a/tests/ut/python/dataset/util.py +++ b/tests/ut/python/dataset/util.py @@ -88,7 +88,7 @@ def save_and_check_dict(data, filename, generate_golden=False): num_iter = 0 result_dict = {} - for item in data.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in data.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary for data_key in list(item.keys()): if data_key not in result_dict: result_dict[data_key] = [] @@ -119,7 +119,7 @@ def save_and_check_md5(data, filename, generate_golden=False): num_iter = 0 result_dict = {} - for item in data.create_dict_iterator(num_epochs=1): # each data is a dictionary + for item in data.create_dict_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary for data_key in list(item.keys()): if data_key not in result_dict: result_dict[data_key] = [] @@ -146,7 +146,7 @@ def save_and_check_tuple(data, parameters, filename, generate_golden=False): num_iter = 0 result_dict = {} - for item in data.create_tuple_iterator(num_epochs=1): # each data is a dictionary + for item in data.create_tuple_iterator(num_epochs=1, output_numpy=True): # each data is a dictionary for data_key, _ in enumerate(item): if data_key not in result_dict: result_dict[data_key] = [] @@ -392,7 +392,7 @@ def check_bad_bbox(data, test_op, invalid_bbox_type, expected_error): data = data.map(operations=[test_op], input_columns=["image", "bbox"], output_columns=["image", "bbox"], column_order=["image", "bbox"]) # Add column for "bbox" - for _, _ in enumerate(data.create_dict_iterator(num_epochs=1)): + for _, _ in enumerate(data.create_dict_iterator(num_epochs=1, output_numpy=True)): break except RuntimeError as error: logger.info("Got an exception in DE: {}".format(str(error))) @@ -406,7 +406,7 @@ def dataset_equal(data1, data2, mse_threshold): equal = True for item1, item2 in itertools.zip_longest(data1, data2): for column1, column2 in itertools.zip_longest(item1, item2): - mse = diff_mse(column1, column2) + mse = diff_mse(column1.asnumpy(), column2.asnumpy()) if mse > mse_threshold: equal = False break @@ -428,8 +428,8 @@ def dataset_equal_with_function(data_unchanged, data_target, mse_threshold, foo, for item1, item2 in itertools.zip_longest(data_unchanged, data_target): for column1, column2 in itertools.zip_longest(item1, item2): # note the function is to be applied to the second dataset - column2 = foo(column2, *foo_args) - mse = diff_mse(column1, column2) + column2 = foo(column2.asnumpy(), *foo_args) + mse = diff_mse(column1.asnumpy(), column2) if mse > mse_threshold: equal = False break